_ZN4Simd12GetAlignmentEv:
   32|      2|    {
   33|       |#ifdef SIMD_AVX512VNNI_ENABLE
   34|       |        if (Avx512vnni::Enable)
   35|       |            return sizeof(__m512i);
   36|       |        else
   37|       |#endif
   38|      2|#ifdef SIMD_AVX512BW_ENABLE
   39|      2|        if (Avx512bw::Enable)
  ------------------
  |  Branch (39:13): [True: 0, False: 2]
  ------------------
   40|      0|            return sizeof(__m512i);
   41|      2|        else
   42|      2|#endif
   43|      2|#ifdef SIMD_AVX2_ENABLE
   44|      2|        if (Avx2::Enable)
  ------------------
  |  Branch (44:13): [True: 2, False: 0]
  ------------------
   45|      2|            return sizeof(__m256i);
   46|      0|        else
   47|      0|#endif
   48|      0|#ifdef SIMD_SSE41_ENABLE
   49|      0|        if (Sse41::Enable)
  ------------------
  |  Branch (49:13): [True: 0, False: 0]
  ------------------
   50|      0|            return sizeof(__m128i);
   51|      0|        else
   52|      0|#endif
   53|       |#ifdef SIMD_NEON_ENABLE
   54|       |        if (Neon::Enable)
   55|       |            return sizeof(uint8x16_t);
   56|       |        else
   57|       |#endif
   58|      0|            return sizeof(void *);
   59|      2|    }

_ZN4Simd9AllocatorIhE4FreeEPv:
   73|    876|        {
   74|       |#ifdef __SimdMemory_h__
   75|       |            Simd::Free(ptr);
   76|       |#else
   77|    876|            SimdFree(ptr);
   78|    876|#endif
   79|    876|        }
_ZN4Simd9AllocatorIhE5AlignEmm:
   92|    876|        {
   93|    876|#ifdef __SimdMemory_h__
   94|    876|            return Simd::AlignHi(size, align);
   95|       |#else
   96|       |            return SimdAlign(size, align);
   97|       |#endif
   98|    876|        }
_ZN4Simd9AllocatorIhE8AllocateEmm:
   55|    876|        {
   56|    876|#ifdef __SimdMemory_h__
   57|    876|            return Simd::Allocate(size, align);
   58|       |#else
   59|       |            return SimdAllocate(size, align);
   60|       |#endif
   61|    876|        }
_ZN4Simd9AllocatorIhE9AlignmentEv:
  127|    876|        {
  128|       |#if defined(__SimdAlignment_h__) && defined(_WIN32)
  129|       |            return Simd::Alignment();
  130|       |#else
  131|    876|            return SimdAlignment();
  132|    876|#endif
  133|    876|        }

_ZNK4Simd5ArrayIhE7RawSizeEv:
   96|  5.23k|        {
   97|  5.23k|            return size * sizeof(T);
   98|  5.23k|        }
_ZN4Simd5ArrayIhE6ResizeEmbm:
   51|  25.3k|        {
   52|  25.3k|            if (size_ != size)
  ------------------
  |  Branch (52:17): [True: 5.23k, False: 20.1k]
  ------------------
   53|  5.23k|            {
   54|  5.23k|                if (data)
  ------------------
  |  Branch (54:21): [True: 0, False: 5.23k]
  ------------------
   55|      0|                {
   56|      0|                    Simd::Free(data);
   57|      0|                    *(T**)&data = 0;
   58|      0|                }
   59|  5.23k|                *(size_t*)&size = size_;
   60|  5.23k|                if (size_)
  ------------------
  |  Branch (60:21): [True: 5.23k, False: 0]
  ------------------
   61|  5.23k|                    *(T**)&data = (T*)Simd::Allocate(RawSize(), align);
   62|  5.23k|            }
   63|  25.3k|            if (clear)
  ------------------
  |  Branch (63:17): [True: 0, False: 25.3k]
  ------------------
   64|      0|                Clear();
   65|  25.3k|        }
_ZN4Simd5ArrayIhEC2Embm:
   40|  20.1k|        {
   41|  20.1k|            Resize(size_, clear);
   42|  20.1k|        }
_ZN4Simd5ArrayIhED2Ev:
   45|  20.1k|        {
   46|  20.1k|            if (data)
  ------------------
  |  Branch (46:17): [True: 5.23k, False: 14.9k]
  ------------------
   47|  5.23k|                Simd::Free(data);
   48|  20.1k|        }
_ZNK4Simd5ArrayIhE5EmptyEv:
  101|  4.70k|        {
  102|  4.70k|            return data == NULL;
  103|  4.70k|        }

_ZN4Simd4Avx29RgbToBgraEPKhmmmPhmh:
  155|    405|        {
  156|    405|            if (Aligned(bgra) && Aligned(bgraStride) && Aligned(rgb) && Aligned(rgbStride))
  ------------------
  |  Branch (156:17): [True: 405, False: 0]
  |  Branch (156:34): [True: 405, False: 0]
  |  Branch (156:57): [True: 393, False: 12]
  |  Branch (156:73): [True: 75, False: 318]
  ------------------
  157|     75|                RgbToBgra<true>(rgb, width, height, rgbStride, bgra, bgraStride, alpha);
  158|    330|            else
  159|    330|                RgbToBgra<false>(rgb, width, height, rgbStride, bgra, bgraStride, alpha);
  160|    405|        }
_ZN4Simd4Avx29RgbToBgraILb1EEEvPKhmmmPhmh:
  134|     75|        {
  135|     75|            assert(width >= A);
  136|     75|            if (align)
  ------------------
  |  Branch (136:17): [Folded - Ignored]
  ------------------
  137|     75|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(rgb) && Aligned(rgbStride));
  138|       |
  139|     75|            size_t alignedWidth = AlignLo(width, A);
  140|       |
  141|     75|            __m256i _alpha = _mm256_slli_si256(_mm256_set1_epi32(alpha), 3);
  142|       |
  143|    150|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (143:34): [True: 75, False: 75]
  ------------------
  144|     75|            {
  145|  12.9k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (145:38): [True: 12.8k, False: 75]
  ------------------
  146|  12.8k|                    RgbToBgra<align>(rgb + 3 * col, bgra + 4 * col, _alpha);
  147|     75|                if (width != alignedWidth)
  ------------------
  |  Branch (147:21): [True: 0, False: 75]
  ------------------
  148|      0|                    RgbToBgra<false>(rgb + 3 * (width - A), bgra + 4 * (width - A), _alpha);
  149|     75|                rgb += rgbStride;
  150|     75|                bgra += bgraStride;
  151|     75|            }
  152|     75|        }
_ZN4Simd4Avx29RgbToBgraILb1EEEvPKhPhDv4_x:
  126|  12.8k|        {
  127|  12.8k|            Store<align>((__m256i*)bgra + 0, RgbToBgra<false>(Load<align>((__m256i*)(rgb + 0)), alpha));
  128|  12.8k|            Store<align>((__m256i*)bgra + 1, RgbToBgra<false>(Load<false>((__m256i*)(rgb + 24)), alpha));
  129|  12.8k|            Store<align>((__m256i*)bgra + 2, RgbToBgra<false>(Load<false>((__m256i*)(rgb + 48)), alpha));
  130|  12.8k|            Store<align>((__m256i*)bgra + 3, RgbToBgra<true >(Load<align>((__m256i*)(rgb + 64)), alpha));
  131|  12.8k|        }
_ZN4Simd4Avx29RgbToBgraILb0EEEvPKhPhDv4_x:
  126|  69.3k|        {
  127|  69.3k|            Store<align>((__m256i*)bgra + 0, RgbToBgra<false>(Load<align>((__m256i*)(rgb + 0)), alpha));
  128|  69.3k|            Store<align>((__m256i*)bgra + 1, RgbToBgra<false>(Load<false>((__m256i*)(rgb + 24)), alpha));
  129|  69.3k|            Store<align>((__m256i*)bgra + 2, RgbToBgra<false>(Load<false>((__m256i*)(rgb + 48)), alpha));
  130|  69.3k|            Store<align>((__m256i*)bgra + 3, RgbToBgra<true >(Load<align>((__m256i*)(rgb + 64)), alpha));
  131|  69.3k|        }
_ZN4Simd4Avx29RgbToBgraILb0EEEvPKhmmmPhmh:
  134|    330|        {
  135|    330|            assert(width >= A);
  136|    330|            if (align)
  ------------------
  |  Branch (136:17): [Folded - Ignored]
  ------------------
  137|      0|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(rgb) && Aligned(rgbStride));
  138|       |
  139|    330|            size_t alignedWidth = AlignLo(width, A);
  140|       |
  141|    330|            __m256i _alpha = _mm256_slli_si256(_mm256_set1_epi32(alpha), 3);
  142|       |
  143|  2.85k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (143:34): [True: 2.52k, False: 330]
  ------------------
  144|  2.52k|            {
  145|  70.0k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (145:38): [True: 67.5k, False: 2.52k]
  ------------------
  146|  67.5k|                    RgbToBgra<align>(rgb + 3 * col, bgra + 4 * col, _alpha);
  147|  2.52k|                if (width != alignedWidth)
  ------------------
  |  Branch (147:21): [True: 1.84k, False: 680]
  ------------------
  148|  1.84k|                    RgbToBgra<false>(rgb + 3 * (width - A), bgra + 4 * (width - A), _alpha);
  149|  2.52k|                rgb += rgbStride;
  150|  2.52k|                bgra += bgraStride;
  151|  2.52k|            }
  152|    330|        }

_ZN4Simd4Avx29RgbToGrayEPKhmmmPhm:
  138|    405|        {
  139|    405|            if (Aligned(gray) && Aligned(grayStride) && Aligned(rgb) && Aligned(rgbStride))
  ------------------
  |  Branch (139:17): [True: 405, False: 0]
  |  Branch (139:34): [True: 405, False: 0]
  |  Branch (139:57): [True: 402, False: 3]
  |  Branch (139:73): [True: 78, False: 324]
  ------------------
  140|     78|                RgbToGray<true>(rgb, width, height, rgbStride, gray, grayStride);
  141|    327|            else
  142|    327|                RgbToGray<false>(rgb, width, height, rgbStride, gray, grayStride);
  143|    405|        }
_ZN4Simd4Avx29RgbToGrayILb1EEEvPKhmmmPhm:
  119|     78|        {
  120|     78|            assert(width >= A);
  121|     78|            if (align)
  ------------------
  |  Branch (121:17): [Folded - Ignored]
  ------------------
  122|     78|                assert(Aligned(gray) && Aligned(grayStride) && Aligned(rgb) && Aligned(rgbStride));
  123|       |
  124|     78|            size_t alignedWidth = AlignLo(width, A);
  125|       |
  126|    492|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (126:34): [True: 414, False: 78]
  ------------------
  127|    414|            {
  128|  14.2k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (128:38): [True: 13.8k, False: 414]
  ------------------
  129|  13.8k|                    Store<align>((__m256i*)(gray + col), RgbToGray<align>(rgb + 3 * col));
  130|    414|                if (width != alignedWidth)
  ------------------
  |  Branch (130:21): [True: 0, False: 414]
  ------------------
  131|      0|                    Store<false>((__m256i*)(gray + width - A), RgbToGray<false>(rgb + 3 * (width - A)));
  132|    414|                rgb += rgbStride;
  133|    414|                gray += grayStride;
  134|    414|            }
  135|     78|        }
_ZN4Simd4Avx29RgbToGrayILb1EEEDv4_xPKh:
  109|  13.8k|        {
  110|  13.8k|            __m256i rgba[4];
  111|  13.8k|            rgba[0] = BgrToBgra<false>(Load<align>((__m256i*)(rgb + 0)), K32_01000000);
  112|  13.8k|            rgba[1] = BgrToBgra<false>(Load<false>((__m256i*)(rgb + 24)), K32_01000000);
  113|  13.8k|            rgba[2] = BgrToBgra<false>(Load<false>((__m256i*)(rgb + 48)), K32_01000000);
  114|  13.8k|            rgba[3] = BgrToBgra<true>(Load<align>((__m256i*)(rgb + 64)), K32_01000000);
  115|  13.8k|            return RgbaToGray(rgba);
  116|  13.8k|        }
_ZN4Simd4Avx29RgbToGrayILb0EEEDv4_xPKh:
  109|  68.3k|        {
  110|  68.3k|            __m256i rgba[4];
  111|  68.3k|            rgba[0] = BgrToBgra<false>(Load<align>((__m256i*)(rgb + 0)), K32_01000000);
  112|  68.3k|            rgba[1] = BgrToBgra<false>(Load<false>((__m256i*)(rgb + 24)), K32_01000000);
  113|  68.3k|            rgba[2] = BgrToBgra<false>(Load<false>((__m256i*)(rgb + 48)), K32_01000000);
  114|  68.3k|            rgba[3] = BgrToBgra<true>(Load<align>((__m256i*)(rgb + 64)), K32_01000000);
  115|  68.3k|            return RgbaToGray(rgba);
  116|  68.3k|        }
_ZN4Simd4Avx29RgbToGrayILb0EEEvPKhmmmPhm:
  119|    327|        {
  120|    327|            assert(width >= A);
  121|    327|            if (align)
  ------------------
  |  Branch (121:17): [Folded - Ignored]
  ------------------
  122|      0|                assert(Aligned(gray) && Aligned(grayStride) && Aligned(rgb) && Aligned(rgbStride));
  123|       |
  124|    327|            size_t alignedWidth = AlignLo(width, A);
  125|       |
  126|  2.51k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (126:34): [True: 2.18k, False: 327]
  ------------------
  127|  2.18k|            {
  128|  68.6k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (128:38): [True: 66.4k, False: 2.18k]
  ------------------
  129|  66.4k|                    Store<align>((__m256i*)(gray + col), RgbToGray<align>(rgb + 3 * col));
  130|  2.18k|                if (width != alignedWidth)
  ------------------
  |  Branch (130:21): [True: 1.84k, False: 341]
  ------------------
  131|  1.84k|                    Store<false>((__m256i*)(gray + width - A), RgbToGray<false>(rgb + 3 * (width - A)));
  132|  2.18k|                rgb += rgbStride;
  133|  2.18k|                gray += grayStride;
  134|  2.18k|            }
  135|    327|        }

_ZN4Simd4Avx28BgrToRgbEPKhmmmPhm:
   89|    405|        {
   90|    405|            if (Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride))
  ------------------
  |  Branch (90:17): [True: 401, False: 4]
  |  Branch (90:33): [True: 78, False: 323]
  |  Branch (90:55): [True: 78, False: 0]
  |  Branch (90:71): [True: 78, False: 0]
  ------------------
   91|     78|                BgrToRgb<true>(bgr, width, height, bgrStride, rgb, rgbStride);
   92|    327|            else
   93|    327|                BgrToRgb<false>(bgr, width, height, bgrStride, rgb, rgbStride);
   94|    405|        }
_ZN4Simd4Avx28BgrToRgbILb1EEEvPKhmmmPhm:
   68|     78|        {
   69|     78|            assert(width >= A);
   70|     78|            if (align)
  ------------------
  |  Branch (70:17): [Folded - Ignored]
  ------------------
   71|     78|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride));
   72|       |
   73|     78|            const size_t A3 = A * 3;
   74|     78|            size_t size = width * 3;
   75|     78|            size_t aligned = AlignLo(width, A) * 3;
   76|       |
   77|    720|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (77:34): [True: 642, False: 78]
  ------------------
   78|    642|            {
   79|  14.4k|                for (size_t i = 0; i < aligned; i += A3)
  ------------------
  |  Branch (79:36): [True: 13.8k, False: 642]
  ------------------
   80|  13.8k|                    BgrToRgb<align>(bgr + i, rgb + i);
   81|    642|                if (aligned < size)
  ------------------
  |  Branch (81:21): [True: 0, False: 642]
  ------------------
   82|      0|                    BgrToRgb<false>(bgr + size - A3, rgb + size - A3);
   83|    642|                bgr += bgrStride;
   84|    642|                rgb += rgbStride;
   85|    642|            }
   86|     78|        }
_ZN4Simd4Avx28BgrToRgbILb1EEEvPKhPh:
   52|  13.8k|        {
   53|  13.8k|            __m256i s0 = Load<align>((__m256i*)src + 0);
   54|  13.8k|            __m256i s1 = Load<align>((__m256i*)src + 1);
   55|  13.8k|            __m256i s2 = Load<align>((__m256i*)src + 2);
   56|  13.8k|            __m256i p0 = _mm256_permute4x64_epi64(s0, 0x1B);
   57|  13.8k|            __m256i p1 = _mm256_permute4x64_epi64(s1, 0x1B);
   58|  13.8k|            __m256i p2 = _mm256_permute4x64_epi64(s2, 0x1B);
   59|  13.8k|            Store<align>((__m256i*)dst + 0, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s0, K8_SHFL_0S0), 
   60|  13.8k|                _mm256_shuffle_epi8(p0, K8_SHFL_0P0)), _mm256_shuffle_epi8(p1, K8_SHFL_0P1)));
   61|  13.8k|            Store<align>((__m256i*)dst + 1, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s1, K8_SHFL_1S1),
   62|  13.8k|                _mm256_shuffle_epi8(p0, K8_SHFL_1P0)), _mm256_shuffle_epi8(p2, K8_SHFL_1P2)));
   63|  13.8k|            Store<align>((__m256i*)dst + 2, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s2, K8_SHFL_2S2),
   64|  13.8k|                _mm256_shuffle_epi8(p1, K8_SHFL_2P1)), _mm256_shuffle_epi8(p2, K8_SHFL_2P2)));
   65|  13.8k|        }
_ZN4Simd4Avx28BgrToRgbILb0EEEvPKhPh:
   52|  68.3k|        {
   53|  68.3k|            __m256i s0 = Load<align>((__m256i*)src + 0);
   54|  68.3k|            __m256i s1 = Load<align>((__m256i*)src + 1);
   55|  68.3k|            __m256i s2 = Load<align>((__m256i*)src + 2);
   56|  68.3k|            __m256i p0 = _mm256_permute4x64_epi64(s0, 0x1B);
   57|  68.3k|            __m256i p1 = _mm256_permute4x64_epi64(s1, 0x1B);
   58|  68.3k|            __m256i p2 = _mm256_permute4x64_epi64(s2, 0x1B);
   59|  68.3k|            Store<align>((__m256i*)dst + 0, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s0, K8_SHFL_0S0), 
   60|  68.3k|                _mm256_shuffle_epi8(p0, K8_SHFL_0P0)), _mm256_shuffle_epi8(p1, K8_SHFL_0P1)));
   61|  68.3k|            Store<align>((__m256i*)dst + 1, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s1, K8_SHFL_1S1),
   62|  68.3k|                _mm256_shuffle_epi8(p0, K8_SHFL_1P0)), _mm256_shuffle_epi8(p2, K8_SHFL_1P2)));
   63|  68.3k|            Store<align>((__m256i*)dst + 2, _mm256_or_si256(_mm256_or_si256(_mm256_shuffle_epi8(s2, K8_SHFL_2S2),
   64|  68.3k|                _mm256_shuffle_epi8(p1, K8_SHFL_2P1)), _mm256_shuffle_epi8(p2, K8_SHFL_2P2)));
   65|  68.3k|        }
_ZN4Simd4Avx28BgrToRgbILb0EEEvPKhmmmPhm:
   68|    327|        {
   69|    327|            assert(width >= A);
   70|    327|            if (align)
  ------------------
  |  Branch (70:17): [Folded - Ignored]
  ------------------
   71|      0|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride));
   72|       |
   73|    327|            const size_t A3 = A * 3;
   74|    327|            size_t size = width * 3;
   75|    327|            size_t aligned = AlignLo(width, A) * 3;
   76|       |
   77|  2.28k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (77:34): [True: 1.96k, False: 327]
  ------------------
   78|  1.96k|            {
   79|  68.4k|                for (size_t i = 0; i < aligned; i += A3)
  ------------------
  |  Branch (79:36): [True: 66.4k, False: 1.96k]
  ------------------
   80|  66.4k|                    BgrToRgb<align>(bgr + i, rgb + i);
   81|  1.96k|                if (aligned < size)
  ------------------
  |  Branch (81:21): [True: 1.84k, False: 113]
  ------------------
   82|  1.84k|                    BgrToRgb<false>(bgr + size - A3, rgb + size - A3);
   83|  1.96k|                bgr += bgrStride;
   84|  1.96k|                rgb += rgbStride;
   85|  1.96k|            }
   86|    327|        }

_ZN4Simd4Avx210RgbaToGrayEPDv4_x:
  106|  82.1k|        {
  107|  82.1k|            const __m256i lo = PackI32ToI16(RgbaToGray32(rgba[0]), RgbaToGray32(rgba[1]));
  108|  82.1k|            const __m256i hi = PackI32ToI16(RgbaToGray32(rgba[2]), RgbaToGray32(rgba[3]));
  109|  82.1k|            return PackI16ToU8(lo, hi);
  110|  82.1k|        }
_ZN4Simd4Avx212RgbaToGray32EDv4_x:
   98|   328k|        {
   99|   328k|            const __m256i g0a0 = _mm256_and_si256(_mm256_srli_si256(rgba, 1), K16_00FF);
  100|   328k|            const __m256i r0b0 = _mm256_and_si256(rgba, K16_00FF);
  101|   328k|            const __m256i weightedSum = _mm256_add_epi32(_mm256_madd_epi16(g0a0, K16_GREEN_0000), _mm256_madd_epi16(r0b0, K16_RED_BLUE));
  102|   328k|            return _mm256_srli_epi32(_mm256_add_epi32(weightedSum, K32_ROUND_TERM), Base::BGR_TO_GRAY_AVERAGING_SHIFT);
  103|   328k|        }

_ZN4Simd4Avx29GetEnableEv:
   66|     20|        {
   67|     20|            return SupportedByCPU() && SupportedByOS();
  ------------------
  |  Branch (67:20): [True: 20, False: 0]
  |  Branch (67:40): [True: 20, False: 0]
  ------------------
   68|     20|        }
_ZN4Simd4Avx213GetSlowGatherEv:
   73|     20|        {
   74|     20|            const char* vendorId = Base::VendorId();
   75|     20|            if (memcmp(vendorId, "GenuineIntel", 12) == 0)
  ------------------
  |  Branch (75:17): [True: 20, False: 0]
  ------------------
   76|     20|            {
   77|     20|                return false;
   78|     20|            }
   79|      0|            else if (memcmp(vendorId, "AuthenticAMD", 12) == 0)
  ------------------
  |  Branch (79:22): [True: 0, False: 0]
  ------------------
   80|      0|            {
   81|      0|                return true;
   82|      0|            }
   83|      0|            else 
   84|      0|                return true;
   85|     20|        }
_ZN4Simd4Avx214SupportedByCPUEv:
   37|     20|        {
   38|     20|            return
   39|     20|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::AVX2) &&
  ------------------
  |  Branch (39:17): [True: 20, False: 0]
  ------------------
   40|     20|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::BMI1) &&
  ------------------
  |  Branch (40:17): [True: 20, False: 0]
  ------------------
   41|     20|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::BMI2) &&
  ------------------
  |  Branch (41:17): [True: 20, False: 0]
  ------------------
   42|     20|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::OSXSAVE) &&
  ------------------
  |  Branch (42:17): [True: 20, False: 0]
  ------------------
   43|     20|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::AVX) &&
  ------------------
  |  Branch (43:17): [True: 20, False: 0]
  ------------------
   44|     20|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::FMA) &&
  ------------------
  |  Branch (44:17): [True: 20, False: 0]
  ------------------
   45|     20|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::F16C);
  ------------------
  |  Branch (45:17): [True: 20, False: 0]
  ------------------
   46|     20|        }
_ZN4Simd4Avx213SupportedByOSEv:
   49|     20|        {
   50|       |#if defined(_MSC_VER)
   51|       |            __try
   52|       |            {
   53|       |                __m256i value = _mm256_abs_epi8(_mm256_set1_epi8(1));// try to execute of AVX2 instructions;
   54|       |                return true;
   55|       |            }
   56|       |            __except (EXCEPTION_EXECUTE_HANDLER)
   57|       |            {
   58|       |                return false;
   59|       |            }
   60|       |#else
   61|     20|            return true;
   62|     20|#endif
   63|     20|        }

_ZN4Simd4Avx29GrayToBgrEPKhmmmPhm:
   65|    648|        {
   66|    648|            if (Aligned(bgr) && Aligned(gray) && Aligned(bgrStride) && Aligned(grayStride))
  ------------------
  |  Branch (66:17): [True: 648, False: 0]
  |  Branch (66:33): [True: 634, False: 14]
  |  Branch (66:50): [True: 634, False: 0]
  |  Branch (66:72): [True: 310, False: 324]
  ------------------
   67|    310|                GrayToBgr<true>(gray, width, height, grayStride, bgr, bgrStride);
   68|    338|            else
   69|    338|                GrayToBgr<false>(gray, width, height, grayStride, bgr, bgrStride);
   70|    648|        }
_ZN4Simd4Avx29GrayToBgrILb1EEEvPKhmmmPhm:
   41|    310|        {
   42|    310|            assert(width >= A);
   43|    310|            if (align)
  ------------------
  |  Branch (43:17): [Folded - Ignored]
  ------------------
   44|    310|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(gray) && Aligned(grayStride));
   45|       |
   46|    310|            size_t alignedWidth = AlignLo(width, A);
   47|    620|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (47:34): [True: 310, False: 310]
  ------------------
   48|    310|            {
   49|   161k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (49:38): [True: 160k, False: 310]
  ------------------
   50|   160k|                {
   51|   160k|                    __m256i _gray = Load<align>((__m256i*)(gray + col));
   52|   160k|                    GrayToBgr<align>(bgr + 3 * col, _gray);
   53|   160k|                }
   54|    310|                if (alignedWidth != width)
  ------------------
  |  Branch (54:21): [True: 0, False: 310]
  ------------------
   55|      0|                {
   56|      0|                    __m256i _gray = Load<false>((__m256i*)(gray + width - A));
   57|      0|                    GrayToBgr<false>(bgr + 3 * (width - A), _gray);
   58|      0|                }
   59|    310|                gray += grayStride;
   60|    310|                bgr += bgrStride;
   61|    310|            }
   62|    310|        }
_ZN4Simd4Avx29GrayToBgrILb1EEEvPhDv4_x:
   34|   160k|        {
   35|   160k|            Store<align>((__m256i*)bgr + 0, GrayToBgr<0>(gray));
   36|   160k|            Store<align>((__m256i*)bgr + 1, GrayToBgr<1>(gray));
   37|   160k|            Store<align>((__m256i*)bgr + 2, GrayToBgr<2>(gray));
   38|   160k|        }
_ZN4Simd4Avx29GrayToBgrILb0EEEvPhDv4_x:
   34|   201k|        {
   35|   201k|            Store<align>((__m256i*)bgr + 0, GrayToBgr<0>(gray));
   36|   201k|            Store<align>((__m256i*)bgr + 1, GrayToBgr<1>(gray));
   37|   201k|            Store<align>((__m256i*)bgr + 2, GrayToBgr<2>(gray));
   38|   201k|        }
_ZN4Simd4Avx29GrayToBgrILb0EEEvPKhmmmPhm:
   41|    338|        {
   42|    338|            assert(width >= A);
   43|    338|            if (align)
  ------------------
  |  Branch (43:17): [Folded - Ignored]
  ------------------
   44|      0|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(gray) && Aligned(grayStride));
   45|       |
   46|    338|            size_t alignedWidth = AlignLo(width, A);
   47|  3.75k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (47:34): [True: 3.42k, False: 338]
  ------------------
   48|  3.42k|            {
   49|   202k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (49:38): [True: 199k, False: 3.42k]
  ------------------
   50|   199k|                {
   51|   199k|                    __m256i _gray = Load<align>((__m256i*)(gray + col));
   52|   199k|                    GrayToBgr<align>(bgr + 3 * col, _gray);
   53|   199k|                }
   54|  3.42k|                if (alignedWidth != width)
  ------------------
  |  Branch (54:21): [True: 2.39k, False: 1.02k]
  ------------------
   55|  2.39k|                {
   56|  2.39k|                    __m256i _gray = Load<false>((__m256i*)(gray + width - A));
   57|  2.39k|                    GrayToBgr<false>(bgr + 3 * (width - A), _gray);
   58|  2.39k|                }
   59|  3.42k|                gray += grayStride;
   60|  3.42k|                bgr += bgrStride;
   61|  3.42k|            }
   62|    338|        }

_ZN4Simd4Avx210GrayToBgraEPKhmmmPhmh:
   72|    324|        {
   73|    324|            if (Aligned(bgra) && Aligned(gray) && Aligned(bgraStride) && Aligned(grayStride))
  ------------------
  |  Branch (73:17): [True: 324, False: 0]
  |  Branch (73:34): [True: 320, False: 4]
  |  Branch (73:51): [True: 320, False: 0]
  |  Branch (73:74): [True: 158, False: 162]
  ------------------
   74|    158|                GrayToBgra<true>(gray, width, height, grayStride, bgra, bgraStride, alpha);
   75|    166|            else
   76|    166|                GrayToBgra<false>(gray, width, height, grayStride, bgra, bgraStride, alpha);
   77|    324|        }
_ZN4Simd4Avx210GrayToBgraILb1EEEvPKhmmmPhmh:
   46|    158|        {
   47|    158|            assert(width >= A);
   48|    158|            if (align)
  ------------------
  |  Branch (48:17): [Folded - Ignored]
  ------------------
   49|    158|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(gray) && Aligned(grayStride));
   50|       |
   51|    158|            __m256i _alpha = _mm256_set1_epi8(alpha);
   52|    158|            __m256i permuteOffsets = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
   53|    158|            size_t alignedWidth = AlignLo(width, A);
   54|    827|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (54:34): [True: 669, False: 158]
  ------------------
   55|    669|            {
   56|  83.1k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (56:38): [True: 82.5k, False: 669]
  ------------------
   57|  82.5k|                {
   58|  82.5k|                    __m256i _gray = _mm256_permutevar8x32_epi32(Load<align>((__m256i*)(gray + col)), permuteOffsets);
   59|  82.5k|                    GrayToBgra<align>(bgra + 4 * col, _gray, _alpha);
   60|  82.5k|                }
   61|    669|                if (alignedWidth != width)
  ------------------
  |  Branch (61:21): [True: 0, False: 669]
  ------------------
   62|      0|                {
   63|      0|                    __m256i _gray = _mm256_permutevar8x32_epi32(Load<false>((__m256i*)(gray + width - A)), permuteOffsets);
   64|      0|                    GrayToBgra<false>(bgra + 4 * (width - A), _gray, _alpha);
   65|      0|                }
   66|    669|                gray += grayStride;
   67|    669|                bgra += bgraStride;
   68|    669|            }
   69|    158|        }
_ZN4Simd4Avx210GrayToBgraILb1EEEvPhDv4_xS3_:
   33|  82.5k|        {
   34|  82.5k|            __m256i bgLo = _mm256_unpacklo_epi8(gray, gray);
   35|  82.5k|            __m256i bgHi = _mm256_unpackhi_epi8(gray, gray);
   36|  82.5k|            __m256i raLo = _mm256_unpacklo_epi8(gray, alpha);
   37|  82.5k|            __m256i raHi = _mm256_unpackhi_epi8(gray, alpha);
   38|       |
   39|  82.5k|            Store<align>((__m256i*)bgra + 0, _mm256_unpacklo_epi16(bgLo, raLo));
   40|  82.5k|            Store<align>((__m256i*)bgra + 1, _mm256_unpackhi_epi16(bgLo, raLo));
   41|  82.5k|            Store<align>((__m256i*)bgra + 2, _mm256_unpacklo_epi16(bgHi, raHi));
   42|  82.5k|            Store<align>((__m256i*)bgra + 3, _mm256_unpackhi_epi16(bgHi, raHi));
   43|  82.5k|        }
_ZN4Simd4Avx210GrayToBgraILb0EEEvPhDv4_xS3_:
   33|  98.6k|        {
   34|  98.6k|            __m256i bgLo = _mm256_unpacklo_epi8(gray, gray);
   35|  98.6k|            __m256i bgHi = _mm256_unpackhi_epi8(gray, gray);
   36|  98.6k|            __m256i raLo = _mm256_unpacklo_epi8(gray, alpha);
   37|  98.6k|            __m256i raHi = _mm256_unpackhi_epi8(gray, alpha);
   38|       |
   39|  98.6k|            Store<align>((__m256i*)bgra + 0, _mm256_unpacklo_epi16(bgLo, raLo));
   40|  98.6k|            Store<align>((__m256i*)bgra + 1, _mm256_unpackhi_epi16(bgLo, raLo));
   41|  98.6k|            Store<align>((__m256i*)bgra + 2, _mm256_unpacklo_epi16(bgHi, raHi));
   42|  98.6k|            Store<align>((__m256i*)bgra + 3, _mm256_unpackhi_epi16(bgHi, raHi));
   43|  98.6k|        }
_ZN4Simd4Avx210GrayToBgraILb0EEEvPKhmmmPhmh:
   46|    166|        {
   47|    166|            assert(width >= A);
   48|    166|            if (align)
  ------------------
  |  Branch (48:17): [Folded - Ignored]
  ------------------
   49|      0|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(gray) && Aligned(grayStride));
   50|       |
   51|    166|            __m256i _alpha = _mm256_set1_epi8(alpha);
   52|    166|            __m256i permuteOffsets = _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7);
   53|    166|            size_t alignedWidth = AlignLo(width, A);
   54|  1.36k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (54:34): [True: 1.19k, False: 166]
  ------------------
   55|  1.19k|            {
   56|  98.6k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (56:38): [True: 97.4k, False: 1.19k]
  ------------------
   57|  97.4k|                {
   58|  97.4k|                    __m256i _gray = _mm256_permutevar8x32_epi32(Load<align>((__m256i*)(gray + col)), permuteOffsets);
   59|  97.4k|                    GrayToBgra<align>(bgra + 4 * col, _gray, _alpha);
   60|  97.4k|                }
   61|  1.19k|                if (alignedWidth != width)
  ------------------
  |  Branch (61:21): [True: 1.19k, False: 0]
  ------------------
   62|  1.19k|                {
   63|  1.19k|                    __m256i _gray = _mm256_permutevar8x32_epi32(Load<false>((__m256i*)(gray + width - A)), permuteOffsets);
   64|  1.19k|                    GrayToBgra<false>(bgra + 4 * (width - A), _gray, _alpha);
   65|  1.19k|                }
   66|  1.19k|                gray += grayStride;
   67|  1.19k|                bgra += bgraStride;
   68|  1.19k|            }
   69|    166|        }

_ZN4Simd4Avx217ImagePgmTxtLoaderC2ERKNS_16ImageLoaderParamE:
   37|     80|        {
   38|     80|        }
_ZN4Simd4Avx217ImagePgmTxtLoader13SetConvertersEv:
   41|     56|        {
   42|     56|            Sse41::ImagePgmTxtLoader::SetConverters();
   43|     56|            if (_image.width >= A)
  ------------------
  |  Branch (43:17): [True: 56, False: 0]
  ------------------
   44|     56|            {
   45|     56|                switch (_param.format)
   46|     56|                {
   47|     14|                case SimdPixelFormatBgr24: _toAny = Avx2::GrayToBgr; break;
  ------------------
  |  Branch (47:17): [True: 14, False: 42]
  ------------------
   48|     14|                case SimdPixelFormatBgra32: _toBgra = Avx2::GrayToBgra; break;
  ------------------
  |  Branch (48:17): [True: 14, False: 42]
  ------------------
   49|     14|                case SimdPixelFormatRgb24: _toAny = Avx2::GrayToBgr; break;
  ------------------
  |  Branch (49:17): [True: 14, False: 42]
  ------------------
   50|      0|                case SimdPixelFormatRgba32: _toBgra = Avx2::GrayToBgra; break;
  ------------------
  |  Branch (50:17): [True: 0, False: 56]
  ------------------
   51|     14|                default: break;
  ------------------
  |  Branch (51:17): [True: 14, False: 42]
  ------------------
   52|     56|                }
   53|     56|            }
   54|     56|        }
_ZN4Simd4Avx217ImagePgmBinLoaderC2ERKNS_16ImageLoaderParamE:
   60|    292|        {
   61|    292|        }
_ZN4Simd4Avx217ImagePgmBinLoader13SetConvertersEv:
   64|    292|        {
   65|    292|            Sse41::ImagePgmBinLoader::SetConverters();
   66|    292|            if (_image.width >= A)
  ------------------
  |  Branch (66:17): [True: 96, False: 196]
  ------------------
   67|     96|            {
   68|     96|                switch (_param.format)
   69|     96|                {
   70|     24|                case SimdPixelFormatBgr24: _toAny = Avx2::GrayToBgr; break;
  ------------------
  |  Branch (70:17): [True: 24, False: 72]
  ------------------
   71|     24|                case SimdPixelFormatBgra32: _toBgra = Avx2::GrayToBgra; break;
  ------------------
  |  Branch (71:17): [True: 24, False: 72]
  ------------------
   72|     24|                case SimdPixelFormatRgb24: _toAny = Avx2::GrayToBgr; break;
  ------------------
  |  Branch (72:17): [True: 24, False: 72]
  ------------------
   73|      0|                case SimdPixelFormatRgba32: _toBgra = Avx2::GrayToBgra; break;
  ------------------
  |  Branch (73:17): [True: 0, False: 96]
  ------------------
   74|     24|                default: break;
  ------------------
  |  Branch (74:17): [True: 24, False: 72]
  ------------------
   75|     96|                }
   76|     96|            }
   77|    292|        }
_ZN4Simd4Avx217ImagePpmTxtLoaderC2ERKNS_16ImageLoaderParamE:
   83|    120|        {
   84|    120|        }
_ZN4Simd4Avx217ImagePpmTxtLoader13SetConvertersEv:
   87|     96|        {
   88|     96|            Sse41::ImagePpmTxtLoader::SetConverters();
   89|     96|            if (_image.width >= A)
  ------------------
  |  Branch (89:17): [True: 96, False: 0]
  ------------------
   90|     96|            {
   91|     96|                switch (_param.format)
   92|     96|                {
   93|     24|                case SimdPixelFormatGray8: _toAny = Avx2::RgbToGray; break;
  ------------------
  |  Branch (93:17): [True: 24, False: 72]
  ------------------
   94|     24|                case SimdPixelFormatBgr24: _toAny = Avx2::BgrToRgb; break;
  ------------------
  |  Branch (94:17): [True: 24, False: 72]
  ------------------
   95|     24|                case SimdPixelFormatBgra32: _toBgra = Avx2::RgbToBgra; break;
  ------------------
  |  Branch (95:17): [True: 24, False: 72]
  ------------------
   96|      0|                case SimdPixelFormatRgba32: _toBgra = Avx2::BgrToBgra; break;
  ------------------
  |  Branch (96:17): [True: 0, False: 96]
  ------------------
   97|     24|                default: break;
  ------------------
  |  Branch (97:17): [True: 24, False: 72]
  ------------------
   98|     96|                }
   99|     96|            }
  100|     96|        }
_ZN4Simd4Avx217ImagePpmBinLoaderC2ERKNS_16ImageLoaderParamE:
  106|    284|        {
  107|    284|        }
_ZN4Simd4Avx217ImagePpmBinLoader13SetConvertersEv:
  110|    272|        {
  111|    272|            Sse41::ImagePpmBinLoader::SetConverters();
  112|    272|            if (_image.width >= A)
  ------------------
  |  Branch (112:17): [True: 140, False: 132]
  ------------------
  113|    140|            {
  114|    140|                switch (_param.format)
  115|    140|                {
  116|     35|                case SimdPixelFormatGray8: _toAny = Avx2::RgbToGray; break;
  ------------------
  |  Branch (116:17): [True: 35, False: 105]
  ------------------
  117|     35|                case SimdPixelFormatBgr24: _toAny = Avx2::BgrToRgb; break;
  ------------------
  |  Branch (117:17): [True: 35, False: 105]
  ------------------
  118|     35|                case SimdPixelFormatBgra32: _toBgra = Avx2::RgbToBgra; break;
  ------------------
  |  Branch (118:17): [True: 35, False: 105]
  ------------------
  119|      0|                case SimdPixelFormatRgba32: _toBgra = Avx2::BgrToBgra; break;
  ------------------
  |  Branch (119:17): [True: 0, False: 140]
  ------------------
  120|     35|                default: break;
  ------------------
  |  Branch (120:17): [True: 35, False: 105]
  ------------------
  121|    140|                }
  122|    140|            }
  123|    272|        }
_ZN4Simd4Avx217CreateImageLoaderERKNS_16ImageLoaderParamE:
  128|  2.39k|        {
  129|  2.39k|            switch (param.file)
  130|  2.39k|            {
  131|     80|            case SimdImageFilePgmTxt: return new ImagePgmTxtLoader(param);
  ------------------
  |  Branch (131:13): [True: 80, False: 2.31k]
  ------------------
  132|    292|            case SimdImageFilePgmBin: return new ImagePgmBinLoader(param);
  ------------------
  |  Branch (132:13): [True: 292, False: 2.10k]
  ------------------
  133|    120|            case SimdImageFilePpmTxt: return new ImagePpmTxtLoader(param);
  ------------------
  |  Branch (133:13): [True: 120, False: 2.27k]
  ------------------
  134|    284|            case SimdImageFilePpmBin: return new ImagePpmBinLoader(param);
  ------------------
  |  Branch (134:13): [True: 284, False: 2.11k]
  ------------------
  135|    168|            case SimdImageFilePng: return new Sse41::ImagePngLoader(param);
  ------------------
  |  Branch (135:13): [True: 168, False: 2.22k]
  ------------------
  136|  1.45k|            case SimdImageFileJpeg: return new Base::ImageJpegLoader(param);
  ------------------
  |  Branch (136:13): [True: 1.45k, False: 944]
  ------------------
  137|      0|            default:
  ------------------
  |  Branch (137:13): [True: 0, False: 2.39k]
  ------------------
  138|      0|                return NULL;
  139|  2.39k|            }
  140|  2.39k|        }
_ZN4Simd4Avx219ImageLoadFromMemoryEPKhmPmS3_S3_P19SimdPixelFormatType:
  143|  2.40k|        {
  144|  2.40k|            ImageLoaderParam param(data, size, *format);
  145|  2.40k|            if (param.Validate())
  ------------------
  |  Branch (145:17): [True: 2.39k, False: 8]
  ------------------
  146|  2.39k|            {
  147|  2.39k|                Holder<ImageLoader> loader(CreateImageLoader(param));
  148|  2.39k|                if (loader)
  ------------------
  |  Branch (148:21): [True: 2.39k, False: 0]
  ------------------
  149|  2.39k|                {
  150|  2.39k|                    if (loader->FromStream())
  ------------------
  |  Branch (150:25): [True: 336, False: 2.06k]
  ------------------
  151|    336|                        return loader->Release(stride, width, height, format);
  152|  2.39k|                }
  153|  2.39k|            }
  154|  2.06k|            return NULL;
  155|  2.40k|        }

_ZN4Simd8Avx512bw9GetEnableEv:
   64|     10|        {
   65|     10|            return SupportedByCPU() && SupportedByOS();
  ------------------
  |  Branch (65:20): [True: 0, False: 10]
  |  Branch (65:40): [True: 0, False: 0]
  ------------------
   66|     10|        }
_ZN4Simd8Avx512bw14SupportedByCPUEv:
   37|     10|        {
   38|     10|            return
   39|     10|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::AVX512_F) &&
  ------------------
  |  Branch (39:17): [True: 0, False: 10]
  ------------------
   40|     10|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::AVX512_CD) &&
  ------------------
  |  Branch (40:17): [True: 0, False: 0]
  ------------------
   41|     10|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::AVX512_DQ) &&
  ------------------
  |  Branch (41:17): [True: 0, False: 0]
  ------------------
   42|     10|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::AVX512_BW) &&
  ------------------
  |  Branch (42:17): [True: 0, False: 0]
  ------------------
   43|     10|                Base::CheckBit(7, 0, Cpuid::Ebx, Cpuid::AVX512_VL);
  ------------------
  |  Branch (43:17): [True: 0, False: 0]
  ------------------
   44|     10|        }

_ZN4Simd4Base9RgbToBgraEPKhmmmPhmh:
  111|    130|        {
  112|    130|            size_t rgbGap = rgbStride - width * 3;
  113|    130|            size_t bgraGap = bgraStride - width * 4;
  114|   791k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (114:34): [True: 791k, False: 130]
  ------------------
  115|   791k|            {
  116|  2.16M|                for (size_t col = 0; col < width; ++col, rgb += 3, bgra += 4)
  ------------------
  |  Branch (116:38): [True: 1.36M, False: 791k]
  ------------------
  117|  1.36M|                {
  118|  1.36M|                    bgra[0] = rgb[2];
  119|  1.36M|                    bgra[1] = rgb[1];
  120|  1.36M|                    bgra[2] = rgb[0];
  121|  1.36M|                    bgra[3] = alpha;
  122|  1.36M|                }
  123|   791k|                rgb += rgbGap;
  124|   791k|                bgra += bgraGap;
  125|   791k|            }
  126|    130|        }

_ZN4Simd4Base9RgbToGrayEPKhmmmPhm:
   44|    130|        {
   45|   791k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (45:34): [True: 791k, False: 130]
  ------------------
   46|   791k|            {
   47|   791k|                const uint8_t* pRgb = rgb + row * rgbStride;
   48|   791k|                uint8_t* pGray = gray + row * grayStride;
   49|  2.16M|                for (const uint8_t* pGrayEnd = pGray + width; pGray < pGrayEnd; pGray += 1, pRgb += 3)
  ------------------
  |  Branch (49:63): [True: 1.36M, False: 791k]
  ------------------
   50|  1.36M|                {
   51|  1.36M|                    *pGray = BgrToGray(pRgb[2], pRgb[1], pRgb[0]);
   52|  1.36M|                }
   53|   791k|            }
   54|    130|        }

_ZN4Simd4Base8BgrToRgbEPKhmmmPhm:
   31|    130|        {
   32|    130|            size_t size = width * 3;
   33|   791k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (33:34): [True: 791k, False: 130]
  ------------------
   34|   791k|            {
   35|  2.16M|                for (size_t i = 0; i < size; i += 3)
  ------------------
  |  Branch (35:36): [True: 1.36M, False: 791k]
  ------------------
   36|  1.36M|                {
   37|  1.36M|                    rgb[i + 0] = bgr[i + 2];
   38|  1.36M|                    rgb[i + 1] = bgr[i + 1];
   39|  1.36M|                    rgb[i + 2] = bgr[i + 0];
   40|  1.36M|                }
   41|   791k|                bgr += bgrStride;
   42|   791k|                rgb += rgbStride;
   43|   791k|            }
   44|    130|        }

_ZN4Simd4Base9BgraToBgrEPKhmPhb:
   31|  3.42k|        {
   32|  2.12M|            for (size_t i = (lastRow ? 1 : 0); i < size; ++i, bgr += 3, bgra += 4)
  ------------------
  |  Branch (32:30): [True: 40, False: 3.38k]
  |  Branch (32:48): [True: 2.12M, False: 3.42k]
  ------------------
   33|  2.12M|            {
   34|  2.12M|                *(int32_t*)bgr = (*(int32_t*)bgra);
   35|  2.12M|            }
   36|  3.42k|            if (lastRow)
  ------------------
  |  Branch (36:17): [True: 40, False: 3.38k]
  ------------------
   37|     40|            {
   38|     40|                bgr[0] = bgra[0];
   39|     40|                bgr[1] = bgra[1];
   40|     40|                bgr[2] = bgra[2];
   41|     40|            }
   42|  3.42k|        }
_ZN4Simd4Base9BgraToBgrEPKhmmmPhm:
   45|     40|        {
   46|  3.42k|            for (size_t row = 1; row < height; ++row)
  ------------------
  |  Branch (46:34): [True: 3.38k, False: 40]
  ------------------
   47|  3.38k|            {
   48|  3.38k|                BgraToBgr(bgra, width, bgr, false);
   49|  3.38k|                bgr += bgrStride;
   50|  3.38k|                bgra += bgraStride;
   51|  3.38k|            }
   52|     40|            BgraToBgr(bgra, width, bgr, true);
   53|     40|        }
_ZN4Simd4Base9BgraToRgbEPKhmmmPhm:
   56|     40|        {
   57|     40|            size_t bgraGap = bgraStride - width * 4;
   58|     40|            size_t rgbGap = rgbStride - width * 3;
   59|  3.46k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (59:34): [True: 3.42k, False: 40]
  ------------------
   60|  3.42k|            {
   61|  2.12M|                for (size_t col = 0; col < width; ++col, bgra += 4, rgb += 3)
  ------------------
  |  Branch (61:38): [True: 2.12M, False: 3.42k]
  ------------------
   62|  2.12M|                {
   63|  2.12M|                    rgb[2] = bgra[0];
   64|  2.12M|                    rgb[1] = bgra[1];
   65|  2.12M|                    rgb[0] = bgra[2];
   66|  2.12M|                }
   67|  3.42k|                bgra += bgraGap;
   68|  3.42k|                rgb += rgbGap;
   69|  3.42k|            }
   70|     40|        }
_ZN4Simd4Base10BgraToRgbaEPKhmmmPhm:
   73|     40|        {
   74|     40|            size_t bgraGap = bgraStride - width * 4;
   75|     40|            size_t rgbaGap = rgbaStride - width * 4;
   76|  3.46k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (76:34): [True: 3.42k, False: 40]
  ------------------
   77|  3.42k|            {
   78|  2.12M|                for (size_t col = 0; col < width; ++col, bgra += 4, rgba += 4)
  ------------------
  |  Branch (78:38): [True: 2.12M, False: 3.42k]
  ------------------
   79|  2.12M|                {
   80|  2.12M|                    rgba[2] = bgra[0];
   81|  2.12M|                    rgba[1] = bgra[1];
   82|  2.12M|                    rgba[0] = bgra[2];
   83|  2.12M|                    rgba[3] = bgra[3];
   84|  2.12M|                }
   85|  3.42k|                bgra += bgraGap;
   86|  3.42k|                rgba += rgbaGap;
   87|  3.42k|            }
   88|     40|        }

_ZN4Simd4Base10RgbaToGrayEPKhmmmPhm:
   44|     40|        {
   45|  3.46k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (45:34): [True: 3.42k, False: 40]
  ------------------
   46|  3.42k|            {
   47|  3.42k|                const uint8_t* pRgba = rgba + row * rgbaStride;
   48|  3.42k|                uint8_t* pGray = gray + row * grayStride;
   49|  2.12M|                for (const uint8_t* pGrayEnd = pGray + width; pGray < pGrayEnd; pGray += 1, pRgba += 4)
  ------------------
  |  Branch (49:63): [True: 2.12M, False: 3.42k]
  ------------------
   50|  2.12M|                {
   51|  2.12M|                    *pGray = BgrToGray(pRgba[2], pRgba[1], pRgba[0]);
   52|  2.12M|                }
   53|  3.42k|            }
   54|     40|        }

_ZN4Simd4Base8CheckBitEiiNS_5Cpuid8RegisterENS1_3BitE:
   93|    330|        {
   94|    330|            unsigned int registers[4] = { 0, 0, 0, 0 };
   95|    330|            if (!CpuId(eax, ecx, registers))
  ------------------
  |  Branch (95:17): [True: 0, False: 330]
  ------------------
   96|      0|                return false;
   97|    330|            return (registers[index] & bit) == bit;
   98|    330|        }
_ZN4Simd4Base8VendorIdEv:
  101|     20|        {
  102|     20|            unsigned int regs[4] = { 0, 0, 0, 0};
  103|     20|            CpuId(0, 0, regs);
  104|     20|            static unsigned int vendorId[4] = { regs[1], regs[3], regs[2], 0 };
  105|     20|            return (char*)vendorId;
  106|     20|        }
_ZN4Simd4Base15CpuThreadNumberEv:
  139|      2|        {
  140|      2|            return std::thread::hardware_concurrency();
  141|      2|        }
_ZN4Simd4Base15CpuSocketNumberEv:
  274|      2|        {
  275|      2|            uint32_t number = 0;
  276|      2|#if !defined(__APPLE__)
  277|      2|            ::FILE * p = ::popen("lscpu -b -p=Socket 2>/dev/null | grep -v '^#' | sort -u 2>/dev/null | wc -l 2>/dev/null", "r");
  278|      2|            if (p)
  ------------------
  |  Branch (278:17): [True: 2, False: 0]
  ------------------
  279|      2|            {
  280|      2|                char buffer[PATH_MAX];
  281|      4|                while (::fgets(buffer, PATH_MAX, p));
  ------------------
  |  Branch (281:24): [True: 2, False: 2]
  ------------------
  282|      2|                number = ::atoi(buffer);
  283|      2|                ::pclose(p);
  284|      2|            }
  285|      2|#endif
  286|      2|            return number;
  287|      2|        }
_ZN4Simd4Base13CpuCoreNumberEv:
  290|      2|        {
  291|      2|            uint32_t number = 0;
  292|      2|#if !defined(__APPLE__)
  293|      2|            ::FILE * p = ::popen("lscpu -b -p=Core 2>/dev/null | grep -v '^#' | sort -u 2>/dev/null | wc -l 2>/dev/null", "r");
  294|      2|            if (p)
  ------------------
  |  Branch (294:17): [True: 2, False: 0]
  ------------------
  295|      2|            {
  296|      2|                char buffer[PATH_MAX];
  297|      4|                while (::fgets(buffer, PATH_MAX, p));
  ------------------
  |  Branch (297:24): [True: 2, False: 2]
  ------------------
  298|      2|                number = ::atoi(buffer);
  299|      2|                ::pclose(p);
  300|      2|            }
  301|      2|#endif
  302|      2|            return number;
  303|      2|        }
_ZN4Simd4Base12CpuCacheSizeEm:
  312|      6|        {
  313|      6|            switch (level)
  314|      6|            {
  315|      2|            case 1:
  ------------------
  |  Branch (315:13): [True: 2, False: 4]
  ------------------
  316|      2|            {
  317|      2|                const size_t sz = ::sysconf(_SC_LEVEL1_DCACHE_SIZE) < 0 ? 0 : ::sysconf(_SC_LEVEL1_DCACHE_SIZE);
  ------------------
  |  Branch (317:35): [True: 0, False: 2]
  ------------------
  318|      2|                return CorrectIfZero(sz, 32 * 1024);
  319|      0|            }
  320|      2|            case 2:
  ------------------
  |  Branch (320:13): [True: 2, False: 4]
  ------------------
  321|      2|            {
  322|      2|                const size_t sz = ::sysconf(_SC_LEVEL2_CACHE_SIZE) < 0 ? 0 : ::sysconf(_SC_LEVEL2_CACHE_SIZE);
  ------------------
  |  Branch (322:35): [True: 0, False: 2]
  ------------------
  323|      2|                return CorrectIfZero(sz, 256 * 1024);
  324|      0|            }
  325|      2|            case 3:
  ------------------
  |  Branch (325:13): [True: 2, False: 4]
  ------------------
  326|      2|            {
  327|      2|                const size_t sz = ::sysconf(_SC_LEVEL3_CACHE_SIZE) < 0 ? 0 : ::sysconf(_SC_LEVEL3_CACHE_SIZE);
  ------------------
  |  Branch (327:35): [True: 0, False: 2]
  ------------------
  328|      2|                return CorrectIfZero(sz, 2048 * 1024);
  329|      0|            }
  330|      0|            default:
  ------------------
  |  Branch (330:13): [True: 0, False: 6]
  ------------------
  331|      0|                return 0;
  332|      6|            }
  333|      6|        }
_ZN4Simd4Base10CpuRamSizeEv:
  348|      2|        {
  349|      2|            uint64_t size = 0;
  350|      2|#if !defined(__APPLE__)
  351|      2|            ::FILE* file = ::popen("grep MemTotal /proc/meminfo | awk '{printf \"%d\", $2 }'", "r");
  352|      2|            if (file)
  ------------------
  |  Branch (352:17): [True: 2, False: 0]
  ------------------
  353|      2|            {
  354|      2|                char buf[PATH_MAX];
  355|      4|                while (::fgets(buf, PATH_MAX, file));
  ------------------
  |  Branch (355:24): [True: 2, False: 2]
  ------------------
  356|      2|                size = atoll(buf) * 1024;
  357|      2|                ::pclose(file);
  358|      2|            }
  359|      2|#endif
  360|      2|            return size;
  361|      2|        }
_ZN4Simd4Base8CpuModelEv:
  364|      2|        {
  365|      2|            std::string model;
  366|      2|#if !defined(__APPLE__)
  367|      2|            ::FILE* file = ::popen("lscpu | grep 'Model name:' | sed -r 's/Model name:\\s{1,}//g'", "r");
  368|      2|            if (file)
  ------------------
  |  Branch (368:17): [True: 2, False: 0]
  ------------------
  369|      2|            {
  370|      2|                char buffer[PATH_MAX];
  371|      4|                while (::fgets(buffer, PATH_MAX, file));
  ------------------
  |  Branch (371:24): [True: 2, False: 2]
  ------------------
  372|      2|                model = buffer;
  373|      2|                model = model.substr(0, model.find('\n'));
  374|      2|                ::pclose(file);
  375|      2|            }
  376|      2|#endif
  377|      2|            return model;
  378|      2|        }
_ZN4Simd4Base5CpuIdEiiPj:
   75|    350|        {
   76|       |#if defined(_WIN32)
   77|       |            __cpuidex((int*)registers, eax, ecx);
   78|       |#elif (defined __GNUC__)
   79|    350|            if (__get_cpuid_max(0, NULL) < eax)
  ------------------
  |  Branch (79:17): [True: 0, False: 350]
  ------------------
   80|      0|                return false;
   81|    350|            __cpuid_count(eax, ecx,
   82|    350|                registers[Cpuid::Eax],
   83|    350|                registers[Cpuid::Ebx],
   84|    350|                registers[Cpuid::Ecx],
   85|    350|                registers[Cpuid::Edx]);
   86|       |#else
   87|       |#error Do not know how to detect CPU info!
   88|       |#endif
   89|    350|            return true;
   90|    350|        }
_ZN4Simd4Base13CorrectIfZeroEmm:
  306|      6|        {
  307|      6|            return value ? value : otherwise;
  ------------------
  |  Branch (307:20): [True: 6, False: 0]
  ------------------
  308|      6|        }

_ZN4Simd4Base9GrayToBgrEPKhmmmPhm:
   38|    240|        {
   39|  1.29M|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (39:34): [True: 1.29M, False: 240]
  ------------------
   40|  1.29M|            {
   41|  8.22M|                for (size_t col = 0, offset = 0; col < width; ++col, offset += 3)
  ------------------
  |  Branch (41:50): [True: 6.92M, False: 1.29M]
  ------------------
   42|  6.92M|                    GrayToBgr(gray[col], bgr + offset);
   43|  1.29M|                gray += grayStride;
   44|  1.29M|                bgr += bgrStride;
   45|  1.29M|            }
   46|    240|        }
_ZN4Simd4Base9GrayToBgrERKhPh:
   31|  6.92M|        {
   32|  6.92M|            bgr[0] = gray;
   33|  6.92M|            bgr[1] = gray;
   34|  6.92M|            bgr[2] = gray;
   35|  6.92M|        }

_ZN4Simd4Base10GrayToBgraEPKhmmmPhmh:
   40|    120|        {
   41|   649k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (41:34): [True: 649k, False: 120]
  ------------------
   42|   649k|            {
   43|  4.11M|                for (size_t col = 0; col < width; ++col)
  ------------------
  |  Branch (43:38): [True: 3.46M, False: 649k]
  ------------------
   44|  3.46M|                    ((uint32_t*)bgra)[col] = GrayToBgra(gray[col], alpha);
   45|   649k|                gray += grayStride;
   46|   649k|                bgra += bgraStride;
   47|   649k|            }
   48|    120|        }
_ZN4Simd4Base10GrayToBgraEjj:
   31|  3.46M|        {
   32|       |#ifdef SIMD_BIG_ENDIAN
   33|       |            return alpha | (gray << 8) | (gray << 16) | (gray << 24);
   34|       |#else
   35|  3.46M|            return gray | (gray << 8) | (gray << 16) | (alpha << 24);
   36|  3.46M|#endif
   37|  3.46M|        }

_ZN4Simd16ImageLoaderParamC2EPKhm19SimdPixelFormatType:
   61|  2.40k|    {
   62|  2.40k|    }
_ZN4Simd16ImageLoaderParam8ValidateEv:
   65|  2.40k|    {
   66|  2.40k|        if (size >= 3)
  ------------------
  |  Branch (66:13): [True: 2.40k, False: 0]
  ------------------
   67|  2.40k|        {
   68|  2.40k|            if (data[0] == 'P' && data[2] == '\n')
  ------------------
  |  Branch (68:17): [True: 784, False: 1.62k]
  |  Branch (68:35): [True: 776, False: 8]
  ------------------
   69|    776|            {
   70|    776|                if (data[1] == '2')
  ------------------
  |  Branch (70:21): [True: 80, False: 696]
  ------------------
   71|     80|                    file = SimdImageFilePgmTxt;
   72|    776|                if (data[1] == '3')
  ------------------
  |  Branch (72:21): [True: 120, False: 656]
  ------------------
   73|    120|                    file = SimdImageFilePpmTxt;
   74|    776|                if (data[1] == '5')
  ------------------
  |  Branch (74:21): [True: 292, False: 484]
  ------------------
   75|    292|                    file = SimdImageFilePgmBin;
   76|    776|                if (data[1] == '6')
  ------------------
  |  Branch (76:21): [True: 284, False: 492]
  ------------------
   77|    284|                    file = SimdImageFilePpmBin;
   78|    776|            }
   79|  2.40k|        }
   80|  2.40k|        if (size >= 8)
  ------------------
  |  Branch (80:13): [True: 2.40k, False: 0]
  ------------------
   81|  2.40k|        {
   82|  2.40k|            const uint8_t SIGNATURE[8] = { 137, 80, 78, 71, 13, 10, 26, 10 };
   83|  2.40k|            if(memcmp(data, SIGNATURE, 8) == 0)
  ------------------
  |  Branch (83:16): [True: 168, False: 2.23k]
  ------------------
   84|    168|                file = SimdImageFilePng;
   85|  2.40k|        }
   86|  2.40k|        if (size >= 2)
  ------------------
  |  Branch (86:13): [True: 2.40k, False: 0]
  ------------------
   87|  2.40k|        {
   88|  2.40k|            if (data[0] == 0xFF && data[1] == 0xD8)
  ------------------
  |  Branch (88:17): [True: 1.45k, False: 952]
  |  Branch (88:36): [True: 1.45k, False: 0]
  ------------------
   89|  1.45k|                file = SimdImageFileJpeg;
   90|  2.40k|        }
   91|  2.40k|        return
   92|  2.40k|            file != SimdImageFileUndefined && 
  ------------------
  |  Branch (92:13): [True: 2.39k, False: 8]
  ------------------
   93|  2.40k|                (format == SimdPixelFormatNone || format == SimdPixelFormatGray8 || 
  ------------------
  |  Branch (93:18): [True: 0, False: 2.39k]
  |  Branch (93:51): [True: 599, False: 1.79k]
  ------------------
   94|  2.39k|                format == SimdPixelFormatBgr24 || format == SimdPixelFormatBgra32 || 
  ------------------
  |  Branch (94:17): [True: 599, False: 1.19k]
  |  Branch (94:51): [True: 599, False: 599]
  ------------------
   95|  2.39k|                format == SimdPixelFormatRgb24 || format == SimdPixelFormatRgba32);
  ------------------
  |  Branch (95:17): [True: 599, False: 0]
  |  Branch (95:51): [True: 0, False: 0]
  ------------------
   96|  2.40k|    }
_ZN4Simd4Base14ImagePxmLoaderC2ERKNS_16ImageLoaderParamE:
  104|    776|        {
  105|    776|        }
_ZN4Simd4Base14ImagePxmLoader10ReadHeaderEm:
  108|    776|        {
  109|    776|            if (_stream.Size() < 3 ||
  ------------------
  |  Branch (109:17): [True: 0, False: 776]
  ------------------
  110|    776|                _stream.Data()[0] != 'P' ||
  ------------------
  |  Branch (110:17): [True: 0, False: 776]
  ------------------
  111|    776|                _stream.Data()[1] != '0' + version ||
  ------------------
  |  Branch (111:17): [True: 0, False: 776]
  ------------------
  112|    776|                _stream.Data()[2] != '\n')
  ------------------
  |  Branch (112:17): [True: 0, False: 776]
  ------------------
  113|      0|                return false;
  114|    776|            _stream.Seek(3);
  115|    776|            uint32_t width, height, max;
  116|    776|            if (!(_stream.ReadUnsigned(width) && _stream.ReadUnsigned(height) && _stream.ReadUnsigned(max)))
  ------------------
  |  Branch (116:19): [True: 756, False: 20]
  |  Branch (116:50): [True: 744, False: 12]
  |  Branch (116:82): [True: 736, False: 8]
  ------------------
  117|     40|                return false;
  118|    736|            if (!(width > 0 && height > 0 && max == 255))
  ------------------
  |  Branch (118:19): [True: 728, False: 8]
  |  Branch (118:32): [True: 728, False: 0]
  |  Branch (118:46): [True: 716, False: 12]
  ------------------
  119|     20|                return false;
  120|    716|            uint8_t byte;
  121|    716|            if (!(_stream.Read(byte) && byte == '\n'))
  ------------------
  |  Branch (121:19): [True: 716, False: 0]
  |  Branch (121:41): [True: 716, False: 0]
  ------------------
  122|      0|                return false;
  123|    716|            _image.Recreate(width, height, (Image::Format)_param.format);
  124|    716|            _block = height;
  125|    716|            if (_param.file == SimdImageFilePgmTxt || _param.file == SimdImageFilePgmBin)
  ------------------
  |  Branch (125:17): [True: 56, False: 660]
  |  Branch (125:55): [True: 292, False: 368]
  ------------------
  126|    348|            {
  127|    348|                _size = width * 1;
  128|    348|                if (_param.format != SimdPixelFormatGray8)
  ------------------
  |  Branch (128:21): [True: 261, False: 87]
  ------------------
  129|    261|                {
  130|    261|                    _block = Simd::RestrictRange<size_t>(Base::AlgCacheL1() / _size, 1, height);
  131|    261|                    _buffer.Resize(_block * _size);
  132|    261|                }
  133|    348|            }
  134|    368|            else if (_param.file == SimdImageFilePpmTxt || _param.file == SimdImageFilePpmBin)
  ------------------
  |  Branch (134:22): [True: 96, False: 272]
  |  Branch (134:60): [True: 272, False: 0]
  ------------------
  135|    368|            {
  136|    368|                _size = width * 3;
  137|    368|                if (_param.format != SimdPixelFormatRgb24)
  ------------------
  |  Branch (137:21): [True: 276, False: 92]
  ------------------
  138|    276|                {
  139|    276|                    _block = Simd::RestrictRange<size_t>(Base::AlgCacheL1() / _size, 1, height);
  140|    276|                    _buffer.Resize(_block * _size);
  141|    276|                }
  142|    368|            }
  143|      0|            else
  144|      0|                return false;
  145|    716|            SetConverters();
  146|    716|            return true;
  147|    716|        }
_ZN4Simd4Base17ImagePgmTxtLoaderC2ERKNS_16ImageLoaderParamE:
  153|     80|        {
  154|     80|            if (_param.format == SimdPixelFormatNone)
  ------------------
  |  Branch (154:17): [True: 0, False: 80]
  ------------------
  155|      0|                _param.format = SimdPixelFormatGray8;
  156|     80|        }
_ZN4Simd4Base17ImagePgmTxtLoader10FromStreamEv:
  159|     80|        {
  160|     80|            if (!ReadHeader(2))
  ------------------
  |  Branch (160:17): [True: 24, False: 56]
  ------------------
  161|     24|                return false;
  162|     56|            size_t grayStride = _param.format == SimdPixelFormatGray8 ? _image.stride : _size;
  ------------------
  |  Branch (162:33): [True: 14, False: 42]
  ------------------
  163|    264|            for (size_t row = 0; row < _image.height;)
  ------------------
  |  Branch (163:34): [True: 260, False: 4]
  ------------------
  164|    260|            {
  165|    260|                size_t block = Simd::Min(row + _block, _image.height) - row;
  166|    260|                uint8_t * gray = _param.format == SimdPixelFormatGray8 ? _image.Row<uint8_t>(row) : _buffer.data;
  ------------------
  |  Branch (166:34): [True: 14, False: 246]
  ------------------
  167|  2.58k|                for (size_t b = 0; b < block; ++b)
  ------------------
  |  Branch (167:36): [True: 2.37k, False: 208]
  ------------------
  168|  2.37k|                {
  169|  5.92M|                    for (size_t i = 0; i < _size; ++i)
  ------------------
  |  Branch (169:40): [True: 5.92M, False: 2.32k]
  ------------------
  170|  5.92M|                    {
  171|  5.92M|                        if (!_stream.ReadUnsigned(gray[i]))
  ------------------
  |  Branch (171:29): [True: 52, False: 5.92M]
  ------------------
  172|     52|                            return false;
  173|  5.92M|                    }
  174|  2.32k|                    gray += grayStride;
  175|  2.32k|                }
  176|    208|                if(_param.format == SimdPixelFormatBgr24 || _param.format == SimdPixelFormatRgb24)
  ------------------
  |  Branch (176:20): [True: 69, False: 139]
  |  Branch (176:61): [True: 69, False: 70]
  ------------------
  177|    138|                    _toAny(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride);
  178|    208|                if (_param.format == SimdPixelFormatBgra32 || _param.format == SimdPixelFormatRgba32)
  ------------------
  |  Branch (178:21): [True: 69, False: 139]
  |  Branch (178:63): [True: 0, False: 139]
  ------------------
  179|     69|                    _toBgra(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride, 0xFF);
  180|    208|                row += block;
  181|    208|            }
  182|      4|            return true;
  183|     56|        }
_ZN4Simd4Base17ImagePgmTxtLoader13SetConvertersEv:
  186|     56|        {
  187|     56|            switch (_param.format)
  188|     56|            {
  189|     14|            case SimdPixelFormatBgr24: _toAny = Base::GrayToBgr; break;
  ------------------
  |  Branch (189:13): [True: 14, False: 42]
  ------------------
  190|     14|            case SimdPixelFormatBgra32: _toBgra = Base::GrayToBgra; break;
  ------------------
  |  Branch (190:13): [True: 14, False: 42]
  ------------------
  191|     14|            case SimdPixelFormatRgb24: _toAny = Base::GrayToBgr; break;
  ------------------
  |  Branch (191:13): [True: 14, False: 42]
  ------------------
  192|      0|            case SimdPixelFormatRgba32: _toBgra = Base::GrayToBgra; break;
  ------------------
  |  Branch (192:13): [True: 0, False: 56]
  ------------------
  193|     14|            default: break;
  ------------------
  |  Branch (193:13): [True: 14, False: 42]
  ------------------
  194|     56|            }
  195|     56|        }
_ZN4Simd4Base17ImagePgmBinLoaderC2ERKNS_16ImageLoaderParamE:
  201|    292|        {
  202|    292|            if (_param.format == SimdPixelFormatNone)
  ------------------
  |  Branch (202:17): [True: 0, False: 292]
  ------------------
  203|      0|                _param.format = SimdPixelFormatGray8;
  204|    292|        }
_ZN4Simd4Base17ImagePgmBinLoader10FromStreamEv:
  207|    292|        {
  208|    292|            if (!ReadHeader(5))
  ------------------
  |  Branch (208:17): [True: 0, False: 292]
  ------------------
  209|      0|                return false;
  210|    292|            size_t grayStride = _param.format == SimdPixelFormatGray8 ? _image.stride : _size;
  ------------------
  |  Branch (210:33): [True: 73, False: 219]
  ------------------
  211|  2.24k|            for (size_t row = 0; row < _image.height;)
  ------------------
  |  Branch (211:34): [True: 2.13k, False: 108]
  ------------------
  212|  2.13k|            {
  213|  2.13k|                size_t block = Simd::Min(row + _block, _image.height) - row;
  214|  2.13k|                uint8_t* gray = _param.format == SimdPixelFormatGray8 ? _image.Row<uint8_t>(row) : _buffer.data;
  ------------------
  |  Branch (214:33): [True: 73, False: 2.06k]
  ------------------
  215|  4.57M|                for (size_t b = 0; b < block; ++b)
  ------------------
  |  Branch (215:36): [True: 4.57M, False: 1.95k]
  ------------------
  216|  4.57M|                {
  217|  4.57M|                    if (_stream.Read(_size, gray) != _size)
  ------------------
  |  Branch (217:25): [True: 184, False: 4.57M]
  ------------------
  218|    184|                        return false;
  219|  4.57M|                    gray += grayStride;
  220|  4.57M|                }
  221|  1.95k|                if (_param.format == SimdPixelFormatBgr24 || _param.format == SimdPixelFormatRgb24)
  ------------------
  |  Branch (221:21): [True: 641, False: 1.30k]
  |  Branch (221:62): [True: 641, False: 668]
  ------------------
  222|  1.28k|                    _toAny(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride);
  223|  1.95k|                if (_param.format == SimdPixelFormatBgra32 || _param.format == SimdPixelFormatRgba32)
  ------------------
  |  Branch (223:21): [True: 641, False: 1.30k]
  |  Branch (223:63): [True: 0, False: 1.30k]
  ------------------
  224|    641|                    _toBgra(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride, 0xFF);
  225|  1.95k|                row += block;
  226|  1.95k|            }
  227|    108|            return true;
  228|    292|        }
_ZN4Simd4Base17ImagePgmBinLoader13SetConvertersEv:
  231|    292|        {
  232|    292|            switch (_param.format)
  233|    292|            {
  234|     73|            case SimdPixelFormatBgr24: _toAny = Base::GrayToBgr; break;
  ------------------
  |  Branch (234:13): [True: 73, False: 219]
  ------------------
  235|     73|            case SimdPixelFormatBgra32: _toBgra = Base::GrayToBgra; break;
  ------------------
  |  Branch (235:13): [True: 73, False: 219]
  ------------------
  236|     73|            case SimdPixelFormatRgb24: _toAny = Base::GrayToBgr; break;
  ------------------
  |  Branch (236:13): [True: 73, False: 219]
  ------------------
  237|      0|            case SimdPixelFormatRgba32: _toBgra = Base::GrayToBgra; break;
  ------------------
  |  Branch (237:13): [True: 0, False: 292]
  ------------------
  238|     73|            default: break;
  ------------------
  |  Branch (238:13): [True: 73, False: 219]
  ------------------
  239|    292|            }
  240|    292|        }
_ZN4Simd4Base17ImagePpmTxtLoaderC2ERKNS_16ImageLoaderParamE:
  246|    120|        {
  247|    120|            if (_param.format == SimdPixelFormatNone)
  ------------------
  |  Branch (247:17): [True: 0, False: 120]
  ------------------
  248|      0|                _param.format = SimdPixelFormatRgb24;
  249|    120|        }
_ZN4Simd4Base17ImagePpmTxtLoader10FromStreamEv:
  252|    120|        {
  253|    120|            if (!ReadHeader(3))
  ------------------
  |  Branch (253:17): [True: 24, False: 96]
  ------------------
  254|     24|                return false;
  255|     96|            size_t rgbStride = _param.format == SimdPixelFormatRgb24 ? _image.stride : _size;
  ------------------
  |  Branch (255:32): [True: 24, False: 72]
  ------------------
  256|    370|            for (size_t row = 0; row < _image.height;)
  ------------------
  |  Branch (256:34): [True: 366, False: 4]
  ------------------
  257|    366|            {
  258|    366|                size_t block = Simd::Min(row + _block, _image.height) - row;
  259|    366|                uint8_t* rgb = _param.format == SimdPixelFormatRgb24 ? _image.Row<uint8_t>(row) : _buffer.data;
  ------------------
  |  Branch (259:32): [True: 24, False: 342]
  ------------------
  260|  4.11k|                for (size_t b = 0; b < block; ++b)
  ------------------
  |  Branch (260:36): [True: 3.84k, False: 274]
  ------------------
  261|  3.84k|                {
  262|  9.23M|                    for (size_t i = 0; i < _size; ++i)
  ------------------
  |  Branch (262:40): [True: 9.23M, False: 3.74k]
  ------------------
  263|  9.23M|                    {
  264|  9.23M|                        if (!_stream.ReadUnsigned(rgb[i]))
  ------------------
  |  Branch (264:29): [True: 92, False: 9.23M]
  ------------------
  265|     92|                            return false;
  266|  9.23M|                    }
  267|  3.74k|                    rgb += rgbStride;
  268|  3.74k|                }
  269|    274|                if (_param.format == SimdPixelFormatGray8 || _param.format == SimdPixelFormatBgr24)
  ------------------
  |  Branch (269:21): [True: 91, False: 183]
  |  Branch (269:62): [True: 91, False: 92]
  ------------------
  270|    182|                    _toAny(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride);
  271|    274|                if (_param.format == SimdPixelFormatBgra32 || _param.format == SimdPixelFormatRgba32)
  ------------------
  |  Branch (271:21): [True: 91, False: 183]
  |  Branch (271:63): [True: 0, False: 183]
  ------------------
  272|     91|                    _toBgra(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride, 0xFF);
  273|    274|                row += block;
  274|    274|            }
  275|      4|            return true;
  276|     96|        }
_ZN4Simd4Base17ImagePpmTxtLoader13SetConvertersEv:
  279|     96|        {
  280|     96|            switch (_param.format)
  281|     96|            {
  282|     24|            case SimdPixelFormatGray8: _toAny = Base::RgbToGray; break;
  ------------------
  |  Branch (282:13): [True: 24, False: 72]
  ------------------
  283|     24|            case SimdPixelFormatBgr24: _toAny = Base::BgrToRgb; break;
  ------------------
  |  Branch (283:13): [True: 24, False: 72]
  ------------------
  284|     24|            case SimdPixelFormatBgra32: _toBgra = Base::RgbToBgra; break;
  ------------------
  |  Branch (284:13): [True: 24, False: 72]
  ------------------
  285|      0|            case SimdPixelFormatRgba32: _toBgra = Base::BgrToBgra; break;
  ------------------
  |  Branch (285:13): [True: 0, False: 96]
  ------------------
  286|     24|            default: break;
  ------------------
  |  Branch (286:13): [True: 24, False: 72]
  ------------------
  287|     96|            }
  288|     96|        }
_ZN4Simd4Base17ImagePpmBinLoaderC2ERKNS_16ImageLoaderParamE:
  294|    284|        {
  295|    284|            if (_param.format == SimdPixelFormatNone)
  ------------------
  |  Branch (295:17): [True: 0, False: 284]
  ------------------
  296|      0|                _param.format = SimdPixelFormatRgb24;
  297|    284|        }
_ZN4Simd4Base17ImagePpmBinLoader10FromStreamEv:
  300|    284|        {
  301|    284|            if (!ReadHeader(6))
  ------------------
  |  Branch (301:17): [True: 12, False: 272]
  ------------------
  302|     12|                return false;
  303|    272|            size_t rgbStride = _param.format == SimdPixelFormatRgb24 ? _image.stride : _size;
  ------------------
  |  Branch (303:32): [True: 68, False: 204]
  ------------------
  304|  1.94k|            for (size_t row = 0; row < _image.height;)
  ------------------
  |  Branch (304:34): [True: 1.88k, False: 60]
  ------------------
  305|  1.88k|            {
  306|  1.88k|                size_t block = Simd::Min(row + _block, _image.height) - row;
  307|  1.88k|                uint8_t* rgb = _param.format == SimdPixelFormatRgb24 ? _image.Row<uint8_t>(row) : _buffer.data;
  ------------------
  |  Branch (307:32): [True: 68, False: 1.81k]
  ------------------
  308|  3.42M|                for (size_t b = 0; b < block; ++b)
  ------------------
  |  Branch (308:36): [True: 3.42M, False: 1.66k]
  ------------------
  309|  3.42M|                {
  310|  3.42M|                    if (_stream.Read(_size, rgb) != _size)
  ------------------
  |  Branch (310:25): [True: 212, False: 3.42M]
  ------------------
  311|    212|                        return false;
  312|  3.42M|                    rgb += rgbStride;
  313|  3.42M|                }
  314|  1.66k|                if (_param.format == SimdPixelFormatGray8 || _param.format == SimdPixelFormatBgr24)
  ------------------
  |  Branch (314:21): [True: 551, False: 1.11k]
  |  Branch (314:62): [True: 551, False: 566]
  ------------------
  315|  1.10k|                    _toAny(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride);
  316|  1.66k|                if (_param.format == SimdPixelFormatBgra32 || _param.format == SimdPixelFormatRgba32)
  ------------------
  |  Branch (316:21): [True: 551, False: 1.11k]
  |  Branch (316:63): [True: 0, False: 1.11k]
  ------------------
  317|    551|                    _toBgra(_buffer.data, _image.width, block, _size, _image.Row<uint8_t>(row), _image.stride, 0xFF);
  318|  1.66k|                row += block;
  319|  1.66k|            }
  320|     60|            return true;
  321|    272|        }
_ZN4Simd4Base17ImagePpmBinLoader13SetConvertersEv:
  324|    272|        {
  325|    272|            switch (_param.format)
  326|    272|            {
  327|     68|            case SimdPixelFormatGray8: _toAny = Base::RgbToGray; break;
  ------------------
  |  Branch (327:13): [True: 68, False: 204]
  ------------------
  328|     68|            case SimdPixelFormatBgr24: _toAny = Base::BgrToRgb; break;
  ------------------
  |  Branch (328:13): [True: 68, False: 204]
  ------------------
  329|     68|            case SimdPixelFormatBgra32: _toBgra = Base::RgbToBgra; break;
  ------------------
  |  Branch (329:13): [True: 68, False: 204]
  ------------------
  330|      0|            case SimdPixelFormatRgba32: _toBgra = Base::BgrToBgra; break;
  ------------------
  |  Branch (330:13): [True: 0, False: 272]
  ------------------
  331|     68|            default: break;
  ------------------
  |  Branch (331:13): [True: 68, False: 204]
  ------------------
  332|    272|            }
  333|    272|        }

_ZN4Simd4Base11JpegHuffman5BuildEPKi:
   73|   106k|        {
   74|   106k|            int i, j, k = 0;
   75|  1.81M|            for (i = 0; i < 16; ++i)
  ------------------
  |  Branch (75:25): [True: 1.71M, False: 106k]
  ------------------
   76|  5.48M|                for (j = 0; j < count[i]; ++j)
  ------------------
  |  Branch (76:29): [True: 3.76M, False: 1.71M]
  ------------------
   77|  3.76M|                    size[k++] = (uint8_t)(i + 1);
   78|   106k|            size[k] = 0;
   79|   106k|            unsigned int c = 0;
   80|  1.81M|            for (j = 1, k = 0; j <= 16; ++j) 
  ------------------
  |  Branch (80:32): [True: 1.71M, False: 106k]
  ------------------
   81|  1.71M|            {
   82|  1.71M|                delta[j] = k - c;
   83|  1.71M|                if (size[k] == j) 
  ------------------
  |  Branch (83:21): [True: 940k, False: 769k]
  ------------------
   84|   940k|                {
   85|  4.67M|                    while (size[k] == j)
  ------------------
  |  Branch (85:28): [True: 3.73M, False: 940k]
  ------------------
   86|  3.73M|                        code[k++] = (uint16_t)(c++);
   87|   940k|                    if (c - 1 >= (1u << j)) 
  ------------------
  |  Branch (87:25): [True: 94, False: 940k]
  ------------------
   88|     94|                        return JpegLoadError("bad code lengths", "Corrupt JPEG");
   89|   940k|                }
   90|  1.71M|                maxcode[j] = c << (16 - j);
   91|  1.71M|                c <<= 1;
   92|  1.71M|            }
   93|   106k|            maxcode[j] = 0xffffffff;
   94|   106k|            memset(fast, 255, 1 << JpegFastBits);
   95|  3.82M|            for (i = 0; i < k; ++i) 
  ------------------
  |  Branch (95:25): [True: 3.72M, False: 106k]
  ------------------
   96|  3.72M|            {
   97|  3.72M|                int s = size[i];
   98|  3.72M|                if (s <= JpegFastBits) 
  ------------------
  |  Branch (98:21): [True: 1.76M, False: 1.95M]
  ------------------
   99|  1.76M|                {
  100|  1.76M|                    int c = code[i] << (JpegFastBits - s);
  101|  1.76M|                    int m = 1 << (JpegFastBits - s);
  102|  55.8M|                    for (j = 0; j < m; ++j)
  ------------------
  |  Branch (102:33): [True: 54.0M, False: 1.76M]
  ------------------
  103|  54.0M|                        fast[c + j] = (uint8_t)i;
  104|  1.76M|                }
  105|  3.72M|            }
  106|   106k|            return 1;
  107|   106k|        }
_ZN4Simd4Base11JpegHuffman11BuildFastAcEv:
  110|  95.6k|        {
  111|  49.0M|            for (int i = 0; i < (1 << JpegFastBits); ++i)
  ------------------
  |  Branch (111:29): [True: 48.9M, False: 95.6k]
  ------------------
  112|  48.9M|            {
  113|  48.9M|                uint8_t f = fast[i];
  114|  48.9M|                fast_ac[i] = 0;
  115|  48.9M|                if (f < 255)
  ------------------
  |  Branch (115:21): [True: 48.3M, False: 580k]
  ------------------
  116|  48.3M|                {
  117|  48.3M|                    int rs = values[f];
  118|  48.3M|                    int run = (rs >> 4) & 15;
  119|  48.3M|                    int magbits = rs & 15;
  120|  48.3M|                    int len = size[f];
  121|  48.3M|                    if (magbits && len + magbits <= JpegFastBits)
  ------------------
  |  Branch (121:25): [True: 26.5M, False: 21.8M]
  |  Branch (121:36): [True: 24.7M, False: 1.78M]
  ------------------
  122|  24.7M|                    {
  123|  24.7M|                        int k = ((i << len) & ((1 << JpegFastBits) - 1)) >> (JpegFastBits - magbits);
  124|  24.7M|                        int m = 1 << (magbits - 1);
  125|  24.7M|                        if (k < m)
  ------------------
  |  Branch (125:29): [True: 12.3M, False: 12.3M]
  ------------------
  126|  12.3M|                            k += (~0U << magbits) + 1;
  127|  24.7M|                        if (k >= -128 && k <= 127)
  ------------------
  |  Branch (127:29): [True: 24.7M, False: 0]
  |  Branch (127:42): [True: 24.7M, False: 0]
  ------------------
  128|  24.7M|                            fast_ac[i] = (int16_t)((k * 256) + (run * 16) + (len + magbits));
  129|  24.7M|                    }
  130|  48.3M|                }
  131|  48.9M|            }
  132|  95.6k|        }
_ZN4Simd4Base15ImageJpegLoaderC2ERKNS_16ImageLoaderParamE:
 1443|  1.45k|        {
 1444|  1.45k|            if (_param.format == SimdPixelFormatNone)
  ------------------
  |  Branch (1444:17): [True: 0, False: 1.45k]
  ------------------
 1445|      0|                _param.format = SimdPixelFormatRgb24;
 1446|  1.45k|        }
_ZN4Simd4Base15ImageJpegLoader10FromStreamEv:
 1449|  1.45k|        {
 1450|  1.45k|            int x, y, comp;
 1451|  1.45k|            JpegContext j;
 1452|  1.45k|            j.stream = &_stream;
 1453|  1.45k|            jpeg__setup_jpeg(&j);
 1454|  1.45k|            if (load_jpeg_image(&j, &x, &y, &comp, 4))
  ------------------
  |  Branch (1454:17): [True: 160, False: 1.29k]
  ------------------
 1455|    160|            {
 1456|    160|                size_t stride = 4 * x;
 1457|    160|                _image.Recreate(x, y, (Image::Format)_param.format);
 1458|    160|                switch (_param.format)
 1459|    160|                {
 1460|     40|                case SimdPixelFormatGray8:
  ------------------
  |  Branch (1460:17): [True: 40, False: 120]
  ------------------
 1461|     40|                    Base::RgbaToGray(j.out.data, x, y, stride, _image.data, _image.stride);
 1462|     40|                    break;
 1463|     40|                case SimdPixelFormatBgr24:
  ------------------
  |  Branch (1463:17): [True: 40, False: 120]
  ------------------
 1464|     40|                    Base::BgraToRgb(j.out.data, x, y, stride, _image.data, _image.stride);
 1465|     40|                    break;
 1466|     40|                case SimdPixelFormatBgra32:
  ------------------
  |  Branch (1466:17): [True: 40, False: 120]
  ------------------
 1467|     40|                    Base::BgraToRgba(j.out.data, x, y, stride, _image.data, _image.stride);
 1468|     40|                    break;
 1469|     40|                case SimdPixelFormatRgb24:
  ------------------
  |  Branch (1469:17): [True: 40, False: 120]
  ------------------
 1470|     40|                    Base::BgraToBgr(j.out.data, x, y, stride, _image.data, _image.stride);
 1471|     40|                    break;
 1472|      0|                case SimdPixelFormatRgba32:
  ------------------
  |  Branch (1472:17): [True: 0, False: 160]
  ------------------
 1473|      0|                    Base::Copy(j.out.data, stride, x, y, 4, _image.data, _image.stride);
 1474|      0|                    break;
 1475|      0|                default: 
  ------------------
  |  Branch (1475:17): [True: 0, False: 160]
  ------------------
 1476|      0|                    break;
 1477|    160|                }
 1478|    160|                return true;
 1479|    160|            }
 1480|  1.29k|            return false;
 1481|  1.45k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL16jpeg__setup_jpegEPNS0_11JpegContextE:
 1246|  1.45k|        {
 1247|  1.45k|            j->idct_block_kernel = jpeg__idct_block;
 1248|  1.45k|            j->YCbCr_to_RGB_kernel = jpeg__YCbCr_to_RGB_row;
 1249|  1.45k|            j->resample_row_hv_2_kernel = jpeg__resample_row_hv_2;
 1250|  1.45k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL16jpeg__idct_blockEPhiPs:
  569|  36.0M|        {
  570|  36.0M|            int i, val[64], * v = val;
  571|  36.0M|            uint8_t* o;
  572|  36.0M|            short* d = data;
  573|       |
  574|       |            // columns
  575|   324M|            for (i = 0; i < 8; ++i, ++d, ++v) {
  ------------------
  |  Branch (575:25): [True: 288M, False: 36.0M]
  ------------------
  576|       |                // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
  577|   288M|                if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0
  ------------------
  |  Branch (577:21): [True: 100M, False: 188M]
  |  Branch (577:34): [True: 98.8M, False: 1.41M]
  |  Branch (577:48): [True: 98.3M, False: 518k]
  |  Branch (577:62): [True: 98.0M, False: 260k]
  ------------------
  578|   288M|                    && d[40] == 0 && d[48] == 0 && d[56] == 0) {
  ------------------
  |  Branch (578:24): [True: 97.9M, False: 77.1k]
  |  Branch (578:38): [True: 97.9M, False: 60.1k]
  |  Branch (578:52): [True: 97.8M, False: 32.7k]
  ------------------
  579|       |                    //    no shortcut                 0     seconds
  580|       |                    //    (1|2|3|4|5|6|7)==0          0     seconds
  581|       |                    //    all separate               -0.047 seconds
  582|       |                    //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
  583|  97.8M|                    int dcterm = d[0] * 4;
  584|  97.8M|                    v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
  585|  97.8M|                }
  586|   190M|                else {
  587|   190M|                    JPEG__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
  ------------------
  |  |  532|   190M|   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
  |  |  533|   190M|   p2 = s2;                                    \
  |  |  534|   190M|   p3 = s6;                                    \
  |  |  535|   190M|   p1 = (p2+p3) * jpeg__f2f(0.5411961f);       \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  536|   190M|   t2 = p1 + p3*jpeg__f2f(-1.847759065f);      \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  537|   190M|   t3 = p1 + p2*jpeg__f2f( 0.765366865f);      \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  538|   190M|   p2 = s0;                                    \
  |  |  539|   190M|   p3 = s4;                                    \
  |  |  540|   190M|   t0 = jpeg__fsh(p2+p3);                      \
  |  |  ------------------
  |  |  |  |  528|   190M|#define jpeg__fsh(x)  ((x) * 4096)
  |  |  ------------------
  |  |  541|   190M|   t1 = jpeg__fsh(p2-p3);                      \
  |  |  ------------------
  |  |  |  |  528|   190M|#define jpeg__fsh(x)  ((x) * 4096)
  |  |  ------------------
  |  |  542|   190M|   x0 = t0+t3;                                 \
  |  |  543|   190M|   x3 = t0-t3;                                 \
  |  |  544|   190M|   x1 = t1+t2;                                 \
  |  |  545|   190M|   x2 = t1-t2;                                 \
  |  |  546|   190M|   t0 = s7;                                    \
  |  |  547|   190M|   t1 = s5;                                    \
  |  |  548|   190M|   t2 = s3;                                    \
  |  |  549|   190M|   t3 = s1;                                    \
  |  |  550|   190M|   p3 = t0+t2;                                 \
  |  |  551|   190M|   p4 = t1+t3;                                 \
  |  |  552|   190M|   p1 = t0+t3;                                 \
  |  |  553|   190M|   p2 = t1+t2;                                 \
  |  |  554|   190M|   p5 = (p3+p4)*jpeg__f2f( 1.175875602f);      \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  555|   190M|   t0 = t0*jpeg__f2f( 0.298631336f);           \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  556|   190M|   t1 = t1*jpeg__f2f( 2.053119869f);           \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  557|   190M|   t2 = t2*jpeg__f2f( 3.072711026f);           \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  558|   190M|   t3 = t3*jpeg__f2f( 1.501321110f);           \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  559|   190M|   p1 = p5 + p1*jpeg__f2f(-0.899976223f);      \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  560|   190M|   p2 = p5 + p2*jpeg__f2f(-2.562915447f);      \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  561|   190M|   p3 = p3*jpeg__f2f(-1.961570560f);           \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  562|   190M|   p4 = p4*jpeg__f2f(-0.390180644f);           \
  |  |  ------------------
  |  |  |  |  527|   190M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  563|   190M|   t3 += p1+p4;                                \
  |  |  564|   190M|   t2 += p2+p3;                                \
  |  |  565|   190M|   t1 += p2+p4;                                \
  |  |  566|   190M|   t0 += p1+p3;
  ------------------
  588|       |                        // constants scaled things up by 1<<12; let's bring them back
  589|       |                        // down, but keep 2 extra bits of precision
  590|   190M|                        x0 += 512; x1 += 512; x2 += 512; x3 += 512;
  591|   190M|                    v[0] = (x0 + t3) >> 10;
  592|   190M|                    v[56] = (x0 - t3) >> 10;
  593|   190M|                    v[8] = (x1 + t2) >> 10;
  594|   190M|                    v[48] = (x1 - t2) >> 10;
  595|   190M|                    v[16] = (x2 + t1) >> 10;
  596|   190M|                    v[40] = (x2 - t1) >> 10;
  597|   190M|                    v[24] = (x3 + t0) >> 10;
  598|   190M|                    v[32] = (x3 - t0) >> 10;
  599|   190M|                }
  600|   288M|            }
  601|       |
  602|   324M|            for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
  ------------------
  |  Branch (602:43): [True: 288M, False: 36.0M]
  ------------------
  603|       |                // no fast case since the first 1D IDCT spread components out
  604|   288M|                JPEG__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
  ------------------
  |  |  532|   288M|   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
  |  |  533|   288M|   p2 = s2;                                    \
  |  |  534|   288M|   p3 = s6;                                    \
  |  |  535|   288M|   p1 = (p2+p3) * jpeg__f2f(0.5411961f);       \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  536|   288M|   t2 = p1 + p3*jpeg__f2f(-1.847759065f);      \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  537|   288M|   t3 = p1 + p2*jpeg__f2f( 0.765366865f);      \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  538|   288M|   p2 = s0;                                    \
  |  |  539|   288M|   p3 = s4;                                    \
  |  |  540|   288M|   t0 = jpeg__fsh(p2+p3);                      \
  |  |  ------------------
  |  |  |  |  528|   288M|#define jpeg__fsh(x)  ((x) * 4096)
  |  |  ------------------
  |  |  541|   288M|   t1 = jpeg__fsh(p2-p3);                      \
  |  |  ------------------
  |  |  |  |  528|   288M|#define jpeg__fsh(x)  ((x) * 4096)
  |  |  ------------------
  |  |  542|   288M|   x0 = t0+t3;                                 \
  |  |  543|   288M|   x3 = t0-t3;                                 \
  |  |  544|   288M|   x1 = t1+t2;                                 \
  |  |  545|   288M|   x2 = t1-t2;                                 \
  |  |  546|   288M|   t0 = s7;                                    \
  |  |  547|   288M|   t1 = s5;                                    \
  |  |  548|   288M|   t2 = s3;                                    \
  |  |  549|   288M|   t3 = s1;                                    \
  |  |  550|   288M|   p3 = t0+t2;                                 \
  |  |  551|   288M|   p4 = t1+t3;                                 \
  |  |  552|   288M|   p1 = t0+t3;                                 \
  |  |  553|   288M|   p2 = t1+t2;                                 \
  |  |  554|   288M|   p5 = (p3+p4)*jpeg__f2f( 1.175875602f);      \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  555|   288M|   t0 = t0*jpeg__f2f( 0.298631336f);           \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  556|   288M|   t1 = t1*jpeg__f2f( 2.053119869f);           \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  557|   288M|   t2 = t2*jpeg__f2f( 3.072711026f);           \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  558|   288M|   t3 = t3*jpeg__f2f( 1.501321110f);           \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  559|   288M|   p1 = p5 + p1*jpeg__f2f(-0.899976223f);      \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  560|   288M|   p2 = p5 + p2*jpeg__f2f(-2.562915447f);      \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  561|   288M|   p3 = p3*jpeg__f2f(-1.961570560f);           \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  562|   288M|   p4 = p4*jpeg__f2f(-0.390180644f);           \
  |  |  ------------------
  |  |  |  |  527|   288M|#define jpeg__f2f(x)  ((int) (((x) * 4096 + 0.5)))
  |  |  ------------------
  |  |  563|   288M|   t3 += p1+p4;                                \
  |  |  564|   288M|   t2 += p2+p3;                                \
  |  |  565|   288M|   t1 += p2+p4;                                \
  |  |  566|   288M|   t0 += p1+p3;
  ------------------
  605|       |                    // constants scaled things up by 1<<12, plus we had 1<<2 from first
  606|       |                    // loop, plus horizontal and vertical each scale by sqrt(8) so together
  607|       |                    // we've got an extra 1<<3, so 1<<17 total we need to remove.
  608|       |                    // so we want to round that, which means adding 0.5 * 1<<17,
  609|       |                    // aka 65536. Also, we'll end up with -128 to 127 that we want
  610|       |                    // to encode as 0..255 by adding 128, so we'll add that before the shift
  611|   288M|                    x0 += 65536 + (128 << 17);
  612|   288M|                x1 += 65536 + (128 << 17);
  613|   288M|                x2 += 65536 + (128 << 17);
  614|   288M|                x3 += 65536 + (128 << 17);
  615|       |                // tried computing the shifts into temps, or'ing the temps to see
  616|       |                // if any were out of range, but that was slower
  617|   288M|                o[0] = jpeg__clamp((x0 + t3) >> 17);
  618|   288M|                o[7] = jpeg__clamp((x0 - t3) >> 17);
  619|   288M|                o[1] = jpeg__clamp((x1 + t2) >> 17);
  620|   288M|                o[6] = jpeg__clamp((x1 - t2) >> 17);
  621|   288M|                o[2] = jpeg__clamp((x2 + t1) >> 17);
  622|   288M|                o[5] = jpeg__clamp((x2 - t1) >> 17);
  623|   288M|                o[3] = jpeg__clamp((x3 + t0) >> 17);
  624|   288M|                o[4] = jpeg__clamp((x3 - t0) >> 17);
  625|   288M|            }
  626|  36.0M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL11jpeg__clampEi:
  518|  2.30G|        {
  519|       |            // trick to use a single test to catch both cases
  520|  2.30G|            if ((unsigned int)x > 255) {
  ------------------
  |  Branch (520:17): [True: 1.47G, False: 830M]
  ------------------
  521|  1.47G|                if (x < 0) return 0;
  ------------------
  |  Branch (521:21): [True: 813M, False: 664M]
  ------------------
  522|   664M|                if (x > 255) return 255;
  ------------------
  |  Branch (522:21): [True: 664M, False: 0]
  ------------------
  523|   664M|            }
  524|   830M|            return (uint8_t)x;
  525|  2.30G|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL22jpeg__YCbCr_to_RGB_rowEPhPKhS3_S3_ii:
 1220|  5.50k|        {
 1221|  5.50k|            int i;
 1222|  6.28M|            for (i = 0; i < count; ++i) {
  ------------------
  |  Branch (1222:25): [True: 6.27M, False: 5.50k]
  ------------------
 1223|  6.27M|                int y_fixed = (y[i] << 20) + (1 << 19); // rounding
 1224|  6.27M|                int r, g, b;
 1225|  6.27M|                int cr = pcr[i] - 128;
 1226|  6.27M|                int cb = pcb[i] - 128;
 1227|  6.27M|                r = y_fixed + cr * jpeg__float2fixed(1.40200f);
  ------------------
  |  | 1218|  6.27M|#define jpeg__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
  ------------------
 1228|  6.27M|                g = y_fixed + (cr * -jpeg__float2fixed(0.71414f)) + ((cb * -jpeg__float2fixed(0.34414f)) & 0xffff0000);
  ------------------
  |  | 1218|  6.27M|#define jpeg__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
  ------------------
                              g = y_fixed + (cr * -jpeg__float2fixed(0.71414f)) + ((cb * -jpeg__float2fixed(0.34414f)) & 0xffff0000);
  ------------------
  |  | 1218|  6.27M|#define jpeg__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
  ------------------
 1229|  6.27M|                b = y_fixed + cb * jpeg__float2fixed(1.77200f);
  ------------------
  |  | 1218|  6.27M|#define jpeg__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
  ------------------
 1230|  6.27M|                r >>= 20;
 1231|  6.27M|                g >>= 20;
 1232|  6.27M|                b >>= 20;
 1233|  6.27M|                if ((unsigned)r > 255) { if (r < 0) r = 0; else r = 255; }
  ------------------
  |  Branch (1233:21): [True: 1.49M, False: 4.78M]
  |  Branch (1233:46): [True: 753k, False: 741k]
  ------------------
 1234|  6.27M|                if ((unsigned)g > 255) { if (g < 0) g = 0; else g = 255; }
  ------------------
  |  Branch (1234:21): [True: 1.28M, False: 4.98M]
  |  Branch (1234:46): [True: 640k, False: 649k]
  ------------------
 1235|  6.27M|                if ((unsigned)b > 255) { if (b < 0) b = 0; else b = 255; }
  ------------------
  |  Branch (1235:21): [True: 4.11M, False: 2.16M]
  |  Branch (1235:46): [True: 2.10M, False: 2.00M]
  ------------------
 1236|  6.27M|                out[0] = (uint8_t)r;
 1237|  6.27M|                out[1] = (uint8_t)g;
 1238|  6.27M|                out[2] = (uint8_t)b;
 1239|  6.27M|                out[3] = 255;
 1240|  6.27M|                out += step;
 1241|  6.27M|            }
 1242|  5.50k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL23jpeg__resample_row_hv_2EPhS1_S1_ii:
 1182|  15.7k|        {
 1183|       |            // need to generate 2x2 samples for every one in input
 1184|  15.7k|            int i, t0, t1;
 1185|  15.7k|            if (w == 1) {
  ------------------
  |  Branch (1185:17): [True: 0, False: 15.7k]
  ------------------
 1186|      0|                out[0] = out[1] = jpeg__div4(3 * in_near[0] + in_far[0] + 2);
  ------------------
  |  | 1130|      0|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1187|      0|                return out;
 1188|      0|            }
 1189|       |
 1190|  15.7k|            t1 = 3 * in_near[0] + in_far[0];
 1191|  15.7k|            out[0] = jpeg__div4(t1 + 2);
  ------------------
  |  | 1130|  15.7k|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1192|  4.08M|            for (i = 1; i < w; ++i) {
  ------------------
  |  Branch (1192:25): [True: 4.06M, False: 15.7k]
  ------------------
 1193|  4.06M|                t0 = t1;
 1194|  4.06M|                t1 = 3 * in_near[i] + in_far[i];
 1195|  4.06M|                out[i * 2 - 1] = jpeg__div16(3 * t0 + t1 + 8);
  ------------------
  |  | 1179|  4.06M|#define jpeg__div16(x) ((uint8_t) ((x) >> 4))
  ------------------
 1196|  4.06M|                out[i * 2] = jpeg__div16(3 * t1 + t0 + 8);
  ------------------
  |  | 1179|  4.06M|#define jpeg__div16(x) ((uint8_t) ((x) >> 4))
  ------------------
 1197|  4.06M|            }
 1198|  15.7k|            out[w * 2 - 1] = jpeg__div4(t1 + 2);
  ------------------
  |  | 1130|  15.7k|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1199|       |
 1200|  15.7k|            JPEG_NOTUSED(hs);
  ------------------
  |  |   56|  15.7k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1201|       |
 1202|  15.7k|            return out;
 1203|  15.7k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL15load_jpeg_imageEPNS0_11JpegContextEPiS3_S3_i:
 1270|  1.45k|        {
 1271|  1.45k|            int n, decode_n, is_rgb;
 1272|  1.45k|            z->img_n = 0; // make jpeg__cleanup_jpeg safe
 1273|       |
 1274|       |            // validate req_comp
 1275|  1.45k|            if (req_comp < 0 || req_comp > 4) return JpegLoadError("bad req_comp", "Internal error");
  ------------------
  |  Branch (1275:17): [True: 0, False: 1.45k]
  |  Branch (1275:33): [True: 0, False: 1.45k]
  ------------------
 1276|       |
 1277|       |            // load a jpeg image from whichever source, but leave in YCbCr format
 1278|  1.45k|            if (!jpeg__decode_jpeg_image(z))
  ------------------
  |  Branch (1278:17): [True: 1.29k, False: 160]
  ------------------
 1279|  1.29k|                return 0;
 1280|       |
 1281|       |            // determine actual number of components to generate
 1282|    160|            n = req_comp ? req_comp : z->img_n >= 3 ? 3 : 1;
  ------------------
  |  Branch (1282:17): [True: 160, False: 0]
  |  Branch (1282:39): [True: 0, False: 0]
  ------------------
 1283|       |
 1284|    160|            is_rgb = z->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
  ------------------
  |  Branch (1284:22): [True: 140, False: 20]
  |  Branch (1284:40): [True: 0, False: 140]
  |  Branch (1284:56): [True: 60, False: 80]
  |  Branch (1284:89): [True: 60, False: 0]
  ------------------
 1285|       |
 1286|    160|            if (z->img_n == 3 && n < 3 && !is_rgb)
  ------------------
  |  Branch (1286:17): [True: 140, False: 20]
  |  Branch (1286:34): [True: 0, False: 140]
  |  Branch (1286:43): [True: 0, False: 0]
  ------------------
 1287|      0|                decode_n = 1;
 1288|    160|            else
 1289|    160|                decode_n = z->img_n;
 1290|       |
 1291|       |            // resample and color-convert
 1292|    160|            {
 1293|    160|                int k;
 1294|    160|                unsigned int i, j;
 1295|    160|                uint8_t* coutput[4] = { NULL, NULL, NULL, NULL };
 1296|       |
 1297|    160|                jpeg__resample res_comp[4];
 1298|       |
 1299|    660|                for (k = 0; k < decode_n; ++k) 
  ------------------
  |  Branch (1299:29): [True: 500, False: 160]
  ------------------
 1300|    500|                {
 1301|    500|                    jpeg__resample* r = &res_comp[k];
 1302|       |
 1303|       |                    // allocate line buffer big enough for upsampling off the edges
 1304|       |                    // with upsample factor of 4
 1305|    500|                    z->img_comp[k].bufL.Resize(z->img_x + 3);
 1306|    500|                    if (z->img_comp[k].bufL.Empty()) 
  ------------------
  |  Branch (1306:25): [True: 0, False: 500]
  ------------------
 1307|      0|                        return JpegLoadError("outofmem", "Out of memory");
 1308|       |
 1309|    500|                    r->hs = z->img_h_max / z->img_comp[k].h;
 1310|    500|                    r->vs = z->img_v_max / z->img_comp[k].v;
 1311|    500|                    r->ystep = r->vs >> 1;
 1312|    500|                    r->w_lores = (z->img_x + r->hs - 1) / r->hs;
 1313|    500|                    r->ypos = 0;
 1314|    500|                    r->line0 = r->line1 = z->img_comp[k].data;
 1315|       |
 1316|    500|                    if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
  ------------------
  |  Branch (1316:25): [True: 160, False: 340]
  |  Branch (1316:39): [True: 48, False: 112]
  ------------------
 1317|    452|                    else if (r->hs == 1 && r->vs == 2) r->resample = jpeg__resample_row_v_2;
  ------------------
  |  Branch (1317:30): [True: 112, False: 340]
  |  Branch (1317:44): [True: 20, False: 92]
  ------------------
 1318|    432|                    else if (r->hs == 2 && r->vs == 1) r->resample = jpeg__resample_row_h_2;
  ------------------
  |  Branch (1318:30): [True: 340, False: 92]
  |  Branch (1318:44): [True: 192, False: 148]
  ------------------
 1319|    240|                    else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
  ------------------
  |  Branch (1319:30): [True: 148, False: 92]
  |  Branch (1319:44): [True: 148, False: 0]
  ------------------
 1320|     92|                    else                               r->resample = jpeg__resample_row_generic;
 1321|    500|                }
 1322|       |
 1323|       |                // can't error after this so, this is safe
 1324|    160|                z->out.Resize(n * z->img_x * z->img_y + 1);
 1325|    160|                if (z->out.Empty()) return JpegLoadError("outofmem", "Out of memory");
  ------------------
  |  Branch (1325:21): [True: 0, False: 160]
  ------------------
 1326|       |
 1327|       |                // now go ahead and resample
 1328|  13.8k|                for (j = 0; j < z->img_y; ++j) {
  ------------------
  |  Branch (1328:29): [True: 13.7k, False: 160]
  ------------------
 1329|  13.7k|                    uint8_t* out = z->out.data + n * z->img_x * j;
 1330|  57.9k|                    for (k = 0; k < decode_n; ++k) {
  ------------------
  |  Branch (1330:33): [True: 44.2k, False: 13.7k]
  ------------------
 1331|  44.2k|                        jpeg__resample* r = &res_comp[k];
 1332|  44.2k|                        int y_bot = r->ystep >= (r->vs >> 1);
 1333|  44.2k|                        coutput[k] = r->resample(z->img_comp[k].bufL.data,
 1334|  44.2k|                            y_bot ? r->line1 : r->line0,
  ------------------
  |  Branch (1334:29): [True: 31.6k, False: 12.6k]
  ------------------
 1335|  44.2k|                            y_bot ? r->line0 : r->line1,
  ------------------
  |  Branch (1335:29): [True: 31.6k, False: 12.6k]
  ------------------
 1336|  44.2k|                            r->w_lores, r->hs);
 1337|  44.2k|                        if (++r->ystep >= r->vs) {
  ------------------
  |  Branch (1337:29): [True: 30.0k, False: 14.2k]
  ------------------
 1338|  30.0k|                            r->ystep = 0;
 1339|  30.0k|                            r->line0 = r->line1;
 1340|  30.0k|                            if (++r->ypos < z->img_comp[k].y)
  ------------------
  |  Branch (1340:33): [True: 28.4k, False: 1.52k]
  ------------------
 1341|  28.4k|                                r->line1 += z->img_comp[k].w2;
 1342|  30.0k|                        }
 1343|  44.2k|                    }
 1344|  13.7k|                    if (n >= 3) {
  ------------------
  |  Branch (1344:25): [True: 13.7k, False: 0]
  ------------------
 1345|  13.7k|                        uint8_t* y = coutput[0];
 1346|  13.7k|                        if (z->img_n == 3) {
  ------------------
  |  Branch (1346:29): [True: 10.5k, False: 3.16k]
  ------------------
 1347|  10.5k|                            if (is_rgb) {
  ------------------
  |  Branch (1347:33): [True: 5.04k, False: 5.50k]
  ------------------
 1348|  1.92M|                                for (i = 0; i < z->img_x; ++i) {
  ------------------
  |  Branch (1348:45): [True: 1.91M, False: 5.04k]
  ------------------
 1349|  1.91M|                                    out[0] = y[i];
 1350|  1.91M|                                    out[1] = coutput[1][i];
 1351|  1.91M|                                    out[2] = coutput[2][i];
 1352|  1.91M|                                    out[3] = 255;
 1353|  1.91M|                                    out += n;
 1354|  1.91M|                                }
 1355|  5.04k|                            }
 1356|  5.50k|                            else {
 1357|  5.50k|                                z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->img_x, n);
 1358|  5.50k|                            }
 1359|  10.5k|                        }
 1360|  3.16k|                        else if (z->img_n == 4) {
  ------------------
  |  Branch (1360:34): [True: 3.16k, False: 0]
  ------------------
 1361|  3.16k|                            if (z->app14_color_transform == 0) { // CMYK
  ------------------
  |  Branch (1361:33): [True: 3.16k, False: 0]
  ------------------
 1362|   312k|                                for (i = 0; i < z->img_x; ++i) {
  ------------------
  |  Branch (1362:45): [True: 309k, False: 3.16k]
  ------------------
 1363|   309k|                                    uint8_t m = coutput[3][i];
 1364|   309k|                                    out[0] = jpeg__blinn_8x8(coutput[0][i], m);
 1365|   309k|                                    out[1] = jpeg__blinn_8x8(coutput[1][i], m);
 1366|   309k|                                    out[2] = jpeg__blinn_8x8(coutput[2][i], m);
 1367|   309k|                                    out[3] = 255;
 1368|   309k|                                    out += n;
 1369|   309k|                                }
 1370|  3.16k|                            }
 1371|      0|                            else if (z->app14_color_transform == 2) { // YCCK
  ------------------
  |  Branch (1371:38): [True: 0, False: 0]
  ------------------
 1372|      0|                                z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->img_x, n);
 1373|      0|                                for (i = 0; i < z->img_x; ++i) {
  ------------------
  |  Branch (1373:45): [True: 0, False: 0]
  ------------------
 1374|      0|                                    uint8_t m = coutput[3][i];
 1375|      0|                                    out[0] = jpeg__blinn_8x8(255 - out[0], m);
 1376|      0|                                    out[1] = jpeg__blinn_8x8(255 - out[1], m);
 1377|      0|                                    out[2] = jpeg__blinn_8x8(255 - out[2], m);
 1378|      0|                                    out += n;
 1379|      0|                                }
 1380|      0|                            }
 1381|      0|                            else { // YCbCr + alpha?  Ignore the fourth channel for now
 1382|      0|                                z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->img_x, n);
 1383|      0|                            }
 1384|  3.16k|                        }
 1385|      0|                        else
 1386|      0|                            for (i = 0; i < z->img_x; ++i) {
  ------------------
  |  Branch (1386:41): [True: 0, False: 0]
  ------------------
 1387|      0|                                out[0] = out[1] = out[2] = y[i];
 1388|      0|                                out[3] = 255; // not used if n==3
 1389|      0|                                out += n;
 1390|      0|                            }
 1391|  13.7k|                    }
 1392|      0|                    else {
 1393|      0|                        if (is_rgb) 
  ------------------
  |  Branch (1393:29): [True: 0, False: 0]
  ------------------
 1394|      0|                        {
 1395|      0|                            if (n == 1)
  ------------------
  |  Branch (1395:33): [True: 0, False: 0]
  ------------------
 1396|      0|                                for (i = 0; i < z->img_x; ++i)
  ------------------
  |  Branch (1396:45): [True: 0, False: 0]
  ------------------
 1397|      0|                                    *out++ = jpeg__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
 1398|      0|                            else {
 1399|      0|                                for (i = 0; i < z->img_x; ++i, out += 2) {
  ------------------
  |  Branch (1399:45): [True: 0, False: 0]
  ------------------
 1400|      0|                                    out[0] = jpeg__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
 1401|      0|                                    out[1] = 255;
 1402|      0|                                }
 1403|      0|                            }
 1404|      0|                        }
 1405|      0|                        else if (z->img_n == 4 && z->app14_color_transform == 0) {
  ------------------
  |  Branch (1405:34): [True: 0, False: 0]
  |  Branch (1405:51): [True: 0, False: 0]
  ------------------
 1406|      0|                            for (i = 0; i < z->img_x; ++i) {
  ------------------
  |  Branch (1406:41): [True: 0, False: 0]
  ------------------
 1407|      0|                                uint8_t m = coutput[3][i];
 1408|      0|                                uint8_t r = jpeg__blinn_8x8(coutput[0][i], m);
 1409|      0|                                uint8_t g = jpeg__blinn_8x8(coutput[1][i], m);
 1410|      0|                                uint8_t b = jpeg__blinn_8x8(coutput[2][i], m);
 1411|      0|                                out[0] = jpeg__compute_y(r, g, b);
 1412|      0|                                out[1] = 255;
 1413|      0|                                out += n;
 1414|      0|                            }
 1415|      0|                        }
 1416|      0|                        else if (z->img_n == 4 && z->app14_color_transform == 2) {
  ------------------
  |  Branch (1416:34): [True: 0, False: 0]
  |  Branch (1416:51): [True: 0, False: 0]
  ------------------
 1417|      0|                            for (i = 0; i < z->img_x; ++i) {
  ------------------
  |  Branch (1417:41): [True: 0, False: 0]
  ------------------
 1418|      0|                                out[0] = jpeg__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
 1419|      0|                                out[1] = 255;
 1420|      0|                                out += n;
 1421|      0|                            }
 1422|      0|                        }
 1423|      0|                        else {
 1424|      0|                            uint8_t* y = coutput[0];
 1425|      0|                            if (n == 1)
  ------------------
  |  Branch (1425:33): [True: 0, False: 0]
  ------------------
 1426|      0|                                for (i = 0; i < z->img_x; ++i) out[i] = y[i];
  ------------------
  |  Branch (1426:45): [True: 0, False: 0]
  ------------------
 1427|      0|                            else
 1428|      0|                                for (i = 0; i < z->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
  ------------------
  |  Branch (1428:45): [True: 0, False: 0]
  ------------------
 1429|      0|                        }
 1430|      0|                    }
 1431|  13.7k|                }
 1432|    160|                *out_x = z->img_x;
 1433|    160|                *out_y = z->img_y;
 1434|    160|                if (comp) *comp = z->img_n >= 3 ? 3 : 1; // report original components, not output
  ------------------
  |  Branch (1434:21): [True: 160, False: 0]
  |  Branch (1434:35): [True: 160, False: 0]
  ------------------
 1435|    160|                return 1;
 1436|    160|            }
 1437|    160|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL23jpeg__decode_jpeg_imageEPNS0_11JpegContextE:
 1089|  1.45k|        {
 1090|  1.45k|            int m;
 1091|  1.45k|            j->restart_interval = 0;
 1092|  1.45k|            if (!DecodeJpegHeader(j, 0)) return 0;
  ------------------
  |  Branch (1092:17): [True: 756, False: 696]
  ------------------
 1093|    696|            m = jpeg__get_marker(j);
 1094|   279k|            while (!jpeg__EOI(m)) {
  ------------------
  |  | 1055|   279k|#define jpeg__EOI(x)         ((x) == 0xd9)
  ------------------
  |  Branch (1094:20): [True: 279k, False: 160]
  ------------------
 1095|   279k|                if (jpeg__SOS(m)) {
  ------------------
  |  | 1057|   279k|#define jpeg__SOS(x)         ((x) == 0xda)
  |  |  ------------------
  |  |  |  Branch (1057:30): [True: 185k, False: 94.0k]
  |  |  ------------------
  ------------------
 1096|   185k|                    if (!jpeg__process_scan_header(j)) return 0;
  ------------------
  |  Branch (1096:25): [True: 144, False: 184k]
  ------------------
 1097|   184k|                    if (!jpeg__parse_entropy_coded_data(j)) return 0;
  ------------------
  |  Branch (1097:25): [True: 116, False: 184k]
  ------------------
 1098|   184k|                    if (j->marker == JPEG__MARKER_none) {
  ------------------
  |  |  628|   184k|#define JPEG__MARKER_none  0xff
  ------------------
  |  Branch (1098:25): [True: 317, False: 184k]
  ------------------
 1099|       |                        // handle 0s at the end of image data from IP Kamera 9060
 1100|  3.77M|                        while (!j->stream->Eof()) {
  ------------------
  |  Branch (1100:32): [True: 3.77M, False: 141]
  ------------------
 1101|  3.77M|                            int x = j->stream->Get8u();
 1102|  3.77M|                            if (x == 255) {
  ------------------
  |  Branch (1102:33): [True: 176, False: 3.77M]
  ------------------
 1103|    176|                                j->marker = j->stream->Get8u();
 1104|    176|                                break;
 1105|    176|                            }
 1106|  3.77M|                        }
 1107|       |                        // if we reach eof without hitting a marker, jpeg__get_marker() below will fail and we'll eventually return 0
 1108|    317|                    }
 1109|   184k|                }
 1110|  94.0k|                else if (jpeg__DNL(m)) {
  ------------------
  |  | 1053|  94.0k|#define jpeg__DNL(x)         ((x) == 0xdc)
  |  |  ------------------
  |  |  |  Branch (1053:30): [True: 9.28k, False: 84.7k]
  |  |  ------------------
  ------------------
 1111|  9.28k|                    int Ld = j->stream->GetBe16u();
 1112|  9.28k|                    uint32_t NL = j->stream->GetBe16u();
 1113|  9.28k|                    if (Ld != 4) return JpegLoadError("bad DNL len", "Corrupt JPEG");
  ------------------
  |  Branch (1113:25): [True: 8, False: 9.27k]
  ------------------
 1114|  9.27k|                    if (NL != j->img_y) return JpegLoadError("bad DNL height", "Corrupt JPEG");
  ------------------
  |  Branch (1114:25): [True: 0, False: 9.27k]
  ------------------
 1115|  9.27k|                }
 1116|  84.7k|                else {
 1117|  84.7k|                    if (!jpeg__process_marker(j, m)) return 0;
  ------------------
  |  Branch (1117:25): [True: 268, False: 84.4k]
  ------------------
 1118|  84.7k|                }
 1119|   278k|                m = jpeg__get_marker(j);
 1120|   278k|            }
 1121|    160|            if (j->progressive)
  ------------------
  |  Branch (1121:17): [True: 160, False: 0]
  ------------------
 1122|    160|                jpeg__jpeg_finish(j);
 1123|    160|            return 1;
 1124|    696|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL16DecodeJpegHeaderEPNS0_11JpegContextEi:
 1062|  1.45k|        {
 1063|  1.45k|            int m;
 1064|  1.45k|            z->jfif = 0;
 1065|  1.45k|            z->app14_color_transform = -1; // valid values are 0,1,2
 1066|  1.45k|            z->marker = JPEG__MARKER_none; // initialize cached marker to empty
  ------------------
  |  |  628|  1.45k|#define JPEG__MARKER_none  0xff
  ------------------
 1067|  1.45k|            m = jpeg__get_marker(z);
 1068|  1.45k|            if (!jpeg__SOI(m)) return JpegLoadError("no SOI", "Corrupt JPEG");
  ------------------
  |  | 1054|  1.45k|#define jpeg__SOI(x)         ((x) == 0xd8)
  ------------------
  |  Branch (1068:17): [True: 0, False: 1.45k]
  ------------------
 1069|  1.45k|            if (scan) 
  ------------------
  |  Branch (1069:17): [True: 0, False: 1.45k]
  ------------------
 1070|      0|                return 1;
 1071|  1.45k|            m = jpeg__get_marker(z);
 1072|  2.97M|            while (!jpeg__SOF(m)) {
  ------------------
  |  | 1056|  2.97M|#define jpeg__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
  |  |  ------------------
  |  |  |  Branch (1056:31): [True: 128, False: 2.97M]
  |  |  |  Branch (1056:46): [True: 0, False: 2.97M]
  |  |  |  Branch (1056:61): [True: 644, False: 2.97M]
  |  |  ------------------
  ------------------
 1073|  2.97M|                if (!jpeg__process_marker(z, m)) return 0;
  ------------------
  |  Branch (1073:21): [True: 304, False: 2.97M]
  ------------------
 1074|  2.97M|                m = jpeg__get_marker(z);
 1075|  57.5M|                while (m == JPEG__MARKER_none) {
  ------------------
  |  |  628|  57.5M|#define JPEG__MARKER_none  0xff
  ------------------
  |  Branch (1075:24): [True: 54.5M, False: 2.97M]
  ------------------
 1076|       |                    // some files have extra padding after their blocks, so ok, we'll scan
 1077|  54.5M|                    if (z->stream->Eof()) 
  ------------------
  |  Branch (1077:25): [True: 376, False: 54.5M]
  ------------------
 1078|    376|                        return JpegLoadError("no SOF", "Corrupt JPEG");
 1079|  54.5M|                    m = jpeg__get_marker(z);
 1080|  54.5M|                }
 1081|  2.97M|            }
 1082|    772|            z->progressive = jpeg__SOF_progressive(m);
  ------------------
  |  | 1059|    772|#define jpeg__SOF_progressive(x)   ((x) == 0xc2)
  ------------------
 1083|    772|            if (!jpeg__process_frame_header(z, scan)) return 0;
  ------------------
  |  Branch (1083:17): [True: 76, False: 696]
  ------------------
 1084|    696|            return 1;
 1085|    772|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL26jpeg__process_frame_headerEPNS0_11JpegContextEi:
  969|    772|        {
  970|    772|            int Lf, p, i, q, h_max = 1, v_max = 1, c;
  971|    772|            Lf = z->stream->GetBe16u();         if (Lf < 11) return JpegLoadError("bad SOF len", "Corrupt JPEG"); // JPEG
  ------------------
  |  Branch (971:53): [True: 0, False: 772]
  ------------------
  972|    772|            p = z->stream->Get8u();            if (p != 8) return JpegLoadError("only 8-bit", "JPEG format not supported: 8-bit only"); // JPEG baseline
  ------------------
  |  Branch (972:52): [True: 8, False: 764]
  ------------------
  973|    764|            z->img_y = z->stream->GetBe16u();   if (z->img_y == 0) return JpegLoadError("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
  ------------------
  |  Branch (973:53): [True: 0, False: 764]
  ------------------
  974|    764|            z->img_x = z->stream->GetBe16u();   if (z->img_x == 0) return JpegLoadError("0 width", "Corrupt JPEG"); // JPEG requires
  ------------------
  |  Branch (974:53): [True: 0, False: 764]
  ------------------
  975|    764|            if (z->img_y > JpegMaxDimensions) return JpegLoadError("too large", "Very large image (corrupt?)");
  ------------------
  |  Branch (975:17): [True: 0, False: 764]
  ------------------
  976|    764|            if (z->img_x > JpegMaxDimensions) return JpegLoadError("too large", "Very large image (corrupt?)");
  ------------------
  |  Branch (976:17): [True: 0, False: 764]
  ------------------
  977|    764|            c = z->stream->Get8u();
  978|    764|            if (c != 3 && c != 1 && c != 4) return JpegLoadError("bad component count", "Corrupt JPEG");
  ------------------
  |  Branch (978:17): [True: 156, False: 608]
  |  Branch (978:27): [True: 156, False: 0]
  |  Branch (978:37): [True: 0, False: 156]
  ------------------
  979|    764|            z->img_n = c;
  980|  3.21k|            for (i = 0; i < c; ++i) {
  ------------------
  |  Branch (980:25): [True: 2.44k, False: 764]
  ------------------
  981|  2.44k|                z->img_comp[i].data = NULL;
  982|       |                //z->img_comp[i].linebuf = NULL;
  983|  2.44k|            }
  984|       |
  985|    764|            if (Lf != 8 + 3 * z->img_n) return JpegLoadError("bad SOF len", "Corrupt JPEG");
  ------------------
  |  Branch (985:17): [True: 4, False: 760]
  ------------------
  986|       |
  987|    760|            z->rgb = 0;
  988|  3.03k|            for (i = 0; i < z->img_n; ++i) {
  ------------------
  |  Branch (988:25): [True: 2.33k, False: 696]
  ------------------
  989|  2.33k|                static const unsigned char rgb[3] = { 'R', 'G', 'B' };
  990|  2.33k|                z->img_comp[i].id = z->stream->Get8u();
  991|  2.33k|                if (z->img_n == 3 && z->img_comp[i].id == rgb[i])
  ------------------
  |  Branch (991:21): [True: 1.78k, False: 552]
  |  Branch (991:38): [True: 4, False: 1.78k]
  ------------------
  992|      4|                    ++z->rgb;
  993|  2.33k|                q = z->stream->Get8u();
  994|  2.33k|                z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return JpegLoadError("bad H", "Corrupt JPEG");
  ------------------
  |  Branch (994:51): [True: 32, False: 2.30k]
  |  Branch (994:72): [True: 0, False: 2.30k]
  ------------------
  995|  2.30k|                z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return JpegLoadError("bad V", "Corrupt JPEG");
  ------------------
  |  Branch (995:51): [True: 0, False: 2.30k]
  |  Branch (995:72): [True: 0, False: 2.30k]
  ------------------
  996|  2.30k|                z->img_comp[i].tq = z->stream->Get8u();  if (z->img_comp[i].tq > 3) return JpegLoadError("bad TQ", "Corrupt JPEG");
  ------------------
  |  Branch (996:62): [True: 32, False: 2.27k]
  ------------------
  997|  2.30k|            }
  998|       |
  999|    696|            if (scan) 
  ------------------
  |  Branch (999:17): [True: 0, False: 696]
  ------------------
 1000|      0|                return 1;
 1001|       |
 1002|    696|            if (z->img_x* z->img_y * z->img_n > INT_MAX) return JpegLoadError("too large", "Image too large to decode");
  ------------------
  |  Branch (1002:17): [True: 0, False: 696]
  ------------------
 1003|       |
 1004|  2.90k|            for (i = 0; i < z->img_n; ++i) {
  ------------------
  |  Branch (1004:25): [True: 2.21k, False: 696]
  ------------------
 1005|  2.21k|                if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
  ------------------
  |  Branch (1005:21): [True: 836, False: 1.37k]
  ------------------
 1006|  2.21k|                if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
  ------------------
  |  Branch (1006:21): [True: 800, False: 1.41k]
  ------------------
 1007|  2.21k|            }
 1008|       |
 1009|       |            // compute interleaved mcu info
 1010|    696|            z->img_h_max = h_max;
 1011|    696|            z->img_v_max = v_max;
 1012|    696|            z->img_mcu_w = h_max * 8;
 1013|    696|            z->img_mcu_h = v_max * 8;
 1014|       |            // these sizes can't be more than 17 bits
 1015|    696|            z->img_mcu_x = (z->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
 1016|    696|            z->img_mcu_y = (z->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
 1017|       |
 1018|  2.90k|            for (i = 0; i < z->img_n; ++i) {
  ------------------
  |  Branch (1018:25): [True: 2.21k, False: 696]
  ------------------
 1019|       |                // number of effective pixels (e.g. for non-interleaved MCU)
 1020|  2.21k|                z->img_comp[i].x = (z->img_x * z->img_comp[i].h + h_max - 1) / h_max;
 1021|  2.21k|                z->img_comp[i].y = (z->img_y * z->img_comp[i].v + v_max - 1) / v_max;
 1022|       |                // to simplify generation, we'll allocate enough memory to decode
 1023|       |                // the bogus oversized data from using interleaved MCUs and their
 1024|       |                // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
 1025|       |                // discard the extra data until colorspace conversion
 1026|       |                //
 1027|       |                // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
 1028|       |                // so these muls can't overflow with 32-bit ints (which we require)
 1029|  2.21k|                z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
 1030|  2.21k|                z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
 1031|  2.21k|                z->img_comp[i].coeff = 0;
 1032|       |                //z->img_comp[i].raw_coeff = 0;
 1033|       |                //z->img_comp[i].linebuf = NULL;
 1034|  2.21k|                z->img_comp[i].bufD.Resize(z->img_comp[i].w2 * z->img_comp[i].h2);
 1035|  2.21k|                if (z->img_comp[i].bufD.Empty())
  ------------------
  |  Branch (1035:21): [True: 0, False: 2.21k]
  ------------------
 1036|      0|                    return JpegLoadError("outofmem", "Out of memory");
 1037|  2.21k|                z->img_comp[i].data = z->img_comp[i].bufD.data;
 1038|  2.21k|                if (z->progressive) {
  ------------------
  |  Branch (1038:21): [True: 1.82k, False: 384]
  ------------------
 1039|       |                    // w2, h2 are multiples of 8 (see above)
 1040|  1.82k|                    z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
 1041|  1.82k|                    z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
 1042|  1.82k|                    z->img_comp[i].bufC.Resize(z->img_comp[i].w2 * z->img_comp[i].h2 * sizeof(short));
 1043|  1.82k|                    if (z->img_comp[i].bufC.Empty())
  ------------------
  |  Branch (1043:25): [True: 0, False: 1.82k]
  ------------------
 1044|      0|                        return JpegLoadError("outofmem", "Out of memory");
 1045|  1.82k|                    z->img_comp[i].coeff = (short*)z->img_comp[i].bufC.data;
 1046|  1.82k|                }
 1047|  2.21k|            }
 1048|       |
 1049|    696|            return 1;
 1050|    696|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL16jpeg__get_markerEPNS0_11JpegContextE:
  633|  57.8M|        {
  634|  57.8M|            uint8_t x;
  635|  57.8M|            if (j->marker != JPEG__MARKER_none) { x = j->marker; j->marker = JPEG__MARKER_none; return x; }
  ------------------
  |  |  628|  57.8M|#define JPEG__MARKER_none  0xff
  ------------------
                          if (j->marker != JPEG__MARKER_none) { x = j->marker; j->marker = JPEG__MARKER_none; return x; }
  ------------------
  |  |  628|   184k|#define JPEG__MARKER_none  0xff
  ------------------
  |  Branch (635:17): [True: 184k, False: 57.6M]
  ------------------
  636|  57.6M|            x = j->stream->Get8u();
  637|  57.6M|            if (x != 0xff) return JPEG__MARKER_none;
  ------------------
  |  |  628|  54.5M|#define JPEG__MARKER_none  0xff
  ------------------
  |  Branch (637:17): [True: 54.5M, False: 3.07M]
  ------------------
  638|  8.31M|            while (x == 0xff)
  ------------------
  |  Branch (638:20): [True: 5.24M, False: 3.07M]
  ------------------
  639|  5.24M|                x = j->stream->Get8u(); // consume repeated 0xff fill bytes
  640|  3.07M|            return x;
  641|  57.6M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL25jpeg__process_scan_headerEPNS0_11JpegContextE:
  929|   185k|        {
  930|   185k|            int i;
  931|   185k|            int Ls = z->stream->GetBe16u();
  932|   185k|            z->scan_n = z->stream->Get8u();
  933|   185k|            if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->img_n) return JpegLoadError("bad SOS component count", "Corrupt JPEG");
  ------------------
  |  Branch (933:17): [True: 0, False: 185k]
  |  Branch (933:34): [True: 0, False: 185k]
  |  Branch (933:51): [True: 0, False: 185k]
  ------------------
  934|   185k|            if (Ls != 6 + 2 * z->scan_n) return JpegLoadError("bad SOS len", "Corrupt JPEG");
  ------------------
  |  Branch (934:17): [True: 0, False: 185k]
  ------------------
  935|   388k|            for (i = 0; i < z->scan_n; ++i) {
  ------------------
  |  Branch (935:25): [True: 203k, False: 184k]
  ------------------
  936|   203k|                int id = z->stream->Get8u(), which;
  937|   203k|                int q = z->stream->Get8u();
  938|   420k|                for (which = 0; which < z->img_n; ++which)
  ------------------
  |  Branch (938:33): [True: 419k, False: 68]
  ------------------
  939|   419k|                    if (z->img_comp[which].id == id)
  ------------------
  |  Branch (939:25): [True: 203k, False: 216k]
  ------------------
  940|   203k|                        break;
  941|   203k|                if (which == z->img_n) return 0; // no match
  ------------------
  |  Branch (941:21): [True: 68, False: 203k]
  ------------------
  942|   203k|                z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return JpegLoadError("bad DC huff", "Corrupt JPEG");
  ------------------
  |  Branch (942:55): [True: 0, False: 203k]
  ------------------
  943|   203k|                z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return JpegLoadError("bad AC huff", "Corrupt JPEG");
  ------------------
  |  Branch (943:55): [True: 56, False: 203k]
  ------------------
  944|   203k|                z->order[i] = which;
  945|   203k|            }
  946|       |
  947|   184k|            {
  948|   184k|                int aa;
  949|   184k|                z->spec_start = z->stream->Get8u();
  950|   184k|                z->spec_end = z->stream->Get8u(); // should be 63, but might be 0
  951|   184k|                aa = z->stream->Get8u();
  952|   184k|                z->succ_high = (aa >> 4);
  953|   184k|                z->succ_low = (aa & 15);
  954|   184k|                if (z->progressive) {
  ------------------
  |  Branch (954:21): [True: 184k, False: 104]
  ------------------
  955|   184k|                    if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
  ------------------
  |  Branch (955:25): [True: 20, False: 184k]
  |  Branch (955:47): [True: 0, False: 184k]
  |  Branch (955:67): [True: 0, False: 184k]
  |  Branch (955:98): [True: 0, False: 184k]
  |  Branch (955:119): [True: 0, False: 184k]
  ------------------
  956|     20|                        return JpegLoadError("bad SOS", "Corrupt JPEG");
  957|   184k|                }
  958|    104|                else {
  959|    104|                    if (z->spec_start != 0) return JpegLoadError("bad SOS", "Corrupt JPEG");
  ------------------
  |  Branch (959:25): [True: 0, False: 104]
  ------------------
  960|    104|                    if (z->succ_high != 0 || z->succ_low != 0) return JpegLoadError("bad SOS", "Corrupt JPEG");
  ------------------
  |  Branch (960:25): [True: 0, False: 104]
  |  Branch (960:46): [True: 0, False: 104]
  ------------------
  961|    104|                    z->spec_end = 63;
  962|    104|                }
  963|   184k|            }
  964|       |
  965|   184k|            return 1;
  966|   184k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL30jpeg__parse_entropy_coded_dataEPNS0_11JpegContextE:
  663|   184k|        {
  664|   184k|            jpeg__jpeg_reset(z);
  665|   184k|            if (!z->progressive) {
  ------------------
  |  Branch (665:17): [True: 104, False: 184k]
  ------------------
  666|    104|                if (z->scan_n == 1) {
  ------------------
  |  Branch (666:21): [True: 0, False: 104]
  ------------------
  667|      0|                    int i, j;
  668|      0|                    JPEG_SIMD_ALIGN(short, data[64]);
  ------------------
  |  |   63|      0|#define JPEG_SIMD_ALIGN(type, name) SIMD_ALIGNED(16) type name
  |  |  ------------------
  |  |  |  |  150|      0|#define SIMD_ALIGNED(x) __attribute__ ((aligned(x)))
  |  |  ------------------
  ------------------
  669|      0|                    int n = z->order[0];
  670|       |                    // non-interleaved data, we just need to process one block at a time,
  671|       |                    // in trivial scanline order
  672|       |                    // number of blocks to do just depends on how many actual "pixels" this
  673|       |                    // component has, independent of interleaved MCU blocking and such
  674|      0|                    int w = (z->img_comp[n].x + 7) >> 3;
  675|      0|                    int h = (z->img_comp[n].y + 7) >> 3;
  676|      0|                    for (j = 0; j < h; ++j) {
  ------------------
  |  Branch (676:33): [True: 0, False: 0]
  ------------------
  677|      0|                        for (i = 0; i < w; ++i) {
  ------------------
  |  Branch (677:37): [True: 0, False: 0]
  ------------------
  678|      0|                            int ha = z->img_comp[n].ha;
  679|      0|                            if (!jpeg__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->huff_ac[ha].fast_ac, n, z->dequant[z->img_comp[n].tq])) return 0;
  ------------------
  |  Branch (679:33): [True: 0, False: 0]
  ------------------
  680|      0|                            z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);
  681|       |                            // every data block is an MCU, so countdown the restart interval
  682|      0|                            if (--z->todo <= 0) {
  ------------------
  |  Branch (682:33): [True: 0, False: 0]
  ------------------
  683|      0|                                if (z->code_bits < 24) jpeg__grow_buffer_unsafe(z);
  ------------------
  |  Branch (683:37): [True: 0, False: 0]
  ------------------
  684|       |                                // if it's NOT a restart, then just bail, so we get corrupt data
  685|       |                                // rather than no data
  686|      0|                                if (!JPEG__RESTART(z->marker)) return 1;
  ------------------
  |  |  645|      0|#define JPEG__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
  |  |  ------------------
  |  |  |  Branch (645:31): [True: 0, False: 0]
  |  |  |  Branch (645:46): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  687|      0|                                jpeg__jpeg_reset(z);
  688|      0|                            }
  689|      0|                        }
  690|      0|                    }
  691|      0|                    return 1;
  692|      0|                }
  693|    104|                else { // interleaved
  694|    104|                    int i, j, k, x, y;
  695|    104|                    JPEG_SIMD_ALIGN(short, data[64]);
  ------------------
  |  |   63|    104|#define JPEG_SIMD_ALIGN(type, name) SIMD_ALIGNED(16) type name
  |  |  ------------------
  |  |  |  |  150|    104|#define SIMD_ALIGNED(x) __attribute__ ((aligned(x)))
  |  |  ------------------
  ------------------
  696|  20.9k|                    for (j = 0; j < z->img_mcu_y; ++j) {
  ------------------
  |  Branch (696:33): [True: 20.8k, False: 100]
  ------------------
  697|  6.00M|                        for (i = 0; i < z->img_mcu_x; ++i) {
  ------------------
  |  Branch (697:37): [True: 5.98M, False: 20.8k]
  ------------------
  698|       |                            // scan an interleaved mcu... process scan_n components in order
  699|  23.9M|                            for (k = 0; k < z->scan_n; ++k) {
  ------------------
  |  Branch (699:41): [True: 17.9M, False: 5.98M]
  ------------------
  700|  17.9M|                                int n = z->order[k];
  701|       |                                // scan out an mcu's worth of this component; that's just determined
  702|       |                                // by the basic H and V specified for the component
  703|  41.8M|                                for (y = 0; y < z->img_comp[n].v; ++y) {
  ------------------
  |  Branch (703:45): [True: 23.9M, False: 17.9M]
  ------------------
  704|  59.8M|                                    for (x = 0; x < z->img_comp[n].h; ++x) {
  ------------------
  |  Branch (704:49): [True: 35.8M, False: 23.9M]
  ------------------
  705|  35.8M|                                        int x2 = (i * z->img_comp[n].h + x) * 8;
  706|  35.8M|                                        int y2 = (j * z->img_comp[n].v + y) * 8;
  707|  35.8M|                                        int ha = z->img_comp[n].ha;
  708|  35.8M|                                        if (!jpeg__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->huff_ac[ha].fast_ac, n, z->dequant[z->img_comp[n].tq])) return 0;
  ------------------
  |  Branch (708:45): [True: 4, False: 35.8M]
  ------------------
  709|  35.8M|                                        z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data);
  710|  35.8M|                                    }
  711|  23.9M|                                }
  712|  17.9M|                            }
  713|       |                            // after all interleaved components, that's an interleaved MCU,
  714|       |                            // so now count down the restart interval
  715|  5.98M|                            if (--z->todo <= 0) {
  ------------------
  |  Branch (715:33): [True: 0, False: 5.98M]
  ------------------
  716|      0|                                if (z->code_bits < 24) jpeg__grow_buffer_unsafe(z);
  ------------------
  |  Branch (716:37): [True: 0, False: 0]
  ------------------
  717|      0|                                if (!JPEG__RESTART(z->marker)) return 1;
  ------------------
  |  |  645|      0|#define JPEG__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
  |  |  ------------------
  |  |  |  Branch (645:31): [True: 0, False: 0]
  |  |  |  Branch (645:46): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  718|      0|                                jpeg__jpeg_reset(z);
  719|      0|                            }
  720|  5.98M|                        }
  721|  20.8k|                    }
  722|    100|                    return 1;
  723|    104|                }
  724|    104|            }
  725|   184k|            else {
  726|   184k|                if (z->scan_n == 1) {
  ------------------
  |  Branch (726:21): [True: 178k, False: 6.13k]
  ------------------
  727|   178k|                    int i, j;
  728|   178k|                    int n = z->order[0];
  729|       |                    // non-interleaved data, we just need to process one block at a time,
  730|       |                    // in trivial scanline order
  731|       |                    // number of blocks to do just depends on how many actual "pixels" this
  732|       |                    // component has, independent of interleaved MCU blocking and such
  733|   178k|                    int w = (z->img_comp[n].x + 7) >> 3;
  734|   178k|                    int h = (z->img_comp[n].y + 7) >> 3;
  735|  27.0M|                    for (j = 0; j < h; ++j) {
  ------------------
  |  Branch (735:33): [True: 26.8M, False: 178k]
  ------------------
  736|   161M|                        for (i = 0; i < w; ++i) {
  ------------------
  |  Branch (736:37): [True: 134M, False: 26.8M]
  ------------------
  737|   134M|                            short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
  738|   134M|                            if (z->spec_start == 0) {
  ------------------
  |  Branch (738:33): [True: 0, False: 134M]
  ------------------
  739|      0|                                if (!jpeg__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
  ------------------
  |  Branch (739:37): [True: 0, False: 0]
  ------------------
  740|      0|                                    return 0;
  741|      0|                            }
  742|   134M|                            else {
  743|   134M|                                int ha = z->img_comp[n].ha;
  744|   134M|                                if (!jpeg__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->huff_ac[ha].fast_ac))
  ------------------
  |  Branch (744:37): [True: 112, False: 134M]
  ------------------
  745|    112|                                    return 0;
  746|   134M|                            }
  747|       |                            // every data block is an MCU, so countdown the restart interval
  748|   134M|                            if (--z->todo <= 0) {
  ------------------
  |  Branch (748:33): [True: 0, False: 134M]
  ------------------
  749|      0|                                if (z->code_bits < 24) jpeg__grow_buffer_unsafe(z);
  ------------------
  |  Branch (749:37): [True: 0, False: 0]
  ------------------
  750|      0|                                if (!JPEG__RESTART(z->marker)) return 1;
  ------------------
  |  |  645|      0|#define JPEG__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
  |  |  ------------------
  |  |  |  Branch (645:31): [True: 0, False: 0]
  |  |  |  Branch (645:46): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  751|      0|                                jpeg__jpeg_reset(z);
  752|      0|                            }
  753|   134M|                        }
  754|  26.8M|                    }
  755|   178k|                    return 1;
  756|   178k|                }
  757|  6.13k|                else { // interleaved
  758|  6.13k|                    int i, j, k, x, y;
  759|  73.2k|                    for (j = 0; j < z->img_mcu_y; ++j) {
  ------------------
  |  Branch (759:33): [True: 67.1k, False: 6.13k]
  ------------------
  760|   537k|                        for (i = 0; i < z->img_mcu_x; ++i) {
  ------------------
  |  Branch (760:37): [True: 469k, False: 67.1k]
  ------------------
  761|       |                            // scan an interleaved mcu... process scan_n components in order
  762|  2.34M|                            for (k = 0; k < z->scan_n; ++k) {
  ------------------
  |  Branch (762:41): [True: 1.87M, False: 469k]
  ------------------
  763|  1.87M|                                int n = z->order[k];
  764|       |                                // scan out an mcu's worth of this component; that's just determined
  765|       |                                // by the basic H and V specified for the component
  766|  4.22M|                                for (y = 0; y < z->img_comp[n].v; ++y) {
  ------------------
  |  Branch (766:45): [True: 2.34M, False: 1.87M]
  ------------------
  767|  5.16M|                                    for (x = 0; x < z->img_comp[n].h; ++x) {
  ------------------
  |  Branch (767:49): [True: 2.81M, False: 2.34M]
  ------------------
  768|  2.81M|                                        int x2 = (i * z->img_comp[n].h + x);
  769|  2.81M|                                        int y2 = (j * z->img_comp[n].v + y);
  770|  2.81M|                                        short* data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
  771|  2.81M|                                        if (!jpeg__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
  ------------------
  |  Branch (771:45): [True: 0, False: 2.81M]
  ------------------
  772|      0|                                            return 0;
  773|  2.81M|                                    }
  774|  2.34M|                                }
  775|  1.87M|                            }
  776|       |                            // after all interleaved components, that's an interleaved MCU,
  777|       |                            // so now count down the restart interval
  778|   469k|                            if (--z->todo <= 0) {
  ------------------
  |  Branch (778:33): [True: 0, False: 469k]
  ------------------
  779|      0|                                if (z->code_bits < 24) jpeg__grow_buffer_unsafe(z);
  ------------------
  |  Branch (779:37): [True: 0, False: 0]
  ------------------
  780|      0|                                if (!JPEG__RESTART(z->marker)) return 1;
  ------------------
  |  |  645|      0|#define JPEG__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
  |  |  ------------------
  |  |  |  Branch (645:31): [True: 0, False: 0]
  |  |  |  Branch (645:46): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  781|      0|                                jpeg__jpeg_reset(z);
  782|      0|                            }
  783|   469k|                        }
  784|  67.1k|                    }
  785|  6.13k|                    return 1;
  786|  6.13k|                }
  787|   184k|            }
  788|   184k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL16jpeg__jpeg_resetEPNS0_11JpegContextE:
  650|   184k|        {
  651|   184k|            j->code_bits = 0;
  652|   184k|            j->code_buffer = 0;
  653|   184k|            j->nomore = 0;
  654|   184k|            j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
  655|   184k|            j->marker = JPEG__MARKER_none;
  ------------------
  |  |  628|   184k|#define JPEG__MARKER_none  0xff
  ------------------
  656|   184k|            j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
  ------------------
  |  Branch (656:23): [True: 0, False: 184k]
  ------------------
  657|   184k|            j->eob_run = 0;
  658|       |            // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
  659|       |            // since we don't even allow 1<<30 pixels
  660|   184k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL23jpeg__jpeg_decode_blockEPNS0_11JpegContextEPsPNS0_11JpegHuffmanES5_S3_iPt:
  308|  35.8M|        {
  309|  35.8M|            int diff, dc, k;
  310|  35.8M|            int t;
  311|       |
  312|  35.8M|            if (j->code_bits < 16) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (312:17): [True: 5.95M, False: 29.9M]
  ------------------
  313|  35.8M|            t = jpeg__jpeg_huff_decode(j, hdc);
  314|  35.8M|            if (t < 0) return JpegLoadError("bad huffman code", "Corrupt JPEG");
  ------------------
  |  Branch (314:17): [True: 4, False: 35.8M]
  ------------------
  315|       |
  316|       |            // 0 all the ac values now so we can do it 32-bits at a time
  317|  35.8M|            memset(data, 0, 64 * sizeof(data[0]));
  318|       |
  319|  35.8M|            diff = t ? jpeg__extend_receive(j, t) : 0;
  ------------------
  |  Branch (319:20): [True: 2.54M, False: 33.3M]
  ------------------
  320|  35.8M|            dc = j->img_comp[b].dc_pred + diff;
  321|  35.8M|            j->img_comp[b].dc_pred = dc;
  322|  35.8M|            data[0] = (short)(dc * dequant[0]);
  323|       |
  324|       |            // decode AC components, see JPEG spec
  325|  35.8M|            k = 1;
  326|  1.45G|            do {
  327|  1.45G|                unsigned int zig;
  328|  1.45G|                int c, r, s;
  329|  1.45G|                if (j->code_bits < 16) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (329:21): [True: 280M, False: 1.17G]
  ------------------
  330|  1.45G|                c = (j->code_buffer >> (32 - JpegFastBits)) & ((1 << JpegFastBits) - 1);
  331|  1.45G|                r = fac[c];
  332|  1.45G|                if (r) { // fast-AC path
  ------------------
  |  Branch (332:21): [True: 1.43G, False: 21.4M]
  ------------------
  333|  1.43G|                    k += (r >> 4) & 15; // run
  334|  1.43G|                    s = r & 15; // combined length
  335|  1.43G|                    j->code_buffer <<= s;
  336|  1.43G|                    j->code_bits -= s;
  337|       |                    // decode into unzigzag'd location
  338|  1.43G|                    zig = Base::JpegDeZigZag[k++];
  339|  1.43G|                    data[zig] = (short)((r >> 8) * dequant[zig]);
  340|  1.43G|                }
  341|  21.4M|                else {
  342|  21.4M|                    int rs = jpeg__jpeg_huff_decode(j, hac);
  343|  21.4M|                    if (rs < 0) return JpegLoadError("bad huffman code", "Corrupt JPEG");
  ------------------
  |  Branch (343:25): [True: 0, False: 21.4M]
  ------------------
  344|  21.4M|                    s = rs & 15;
  345|  21.4M|                    r = rs >> 4;
  346|  21.4M|                    if (s == 0) {
  ------------------
  |  Branch (346:25): [True: 13.7M, False: 7.70M]
  ------------------
  347|  13.7M|                        if (rs != 0xf0) break; // end block
  ------------------
  |  Branch (347:29): [True: 13.6M, False: 53.7k]
  ------------------
  348|  53.7k|                        k += 16;
  349|  53.7k|                    }
  350|  7.70M|                    else {
  351|  7.70M|                        k += r;
  352|       |                        // decode into unzigzag'd location
  353|  7.70M|                        zig = Base::JpegDeZigZag[k++];
  354|  7.70M|                        data[zig] = (short)(jpeg__extend_receive(j, s) * dequant[zig]);
  355|  7.70M|                    }
  356|  21.4M|                }
  357|  1.45G|            } while (k < 64);
  ------------------
  |  Branch (357:22): [True: 1.42G, False: 22.2M]
  ------------------
  358|  35.8M|            return 1;
  359|  35.8M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL22jpeg__jpeg_huff_decodeEPNS0_11JpegContextEPKNS0_11JpegHuffmanE:
  216|   145M|        {
  217|   145M|            unsigned int temp;
  218|   145M|            int c, k;
  219|       |
  220|   145M|            if (j->code_bits < 16) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (220:17): [True: 32.6M, False: 112M]
  ------------------
  221|       |
  222|       |            // look at the top FAST_BITS and determine what symbol ID it is,
  223|       |            // if the code is <= FAST_BITS
  224|   145M|            c = (j->code_buffer >> (32 - JpegFastBits)) & ((1 << JpegFastBits) - 1);
  225|   145M|            k = h->fast[c];
  226|   145M|            if (k < 255) {
  ------------------
  |  Branch (226:17): [True: 144M, False: 804k]
  ------------------
  227|   144M|                int s = h->size[k];
  228|   144M|                if (s > j->code_bits)
  ------------------
  |  Branch (228:21): [True: 0, False: 144M]
  ------------------
  229|      0|                    return -1;
  230|   144M|                j->code_buffer <<= s;
  231|   144M|                j->code_bits -= s;
  232|   144M|                return h->values[k];
  233|   144M|            }
  234|       |
  235|       |            // naive test is to shift the code_buffer down so k bits are
  236|       |            // valid, then test against maxcode. To speed this up, we've
  237|       |            // preshifted maxcode left so that it has (16-k) 0s at the
  238|       |            // end; in other words, regardless of the number of bits, it
  239|       |            // wants to be compared against something shifted to have 16;
  240|       |            // that way we don't need to shift inside the loop.
  241|   804k|            temp = j->code_buffer >> 16;
  242|   979k|            for (k = JpegFastBits + 1; ; ++k)
  243|  1.78M|                if (temp < h->maxcode[k])
  ------------------
  |  Branch (243:21): [True: 804k, False: 979k]
  ------------------
  244|   804k|                    break;
  245|   804k|            if (k == 17) {
  ------------------
  |  Branch (245:17): [True: 6, False: 804k]
  ------------------
  246|       |                // error! code not found
  247|      6|                j->code_bits -= 16;
  248|      6|                return -1;
  249|      6|            }
  250|       |
  251|   804k|            if (k > j->code_bits)
  ------------------
  |  Branch (251:17): [True: 0, False: 804k]
  ------------------
  252|      0|                return -1;
  253|       |
  254|       |            // convert the huffman code to the symbol id
  255|   804k|            c = ((j->code_buffer >> (32 - k)) & jpeg__bmask[k]) + h->delta[k];
  256|   804k|            assert((((j->code_buffer) >> (32 - h->size[c])) & jpeg__bmask[h->size[c]]) == h->code[c]);
  257|       |
  258|       |            // convert the id to a symbol
  259|   804k|            j->code_bits -= k;
  260|   804k|            j->code_buffer <<= k;
  261|   804k|            return h->values[c];
  262|   804k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL20jpeg__extend_receiveEPNS0_11JpegContextEi:
  270|  10.2M|        {
  271|  10.2M|            unsigned int k;
  272|  10.2M|            int sgn;
  273|  10.2M|            if (j->code_bits < n) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (273:17): [True: 62.6k, False: 10.1M]
  ------------------
  274|       |
  275|  10.2M|            sgn = (int32_t)j->code_buffer >> 31; // sign bit is always in MSB
  276|  10.2M|            k = jpeg_lrot(j->code_buffer, n);
  ------------------
  |  |   61|  10.2M|#define jpeg_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
  ------------------
  277|  10.2M|            if (n < 0 || n >= (int)(sizeof(jpeg__bmask) / sizeof(*jpeg__bmask))) return 0;
  ------------------
  |  Branch (277:17): [True: 0, False: 10.2M]
  |  Branch (277:26): [True: 36, False: 10.2M]
  ------------------
  278|  10.2M|            j->code_buffer = k & ~jpeg__bmask[n];
  279|  10.2M|            k &= jpeg__bmask[n];
  280|  10.2M|            j->code_bits -= n;
  281|  10.2M|            return k + (jpeg__jbias[n] & ~sgn);
  282|  10.2M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL24jpeg__grow_buffer_unsafeEPNS0_11JpegContextE:
  193|   323M|        {
  194|   659M|            do {
  195|   659M|                unsigned int b = j->nomore ? 0 : j->stream->Get8u();
  ------------------
  |  Branch (195:34): [True: 100M, False: 558M]
  ------------------
  196|   659M|                if (b == 0xff) {
  ------------------
  |  Branch (196:21): [True: 377k, False: 658M]
  ------------------
  197|   377k|                    int c = j->stream->Get8u();
  198|   378k|                    while (c == 0xff) 
  ------------------
  |  Branch (198:28): [True: 13, False: 377k]
  ------------------
  199|     13|                        c = j->stream->Get8u(); // consume fill bytes
  200|   377k|                    if (c != 0) {
  ------------------
  |  Branch (200:25): [True: 184k, False: 193k]
  ------------------
  201|   184k|                        j->marker = (unsigned char)c;
  202|   184k|                        j->nomore = 1;
  203|   184k|                        return;
  204|   184k|                    }
  205|   377k|                }
  206|   659M|                j->code_buffer |= b << (24 - j->code_bits);
  207|   659M|                j->code_bits += 8;
  208|   659M|            } while (j->code_bits <= 24);
  ------------------
  |  Branch (208:22): [True: 335M, False: 323M]
  ------------------
  209|   323M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL31jpeg__jpeg_decode_block_prog_dcEPNS0_11JpegContextEPsPNS0_11JpegHuffmanEi:
  362|  2.81M|        {
  363|  2.81M|            int diff, dc;
  364|  2.81M|            int t;
  365|  2.81M|            if (j->spec_end != 0) return JpegLoadError("can't merge dc and ac", "Corrupt JPEG");
  ------------------
  |  Branch (365:17): [True: 0, False: 2.81M]
  ------------------
  366|       |
  367|  2.81M|            if (j->code_bits < 16) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (367:17): [True: 178k, False: 2.64M]
  ------------------
  368|       |
  369|  2.81M|            if (j->succ_high == 0) {
  ------------------
  |  Branch (369:17): [True: 0, False: 2.81M]
  ------------------
  370|       |                // first scan for DC coefficient, must be first
  371|      0|                memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
  372|      0|                t = jpeg__jpeg_huff_decode(j, hdc);
  373|      0|                if (t == -1) return JpegLoadError("can't merge dc and ac", "Corrupt JPEG");
  ------------------
  |  Branch (373:21): [True: 0, False: 0]
  ------------------
  374|      0|                diff = t ? jpeg__extend_receive(j, t) : 0;
  ------------------
  |  Branch (374:24): [True: 0, False: 0]
  ------------------
  375|       |
  376|      0|                dc = j->img_comp[b].dc_pred + diff;
  377|      0|                j->img_comp[b].dc_pred = dc;
  378|      0|                data[0] = (short)(dc << j->succ_low);
  379|      0|            }
  380|  2.81M|            else {
  381|       |                // refinement scan for DC coefficient
  382|  2.81M|                if (jpeg__jpeg_get_bit(j))
  ------------------
  |  Branch (382:21): [True: 7.21k, False: 2.81M]
  ------------------
  383|  7.21k|                    data[0] += (short)(1 << j->succ_low);
  384|  2.81M|            }
  385|  2.81M|            return 1;
  386|  2.81M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL18jpeg__jpeg_get_bitEPNS0_11JpegContextE:
  297|   486M|        {
  298|   486M|            unsigned int k;
  299|   486M|            if (j->code_bits < 1) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (299:17): [True: 4.45M, False: 481M]
  ------------------
  300|   486M|            k = j->code_buffer;
  301|   486M|            j->code_buffer <<= 1;
  302|   486M|            --j->code_bits;
  303|   486M|            return k & 0x80000000;
  304|   486M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL31jpeg__jpeg_decode_block_prog_acEPNS0_11JpegContextEPsPNS0_11JpegHuffmanES3_:
  391|   134M|        {
  392|   134M|            int k;
  393|   134M|            if (j->spec_start == 0) return JpegLoadError("can't merge dc and ac", "Corrupt JPEG");
  ------------------
  |  Branch (393:17): [True: 0, False: 134M]
  ------------------
  394|       |
  395|   134M|            if (j->succ_high == 0) {
  ------------------
  |  Branch (395:17): [True: 71.1k, False: 134M]
  ------------------
  396|  71.1k|                int shift = j->succ_low;
  397|       |
  398|  71.1k|                if (j->eob_run) {
  ------------------
  |  Branch (398:21): [True: 19.7k, False: 51.4k]
  ------------------
  399|  19.7k|                    --j->eob_run;
  400|  19.7k|                    return 1;
  401|  19.7k|                }
  402|       |
  403|  51.4k|                k = j->spec_start;
  404|  51.5k|                do {
  405|  51.5k|                    unsigned int zig;
  406|  51.5k|                    int c, r, s;
  407|  51.5k|                    if (j->code_bits < 16) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (407:25): [True: 9.80k, False: 41.7k]
  ------------------
  408|  51.5k|                    c = (j->code_buffer >> (32 - JpegFastBits)) & ((1 << JpegFastBits) - 1);
  409|  51.5k|                    r = fac[c];
  410|  51.5k|                    if (r) { // fast-AC path
  ------------------
  |  Branch (410:25): [True: 50.4k, False: 1.18k]
  ------------------
  411|  50.4k|                        k += (r >> 4) & 15; // run
  412|  50.4k|                        s = r & 15; // combined length
  413|  50.4k|                        j->code_buffer <<= s;
  414|  50.4k|                        j->code_bits -= s;
  415|  50.4k|                        zig = Base::JpegDeZigZag[k++];
  416|  50.4k|                        data[zig] = (short)((r >> 8) << shift);
  417|  50.4k|                    }
  418|  1.18k|                    else {
  419|  1.18k|                        int rs = jpeg__jpeg_huff_decode(j, hac);
  420|  1.18k|                        if (rs < 0) return JpegLoadError("bad huffman code", "Corrupt JPEG");
  ------------------
  |  Branch (420:29): [True: 0, False: 1.18k]
  ------------------
  421|  1.18k|                        s = rs & 15;
  422|  1.18k|                        r = rs >> 4;
  423|  1.18k|                        if (s == 0) {
  ------------------
  |  Branch (423:29): [True: 1.18k, False: 0]
  ------------------
  424|  1.18k|                            if (r < 15) {
  ------------------
  |  Branch (424:33): [True: 592, False: 588]
  ------------------
  425|    592|                                j->eob_run = (1 << r);
  426|    592|                                if (r)
  ------------------
  |  Branch (426:37): [True: 488, False: 104]
  ------------------
  427|    488|                                    j->eob_run += jpeg__jpeg_get_bits(j, r);
  428|    592|                                --j->eob_run;
  429|    592|                                break;
  430|    592|                            }
  431|    588|                            k += 16;
  432|    588|                        }
  433|      0|                        else {
  434|      0|                            k += r;
  435|      0|                            zig = Base::JpegDeZigZag[k++];
  436|      0|                            data[zig] = (short)(jpeg__extend_receive(j, s) << shift);
  437|      0|                        }
  438|  1.18k|                    }
  439|  51.5k|                } while (k <= j->spec_end);
  ------------------
  |  Branch (439:26): [True: 140, False: 50.8k]
  ------------------
  440|  51.4k|            }
  441|   134M|            else {
  442|       |                // refinement scan for these AC coefficients
  443|       |
  444|   134M|                short bit = (short)(1 << j->succ_low);
  445|       |
  446|   134M|                if (j->eob_run) {
  ------------------
  |  Branch (446:21): [True: 46.9M, False: 87.6M]
  ------------------
  447|  46.9M|                    --j->eob_run;
  448|   187M|                    for (k = j->spec_start; k <= j->spec_end; ++k) {
  ------------------
  |  Branch (448:45): [True: 140M, False: 46.9M]
  ------------------
  449|   140M|                        short* p = &data[Base::JpegDeZigZag[k]];
  450|   140M|                        if (*p != 0)
  ------------------
  |  Branch (450:29): [True: 139M, False: 1.92M]
  ------------------
  451|   139M|                            if (jpeg__jpeg_get_bit(j))
  ------------------
  |  Branch (451:33): [True: 7.84M, False: 131M]
  ------------------
  452|  7.84M|                                if ((*p & bit) == 0) {
  ------------------
  |  Branch (452:37): [True: 5.01k, False: 7.84M]
  ------------------
  453|  5.01k|                                    if (*p > 0)
  ------------------
  |  Branch (453:41): [True: 1.79k, False: 3.22k]
  ------------------
  454|  1.79k|                                        *p += bit;
  455|  3.22k|                                    else
  456|  3.22k|                                        *p -= bit;
  457|  5.01k|                                }
  458|   140M|                    }
  459|  46.9M|                }
  460|  87.6M|                else {
  461|  87.6M|                    k = j->spec_start;
  462|  87.6M|                    do {
  463|  87.6M|                        int r, s;
  464|  87.6M|                        int rs = jpeg__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
  465|  87.6M|                        if (rs < 0) return JpegLoadError("bad huffman code", "Corrupt JPEG");
  ------------------
  |  Branch (465:29): [True: 2, False: 87.6M]
  ------------------
  466|  87.6M|                        s = rs & 15;
  467|  87.6M|                        r = rs >> 4;
  468|  87.6M|                        if (s == 0) {
  ------------------
  |  Branch (468:29): [True: 623k, False: 87.0M]
  ------------------
  469|   623k|                            if (r < 15) {
  ------------------
  |  Branch (469:33): [True: 437k, False: 185k]
  ------------------
  470|   437k|                                j->eob_run = (1 << r) - 1;
  471|   437k|                                if (r)
  ------------------
  |  Branch (471:37): [True: 366k, False: 71.1k]
  ------------------
  472|   366k|                                    j->eob_run += jpeg__jpeg_get_bits(j, r);
  473|   437k|                                r = 64; // force end of block
  474|   437k|                            }
  475|   185k|                            else {
  476|       |                                // r=15 s=0 should write 16 0s, so we just do
  477|       |                                // a run of 15 0s and then write s (which is 0),
  478|       |                                // so we don't have to do anything special here
  479|   185k|                            }
  480|   623k|                        }
  481|  87.0M|                        else {
  482|  87.0M|                            if (s != 1) return JpegLoadError("bad huffman code", "Corrupt JPEG");
  ------------------
  |  Branch (482:33): [True: 110, False: 87.0M]
  ------------------
  483|       |                            // sign bit
  484|  87.0M|                            if (jpeg__jpeg_get_bit(j))
  ------------------
  |  Branch (484:33): [True: 162k, False: 86.9M]
  ------------------
  485|   162k|                                s = bit;
  486|  86.9M|                            else
  487|  86.9M|                                s = -bit;
  488|  87.0M|                        }
  489|       |
  490|       |                        // advance by r
  491|   350M|                        while (k <= j->spec_end) {
  ------------------
  |  Branch (491:32): [True: 263M, False: 87.6M]
  ------------------
  492|   263M|                            short* p = &data[Base::JpegDeZigZag[k++]];
  493|   263M|                            if (*p != 0) {
  ------------------
  |  Branch (493:33): [True: 257M, False: 5.86M]
  ------------------
  494|   257M|                                if (jpeg__jpeg_get_bit(j))
  ------------------
  |  Branch (494:37): [True: 980k, False: 256M]
  ------------------
  495|   980k|                                    if ((*p & bit) == 0) {
  ------------------
  |  Branch (495:41): [True: 3.18k, False: 977k]
  ------------------
  496|  3.18k|                                        if (*p > 0)
  ------------------
  |  Branch (496:45): [True: 2.00k, False: 1.17k]
  ------------------
  497|  2.00k|                                            *p += bit;
  498|  1.17k|                                        else
  499|  1.17k|                                            *p -= bit;
  500|  3.18k|                                    }
  501|   257M|                            }
  502|  5.86M|                            else {
  503|  5.86M|                                if (r == 0) {
  ------------------
  |  Branch (503:37): [True: 7.96k, False: 5.86M]
  ------------------
  504|  7.96k|                                    *p = (short)s;
  505|  7.96k|                                    break;
  506|  7.96k|                                }
  507|  5.86M|                                --r;
  508|  5.86M|                            }
  509|   263M|                        }
  510|  87.6M|                    } while (k <= j->spec_end);
  ------------------
  |  Branch (510:30): [True: 6.42k, False: 87.6M]
  ------------------
  511|  87.6M|                }
  512|   134M|            }
  513|   134M|            return 1;
  514|   134M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL19jpeg__jpeg_get_bitsEPNS0_11JpegContextEi:
  286|   367k|        {
  287|   367k|            unsigned int k;
  288|   367k|            if (j->code_bits < n) jpeg__grow_buffer_unsafe(j);
  ------------------
  |  Branch (288:17): [True: 1.26k, False: 365k]
  ------------------
  289|   367k|            k = jpeg_lrot(j->code_buffer, n);
  ------------------
  |  |   61|   367k|#define jpeg_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
  ------------------
  290|   367k|            j->code_buffer = k & ~jpeg__bmask[n];
  291|   367k|            k &= jpeg__bmask[n];
  292|   367k|            j->code_bits -= n;
  293|   367k|            return k;
  294|   367k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL20jpeg__process_markerEPNS0_11JpegContextEi:
  817|  3.05M|        {
  818|  3.05M|            int L;
  819|  3.05M|            switch (m) {
  ------------------
  |  Branch (819:21): [True: 2.77M, False: 284k]
  ------------------
  820|    212|            case JPEG__MARKER_none: // no marker found
  ------------------
  |  |  628|    212|#define JPEG__MARKER_none  0xff
  ------------------
  |  Branch (820:13): [True: 212, False: 3.05M]
  ------------------
  821|    212|                return JpegLoadError("expected marker", "Corrupt JPEG");
  822|       |
  823|   176k|            case 0xDD: // DRI - specify restart interval
  ------------------
  |  Branch (823:13): [True: 176k, False: 2.88M]
  ------------------
  824|   176k|                if (z->stream->GetBe16u() != 4) return JpegLoadError("bad DRI len", "Corrupt JPEG");
  ------------------
  |  Branch (824:21): [True: 8, False: 176k]
  ------------------
  825|   176k|                z->restart_interval = z->stream->GetBe16u();
  826|   176k|                return 1;
  827|       |
  828|  1.14k|            case 0xDB: // DQT - define quantization table
  ------------------
  |  Branch (828:13): [True: 1.14k, False: 3.05M]
  ------------------
  829|  1.14k|                L = z->stream->GetBe16u() - 2;
  830|  4.48k|                while (L > 0) {
  ------------------
  |  Branch (830:24): [True: 3.37k, False: 1.11k]
  ------------------
  831|  3.37k|                    int q = z->stream->Get8u();
  832|  3.37k|                    int p = q >> 4, sixteen = (p != 0);
  833|  3.37k|                    int t = q & 15, i;
  834|  3.37k|                    if (p != 0 && p != 1) return JpegLoadError("bad DQT type", "Corrupt JPEG");
  ------------------
  |  Branch (834:25): [True: 2.25k, False: 1.12k]
  |  Branch (834:35): [True: 28, False: 2.22k]
  ------------------
  835|  3.34k|                    if (t > 3) return JpegLoadError("bad DQT table", "Corrupt JPEG");
  ------------------
  |  Branch (835:25): [True: 0, False: 3.34k]
  ------------------
  836|       |
  837|   217k|                    for (i = 0; i < 64; ++i)
  ------------------
  |  Branch (837:33): [True: 214k, False: 3.34k]
  ------------------
  838|   214k|                        z->dequant[t][Base::JpegDeZigZag[i]] = (uint16_t)(sixteen ? z->stream->GetBe16u() : z->stream->Get8u());
  ------------------
  |  Branch (838:75): [True: 142k, False: 71.9k]
  ------------------
  839|  3.34k|                    L -= (sixteen ? 129 : 65);
  ------------------
  |  Branch (839:27): [True: 2.22k, False: 1.12k]
  ------------------
  840|  3.34k|                }
  841|  1.11k|                return L == 0;
  842|       |
  843|   106k|            case 0xC4: // DHT - define huffman table
  ------------------
  |  Branch (843:13): [True: 106k, False: 2.95M]
  ------------------
  844|   106k|                L = z->stream->GetBe16u() - 2;
  845|   213k|                while (L > 0) {
  ------------------
  |  Branch (845:24): [True: 106k, False: 106k]
  ------------------
  846|   106k|                    uint8_t* v;
  847|   106k|                    int sizes[16], i, n = 0;
  848|   106k|                    int q = z->stream->Get8u();
  849|   106k|                    int tc = q >> 4;
  850|   106k|                    int th = q & 15;
  851|   106k|                    if (tc > 1 || th > 3) return JpegLoadError("bad DHT header", "Corrupt JPEG");
  ------------------
  |  Branch (851:25): [True: 4, False: 106k]
  |  Branch (851:35): [True: 0, False: 106k]
  ------------------
  852|  1.81M|                    for (i = 0; i < 16; ++i) 
  ------------------
  |  Branch (852:33): [True: 1.71M, False: 106k]
  ------------------
  853|  1.71M|                    {
  854|  1.71M|                        sizes[i] = z->stream->Get8u();
  855|  1.71M|                        n += sizes[i];
  856|  1.71M|                    }
  857|   106k|                    L -= 17;
  858|   106k|                    if (tc == 0) 
  ------------------
  |  Branch (858:25): [True: 11.2k, False: 95.7k]
  ------------------
  859|  11.2k|                    {
  860|  11.2k|                        if (!z->huff_dc[th].Build(sizes)) 
  ------------------
  |  Branch (860:29): [True: 24, False: 11.2k]
  ------------------
  861|     24|                            return 0;
  862|  11.2k|                        v = z->huff_dc[th].values;
  863|  11.2k|                    }
  864|  95.7k|                    else 
  865|  95.7k|                    {
  866|  95.7k|                        if (!z->huff_ac[th].Build(sizes)) 
  ------------------
  |  Branch (866:29): [True: 70, False: 95.6k]
  ------------------
  867|     70|                            return 0;
  868|  95.6k|                        v = z->huff_ac[th].values;
  869|  95.6k|                    }
  870|  3.84M|                    for (i = 0; i < n; ++i)
  ------------------
  |  Branch (870:33): [True: 3.73M, False: 106k]
  ------------------
  871|  3.73M|                        v[i] = z->stream->Get8u();
  872|   106k|                    if (tc != 0)
  ------------------
  |  Branch (872:25): [True: 95.6k, False: 11.2k]
  ------------------
  873|  95.6k|                        z->huff_ac[th].BuildFastAc();
  874|   106k|                    L -= n;
  875|   106k|                }
  876|   106k|                return L == 0;
  877|  3.05M|            }
  878|       |
  879|       |            // check for comment block or APP blocks
  880|  2.77M|            if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
  ------------------
  |  Branch (880:18): [True: 2.77M, False: 122]
  |  Branch (880:31): [True: 2.77M, False: 240]
  |  Branch (880:45): [True: 224, False: 138]
  ------------------
  881|  2.77M|                L = z->stream->GetBe16u();
  882|  2.77M|                if (L < 2) {
  ------------------
  |  Branch (882:21): [True: 24, False: 2.77M]
  ------------------
  883|     24|                    if (m == 0xFE)
  ------------------
  |  Branch (883:25): [True: 0, False: 24]
  ------------------
  884|      0|                        return JpegLoadError("bad COM len", "Corrupt JPEG");
  885|     24|                    else
  886|     24|                        return JpegLoadError("bad APP len", "Corrupt JPEG");
  887|     24|                }
  888|  2.77M|                L -= 2;
  889|       |
  890|  2.77M|                if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
  ------------------
  |  Branch (890:21): [True: 599k, False: 2.17M]
  |  Branch (890:34): [True: 599k, False: 96]
  ------------------
  891|   599k|                    static const unsigned char tag[5] = { 'J','F','I','F','\0' };
  892|   599k|                    int ok = 1;
  893|   599k|                    int i;
  894|  3.59M|                    for (i = 0; i < 5; ++i)
  ------------------
  |  Branch (894:33): [True: 2.99M, False: 599k]
  ------------------
  895|  2.99M|                        if (z->stream->Get8u() != tag[i])
  ------------------
  |  Branch (895:29): [True: 1.62M, False: 1.36M]
  ------------------
  896|  1.62M|                            ok = 0;
  897|   599k|                    L -= 5;
  898|   599k|                    if (ok)
  ------------------
  |  Branch (898:25): [True: 13.8k, False: 585k]
  ------------------
  899|  13.8k|                        z->jfif = 1;
  900|   599k|                }
  901|  2.17M|                else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
  ------------------
  |  Branch (901:26): [True: 2.17M, False: 1.73k]
  |  Branch (901:39): [True: 2.17M, False: 172]
  ------------------
  902|  2.17M|                    static const unsigned char tag[6] = { 'A','d','o','b','e','\0' };
  903|  2.17M|                    int ok = 1;
  904|  2.17M|                    int i;
  905|  15.2M|                    for (i = 0; i < 6; ++i)
  ------------------
  |  Branch (905:33): [True: 13.0M, False: 2.17M]
  ------------------
  906|  13.0M|                        if (z->stream->Get8u() != tag[i])
  ------------------
  |  Branch (906:29): [True: 601k, False: 12.4M]
  ------------------
  907|   601k|                            ok = 0;
  908|  2.17M|                    L -= 6;
  909|  2.17M|                    if (ok) {
  ------------------
  |  Branch (909:25): [True: 1.99M, False: 178k]
  ------------------
  910|  1.99M|                        z->stream->Get8u(); // version
  911|  1.99M|                        z->stream->GetBe16u(); // flags0
  912|  1.99M|                        z->stream->GetBe16u(); // flags1
  913|  1.99M|                        z->app14_color_transform = z->stream->Get8u(); // color transform
  914|  1.99M|                        L -= 6;
  915|  1.99M|                    }
  916|  2.17M|                }
  917|       |
  918|  2.77M|                if (L > 0)
  ------------------
  |  Branch (918:21): [True: 2.23M, False: 542k]
  ------------------
  919|  2.23M|                    z->stream->Skip(L);
  920|       |
  921|  2.77M|                return 1;
  922|  2.77M|            }
  923|       |
  924|    138|            return JpegLoadError("unknown marker", "Corrupt JPEG");
  925|  2.77M|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL17jpeg__jpeg_finishEPNS0_11JpegContextE:
  798|    160|        {
  799|    160|            if (z->progressive) {
  ------------------
  |  Branch (799:17): [True: 160, False: 0]
  ------------------
  800|       |                // dequantize and idct the data
  801|    160|                int i, j, n;
  802|    660|                for (n = 0; n < z->img_n; ++n) {
  ------------------
  |  Branch (802:29): [True: 500, False: 160]
  ------------------
  803|    500|                    int w = (z->img_comp[n].x + 7) >> 3;
  804|    500|                    int h = (z->img_comp[n].y + 7) >> 3;
  805|  4.32k|                    for (j = 0; j < h; ++j) {
  ------------------
  |  Branch (805:33): [True: 3.82k, False: 500]
  ------------------
  806|   196k|                        for (i = 0; i < w; ++i) {
  ------------------
  |  Branch (806:37): [True: 192k, False: 3.82k]
  ------------------
  807|   192k|                            short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
  808|   192k|                            jpeg__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
  809|   192k|                            z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);
  810|   192k|                        }
  811|  3.82k|                    }
  812|    500|                }
  813|    160|            }
  814|    160|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL21jpeg__jpeg_dequantizeEPsPt:
  791|   192k|        {
  792|   192k|            int i;
  793|  12.5M|            for (i = 0; i < 64; ++i)
  ------------------
  |  Branch (793:25): [True: 12.3M, False: 192k]
  ------------------
  794|  12.3M|                data[i] *= dequant[i];
  795|   192k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL14resample_row_1EPhS1_S1_ii:
 1133|  4.15k|        {
 1134|  4.15k|            JPEG_NOTUSED(out);
  ------------------
  |  |   56|  4.15k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1135|  4.15k|            JPEG_NOTUSED(in_far);
  ------------------
  |  |   56|  4.15k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1136|  4.15k|            JPEG_NOTUSED(w);
  ------------------
  |  |   56|  4.15k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1137|  4.15k|            JPEG_NOTUSED(hs);
  ------------------
  |  |   56|  4.15k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1138|  4.15k|            return in_near;
 1139|  4.15k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL22jpeg__resample_row_v_2EPhS1_S1_ii:
 1142|  3.16k|        {
 1143|       |            // need to generate two samples vertically for every one in input
 1144|  3.16k|            int i;
 1145|  3.16k|            JPEG_NOTUSED(hs);
  ------------------
  |  |   56|  3.16k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1146|   312k|            for (i = 0; i < w; ++i)
  ------------------
  |  Branch (1146:25): [True: 309k, False: 3.16k]
  ------------------
 1147|   309k|                out[i] = jpeg__div4(3 * in_near[i] + in_far[i] + 2);
  ------------------
  |  | 1130|   309k|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1148|  3.16k|            return out;
 1149|  3.16k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL22jpeg__resample_row_h_2EPhS1_S1_ii:
 1152|  14.7k|        {
 1153|       |            // need to generate two samples horizontally for every one in input
 1154|  14.7k|            int i;
 1155|  14.7k|            uint8_t* input = in_near;
 1156|       |
 1157|  14.7k|            if (w == 1) {
  ------------------
  |  Branch (1157:17): [True: 0, False: 14.7k]
  ------------------
 1158|       |                // if only one sample, can't do any interpolation
 1159|      0|                out[0] = out[1] = input[0];
 1160|      0|                return out;
 1161|      0|            }
 1162|       |
 1163|  14.7k|            out[0] = input[0];
 1164|  14.7k|            out[1] = jpeg__div4(input[0] * 3 + input[1] + 2);
  ------------------
  |  | 1130|  14.7k|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1165|  4.56M|            for (i = 1; i < w - 1; ++i) {
  ------------------
  |  Branch (1165:25): [True: 4.54M, False: 14.7k]
  ------------------
 1166|  4.54M|                int n = 3 * input[i] + 2;
 1167|  4.54M|                out[i * 2 + 0] = jpeg__div4(n + input[i - 1]);
  ------------------
  |  | 1130|  4.54M|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1168|  4.54M|                out[i * 2 + 1] = jpeg__div4(n + input[i + 1]);
  ------------------
  |  | 1130|  4.54M|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1169|  4.54M|            }
 1170|  14.7k|            out[i * 2 + 0] = jpeg__div4(input[w - 2] * 3 + input[w - 1] + 2);
  ------------------
  |  | 1130|  14.7k|#define jpeg__div4(x) ((uint8_t) ((x) >> 2))
  ------------------
 1171|  14.7k|            out[i * 2 + 1] = input[w - 1];
 1172|       |
 1173|  14.7k|            JPEG_NOTUSED(in_far);
  ------------------
  |  |   56|  14.7k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1174|  14.7k|            JPEG_NOTUSED(hs);
  ------------------
  |  |   56|  14.7k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1175|       |
 1176|  14.7k|            return out;
 1177|  14.7k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL26jpeg__resample_row_genericEPhS1_S1_ii:
 1206|  6.38k|        {
 1207|       |            // resample with nearest-neighbor
 1208|  6.38k|            int i, j;
 1209|  6.38k|            JPEG_NOTUSED(in_far);
  ------------------
  |  |   56|  6.38k|#define JPEG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1210|  2.69M|            for (i = 0; i < w; ++i)
  ------------------
  |  Branch (1210:25): [True: 2.68M, False: 6.38k]
  ------------------
 1211|  5.37M|                for (j = 0; j < hs; ++j)
  ------------------
  |  Branch (1211:29): [True: 2.68M, False: 2.68M]
  ------------------
 1212|  2.68M|                    out[i * hs + j] = in_near[i];
 1213|  6.38k|            return out;
 1214|  6.38k|        }
SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL15jpeg__blinn_8x8Ehh:
 1264|   929k|        {
 1265|   929k|            unsigned int t = x * y + 128;
 1266|   929k|            return (uint8_t)((t + (t >> 8)) >> 8);
 1267|   929k|        }

_ZN4Simd4Base14ImagePngLoaderC2ERKNS_16ImageLoaderParamE:
  791|    168|        {
  792|    168|            if (_param.format == SimdPixelFormatNone)
  ------------------
  |  Branch (792:17): [True: 0, False: 168]
  ------------------
  793|      0|                _param.format = SimdPixelFormatRgba32;
  794|    168|            _decodeLine[0] = Base::DecodeLine0;
  795|    168|            _decodeLine[1] = Base::DecodeLine1;
  796|    168|            _decodeLine[2] = Base::DecodeLine2;
  797|    168|            _decodeLine[3] = Base::DecodeLine3;
  798|    168|            _decodeLine[4] = Base::DecodeLine4;
  799|    168|            _decodeLine[5] = Base::DecodeLine5;
  800|    168|            _decodeLine[6] = Base::DecodeLine6;
  801|    168|            _expandPalette = Base::ExpandPalette;
  802|    168|        }

_ZN4Simd4Base12PxmPrintInitEv:
  132|      2|        {
  133|    514|            for (int i = 0; i < 256; ++i)
  ------------------
  |  Branch (133:29): [True: 512, False: 2]
  ------------------
  134|    512|            {
  135|    512|                int d0 = i / 100;
  136|    512|                int d1 = (i / 10) % 10;
  137|    512|                int d2 = i % 10;
  138|    512|                g_pxmPrint[i][0] = d0 ? '0' + d0 : ' ';
  ------------------
  |  Branch (138:36): [True: 312, False: 200]
  ------------------
  139|    512|                g_pxmPrint[i][1] = (d1 || d0) ? '0' + d1 : ' ';
  ------------------
  |  Branch (139:37): [True: 452, False: 60]
  |  Branch (139:43): [True: 40, False: 20]
  ------------------
  140|    512|                g_pxmPrint[i][2] = '0' + d2;
  141|    512|                g_pxmPrint[i][3] = ' ';
  142|    512|            }
  143|      2|            return true;
  144|      2|        }

_ZN4Simd4Base21JpegCalcBitsTableInitEv:
   95|      2|        {
   96|  8.19k|            for (int i = 0, n = JpegCalcBitsRange * 2; i < n; ++i)
  ------------------
  |  Branch (96:56): [True: 8.19k, False: 2]
  ------------------
   97|  8.19k|            {
   98|  8.19k|                int val = i - JpegCalcBitsRange;
   99|  8.19k|                int tmp = val < 0 ? -val : val;
  ------------------
  |  Branch (99:27): [True: 4.09k, False: 4.09k]
  ------------------
  100|  8.19k|                val = val < 0 ? val - 1 : val;
  ------------------
  |  Branch (100:23): [True: 4.09k, False: 4.09k]
  ------------------
  101|  8.19k|                int cnt = 1;
  102|  81.9k|                while (tmp >>= 1)
  ------------------
  |  Branch (102:24): [True: 73.7k, False: 8.19k]
  ------------------
  103|  73.7k|                    ++cnt;
  104|  8.19k|                JpegCalcBitsTable[i][0] = val & ((1 << cnt) - 1);
  105|  8.19k|                JpegCalcBitsTable[i][1] = cnt;
  106|  8.19k|            }
  107|      2|            return true;
  108|      2|        }

SimdBaseImageSavePng.cpp:_ZN4Simd4BaseL19ZlibBitRevTableInitEv:
   42|      2|        {
   43|  1.02k|            for (int i = 0; i < 512; i++)
  ------------------
  |  Branch (43:29): [True: 1.02k, False: 2]
  ------------------
   44|  1.02k|            {
   45|  1.02k|                int rev = 0, val = i;
   46|  10.2k|                for (size_t b = 0; b < 9; b++)
  ------------------
  |  Branch (46:36): [True: 9.21k, False: 1.02k]
  ------------------
   47|  9.21k|                {
   48|  9.21k|                    rev = (rev << 1) | (val & 1);
   49|  9.21k|                    val >>= 1;
   50|  9.21k|                }
   51|  1.02k|                ZlibBitRevTable[i] = rev;
   52|  1.02k|            }
   53|      2|            return true;
   54|      2|        }

_ZN4Simd4Base9BgrToGrayEiii:
   37|  3.49M|        {
   38|  3.49M|            return (BLUE_TO_GRAY_WEIGHT*blue + GREEN_TO_GRAY_WEIGHT * green +
   39|  3.49M|                RED_TO_GRAY_WEIGHT * red + BGR_TO_GRAY_ROUND_TERM) >> BGR_TO_GRAY_AVERAGING_SHIFT;
   40|  3.49M|        }
_ZN4Simd4Avx29GrayToBgrILi0EEEDv4_xS2_:
  401|   362k|        {
  402|   362k|            return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0x44), K8_SHUFFLE_GRAY_TO_BGR0);
  403|   362k|        }
_ZN4Simd4Avx29GrayToBgrILi1EEEDv4_xS2_:
  406|   362k|        {
  407|   362k|            return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0x99), K8_SHUFFLE_GRAY_TO_BGR1);
  408|   362k|        }
_ZN4Simd4Avx29GrayToBgrILi2EEEDv4_xS2_:
  411|   362k|        {
  412|   362k|            return _mm256_shuffle_epi8(_mm256_permute4x64_epi64(gray, 0xEE), K8_SHUFFLE_GRAY_TO_BGR2);
  413|   362k|        }
_ZN4Simd4Avx29BgrToBgraILb0EEEDv4_xRKS2_S4_:
  448|   246k|        {
  449|   246k|            return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0x94), K8_BGR_TO_BGRA_SHUFFLE), alpha);
  450|   246k|        }
_ZN4Simd4Avx29BgrToBgraILb1EEEDv4_xRKS2_S4_:
  453|  82.1k|        {
  454|  82.1k|            return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(bgr, 0xE9), K8_BGR_TO_BGRA_SHUFFLE), alpha);
  455|  82.1k|        }
_ZN4Simd4Avx29RgbToBgraILb0EEEDv4_xRKS2_S4_:
  460|   246k|        {
  461|   246k|            return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(rgb, 0x94), K8_RGB_TO_BGRA_SHUFFLE), alpha);
  462|   246k|        }
_ZN4Simd4Avx29RgbToBgraILb1EEEDv4_xRKS2_S4_:
  465|  82.1k|        {
  466|  82.1k|            return _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(rgb, 0xE9), K8_RGB_TO_BGRA_SHUFFLE), alpha);
  467|  82.1k|        }

_ZN4Simd4Base10AlgCacheL1Ev:
  131|    537|        {
  132|    537|            return Cpu::L1_CACHE_SIZE;
  133|    537|        }

_ZN4Simd5Sse415EmptyEv:
   35|  2.40k|        {
   36|       |#if defined(_MSC_VER) && defined(SIMD_X64_ENABLE)
   37|       |#else
   38|  2.40k|            _mm_empty();
   39|  2.40k|#endif
   40|  2.40k|        }
_ZN4Simd5Sse4111EmptyCallerD2Ev:
   45|  2.40k|            {
   46|  2.40k|                if (Enable)
  ------------------
  |  Branch (46:21): [True: 2.40k, False: 0]
  ------------------
   47|  2.40k|                    Empty();
   48|  2.40k|            }

_ZN4Simd11ImageLoaderC2ERKNS_16ImageLoaderParamE:
   68|  2.39k|        {
   69|  2.39k|        }
_ZN4Simd11ImageLoaderD2Ev:
   72|  2.39k|        {
   73|  2.39k|        }
_ZN4Simd11ImageLoader7ReleaseEPmS1_S1_P19SimdPixelFormatType:
   78|    336|        {
   79|    336|            *stride = _image.stride;
   80|    336|            *width = _image.width;
   81|    336|            *height = _image.height;
   82|    336|            *format = (SimdPixelFormatType)_image.format;
   83|    336|            return _image.Release();
   84|    336|        }

SimdBaseImageLoadJpeg.cpp:_ZN4Simd4BaseL13JpegLoadErrorEPKcS2_:
   56|  1.16k|        {
   57|  1.16k|            std::cout << "JPEG load error: " << text << ", " << type << "!" << std::endl;
   58|  1.16k|            return 0;
   59|  1.16k|        }

SimdFree:
  172|    336|{
  173|    336|    Free(ptr);
  174|    336|}
SimdAlignment:
  182|    876|{
  183|    876|    return Simd::ALIGNMENT;
  184|    876|}
SimdImageLoadFromMemory:
 2770|  2.40k|{
 2771|  2.40k|    SIMD_EMPTY();
  ------------------
  |  |   55|  2.40k|#define SIMD_EMPTY() Simd::Sse41::EmptyCaller emptyCaller;
  ------------------
 2772|  2.40k|    const static Simd::ImageLoadFromMemoryPtr imageLoadFromMemory = SIMD_FUNC4(ImageLoadFromMemory, SIMD_AVX512BW_FUNC, SIMD_AVX2_FUNC, SIMD_SSE41_FUNC, SIMD_NEON_FUNC);
  ------------------
  |  |  132|  2.40k|#define SIMD_FUNC4(func, EXT1, EXT2, EXT3, EXT4) EXT1(func) EXT2(func) EXT3(func) EXT4(func) SIMD_BASE_FUNC(func)
  |  |  ------------------
  |  |  |  | 2772|  2.40k|    const static Simd::ImageLoadFromMemoryPtr imageLoadFromMemory = SIMD_FUNC4(ImageLoadFromMemory, SIMD_AVX512BW_FUNC, SIMD_AVX2_FUNC, SIMD_SSE41_FUNC, SIMD_NEON_FUNC);
  |  |  |  |  ------------------
  |  |  |  |  |  |  105|  2.40k|#define SIMD_AVX512BW_FUNC(func) Simd::Avx512bw::Enable ? Simd::Avx512bw::func : 
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (105:34): [True: 0, False: 2.40k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |               #define SIMD_FUNC4(func, EXT1, EXT2, EXT3, EXT4) EXT1(func) EXT2(func) EXT3(func) EXT4(func) SIMD_BASE_FUNC(func)
  |  |  ------------------
  |  |  |  | 2772|  2.40k|    const static Simd::ImageLoadFromMemoryPtr imageLoadFromMemory = SIMD_FUNC4(ImageLoadFromMemory, SIMD_AVX512BW_FUNC, SIMD_AVX2_FUNC, SIMD_SSE41_FUNC, SIMD_NEON_FUNC);
  |  |  |  |  ------------------
  |  |  |  |  |  |   99|  2.40k|#define SIMD_AVX2_FUNC(func) Simd::Avx2::Enable ? Simd::Avx2::func : 
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (99:30): [True: 1, False: 2.40k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |               #define SIMD_FUNC4(func, EXT1, EXT2, EXT3, EXT4) EXT1(func) EXT2(func) EXT3(func) EXT4(func) SIMD_BASE_FUNC(func)
  |  |  ------------------
  |  |  |  | 2772|  2.40k|    const static Simd::ImageLoadFromMemoryPtr imageLoadFromMemory = SIMD_FUNC4(ImageLoadFromMemory, SIMD_AVX512BW_FUNC, SIMD_AVX2_FUNC, SIMD_SSE41_FUNC, SIMD_NEON_FUNC);
  |  |  |  |  ------------------
  |  |  |  |  |  |   93|  2.40k|#define SIMD_SSE41_FUNC(func) Simd::Sse41::Enable ? Simd::Sse41::func : 
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (93:31): [True: 0, False: 2.40k]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  |  |               #define SIMD_FUNC4(func, EXT1, EXT2, EXT3, EXT4) EXT1(func) EXT2(func) EXT3(func) EXT4(func) SIMD_BASE_FUNC(func)
  |  |  ------------------
  |  |  |  |   90|  4.80k|#define SIMD_BASE_FUNC(func) Simd::Base::func
  |  |  ------------------
  ------------------
 2773|       |
 2774|  2.40k|    return imageLoadFromMemory(data, size, stride, width, height, format);
 2775|  2.40k|}

_ZN4Simd5Sse414LoadILb0EEEDv2_xPKS2_:
   88|  1.05M|        {
   89|  1.05M|            return _mm_loadu_si128(p);
   90|  1.05M|        }
_ZN4Simd5Sse414LoadILb1EEEDv2_xPKS2_:
   93|  1.53M|        {
   94|  1.53M|            return _mm_load_si128(p);
   95|  1.53M|        }
_ZN4Simd4Avx24LoadILb1EEEDv4_xPKS2_:
  174|   338k|        {
  175|   338k|            return _mm256_load_si256(p);
  176|   338k|        }
_ZN4Simd4Avx24LoadILb0EEEDv4_xPKS2_:
  169|  1.10M|        {
  170|  1.10M|            return _mm256_loadu_si256(p);
  171|  1.10M|        }

_ZN4Simd3MinImEET_S1_S1_:
   41|  8.01M|    {
   42|  8.01M|        return a < b ? a : b;
  ------------------
  |  Branch (42:16): [True: 4.87k, False: 8.00M]
  ------------------
   43|  8.01M|    }
_ZN4Simd3MaxImEET_S1_S1_:
   46|    537|    {
   47|    537|        return a > b ? a : b;
  ------------------
  |  Branch (47:16): [True: 18, False: 519]
  ------------------
   48|    537|    }
_ZN4Simd13RestrictRangeImEET_S1_S1_S1_:
   56|    537|    {
   57|    537|        return Max(min, Min(max, value));
   58|    537|    }

_ZN4Simd7AlignHiEmm:
   61|  13.1k|    {
   62|  13.1k|        return (size + align - 1) & ~(align - 1);
   63|  13.1k|    }
_ZN4Simd7AlignLoEmm:
   71|  9.49k|    {
   72|  9.49k|        return size & ~(align - 1);
   73|  9.49k|    }
_ZN4Simd7AlignLoEPKvm:
   76|  6.24k|    {
   77|  6.24k|        return (void *)(((size_t)ptr) & ~(align - 1));
   78|  6.24k|    }
_ZN4Simd7AlignedEmm:
   81|  6.19k|    {
   82|  6.19k|        return size == AlignLo(size, align);
   83|  6.19k|    }
_ZN4Simd7AlignedEPKvm:
   86|  6.24k|    {
   87|  6.24k|        return ptr == AlignLo(ptr, align);
   88|  6.24k|    }
_ZN4Simd8AllocateEmm:
  117|  6.11k|    {
  118|  6.11k|#ifdef SIMD_NO_MANS_LAND
  119|  6.11k|        size += 2 * SIMD_NO_MANS_LAND;
  ------------------
  |  |   49|  6.11k|#define SIMD_NO_MANS_LAND 64
  ------------------
  120|  6.11k|#endif
  121|  6.11k|        void* ptr = NULL;
  122|       |#if defined(_MSC_VER) 
  123|       |        ptr = _aligned_malloc(size, align);
  124|       |#elif defined(__MINGW32__) || defined(__MINGW64__)
  125|       |        ptr = __mingw_aligned_malloc(size, align);
  126|       |#elif defined(__GNUC__)
  127|       |        align = AlignHi(align, sizeof(void*));
  128|  6.11k|        size = AlignHi(size, align);
  129|  6.11k|        int result = ::posix_memalign(&ptr, align, size);
  130|  6.11k|        if (result != 0)
  ------------------
  |  Branch (130:13): [True: 0, False: 6.11k]
  ------------------
  131|      0|            ptr = NULL;
  132|       |#else
  133|       |        ptr = malloc(size);
  134|       |#endif
  135|  6.11k|#ifdef SIMD_ALLOCATE_ERROR_MESSAGE
  136|  6.11k|        if (ptr == NULL)
  ------------------
  |  Branch (136:13): [True: 0, False: 6.11k]
  ------------------
  137|      0|            std::cout << "The function posix_memalign can't allocate " << size << " bytes with align " << align << " !" << std::endl << std::flush;
  138|  6.11k|#endif
  139|  6.11k|#ifdef SIMD_ALLOCATE_ASSERT
  140|  6.11k|        assert(ptr);
  141|  6.11k|#endif
  142|  6.11k|#ifdef SIMD_NO_MANS_LAND
  143|  6.11k|        if (ptr)
  ------------------
  |  Branch (143:13): [True: 6.11k, False: 0]
  ------------------
  144|  6.11k|        {
  145|       |#if !defined(NDEBUG) && SIMD_NO_MANS_LAND >= 16
  146|       |            * (size_t*)ptr = size - 2 * SIMD_NO_MANS_LAND;
  147|       |            memset((char*)ptr + sizeof(size_t), NO_MANS_LAND_WATERMARK, SIMD_NO_MANS_LAND - sizeof(size_t));
  148|       |            memset((char*)ptr + size - SIMD_NO_MANS_LAND, NO_MANS_LAND_WATERMARK, SIMD_NO_MANS_LAND);
  149|       |#endif
  150|  6.11k|            ptr = (char*)ptr + SIMD_NO_MANS_LAND;
  ------------------
  |  |   49|  6.11k|#define SIMD_NO_MANS_LAND 64
  ------------------
  151|  6.11k|        }
  152|  6.11k|#endif
  153|  6.11k|        return ptr;
  154|  6.11k|    }
_ZN4Simd4FreeEPv:
  157|  6.11k|    {
  158|  6.11k|#ifdef SIMD_NO_MANS_LAND
  159|  6.11k|        if (ptr)
  ------------------
  |  Branch (159:13): [True: 6.11k, False: 0]
  ------------------
  160|  6.11k|        {
  161|  6.11k|            ptr = (char*)ptr - SIMD_NO_MANS_LAND;
  ------------------
  |  |   49|  6.11k|#define SIMD_NO_MANS_LAND 64
  ------------------
  162|       |#if !defined(NDEBUG) && SIMD_NO_MANS_LAND >= 16
  163|       |            size_t size = *(size_t*)ptr;
  164|       |            char* nose = (char*)ptr + sizeof(size_t), *tail = (char*)ptr + SIMD_NO_MANS_LAND + size;
  165|       |            for (size_t i = 0, n = SIMD_NO_MANS_LAND - sizeof(size_t); i < n; ++i)
  166|       |                assert(nose[i] == NO_MANS_LAND_WATERMARK);
  167|       |            for (size_t i = 0, n = SIMD_NO_MANS_LAND; i < n; ++i)
  168|       |                assert(tail[i] == NO_MANS_LAND_WATERMARK);
  169|       |#endif  
  170|  6.11k|        }
  171|  6.11k|#endif
  172|       |#if defined(_MSC_VER) 
  173|       |        _aligned_free(ptr);
  174|       |#elif defined(__MINGW32__) || defined(__MINGW64__)
  175|       |        return __mingw_aligned_free(ptr);
  176|       |#else
  177|  6.11k|        free(ptr);
  178|  6.11k|#endif
  179|  6.11k|    }
_ZN4Simd5Sse417AlignedEmm:
  242|  2.19k|        {
  243|  2.19k|            return Simd::Aligned(size, align);
  244|  2.19k|        }
_ZN4Simd5Sse417AlignedEPKvm:
  247|  2.19k|        {
  248|  2.19k|            return Simd::Aligned(ptr, align);
  249|  2.19k|        }
_ZN4Simd4Avx27AlignedEmm:
  257|  3.99k|        {
  258|  3.99k|            return Simd::Aligned(size, align);
  259|  3.99k|        }
_ZN4Simd4Avx27AlignedEPKvm:
  262|  4.04k|        {
  263|  4.04k|            return Simd::Aligned(ptr, align);
  264|  4.04k|        }

_ZN4Simd17InputMemoryStreamC2EPKhm:
   45|  2.39k|        {
   46|  2.39k|            Init(data, size);
   47|  2.39k|        }
_ZN4Simd17InputMemoryStream4InitEPKhm:
   50|  2.39k|        {
   51|  2.39k|            _pos = 0;
   52|  2.39k|            _data = data;
   53|  2.39k|            _size = size;
   54|  2.39k|            _bitBuffer = 0;
   55|  2.39k|            _bitCount = 0;
   56|  2.39k|        }
_ZN4Simd17InputMemoryStream4SeekEm:
   59|    776|        {
   60|    776|            if (pos <= _size)
  ------------------
  |  Branch (60:17): [True: 776, False: 0]
  ------------------
   61|    776|            {
   62|    776|                _pos = pos;
   63|    776|                return true;
   64|    776|            }
   65|      0|            return false;
   66|    776|        }
_ZNK4Simd17InputMemoryStream4SizeEv:
   69|    776|        {
   70|    776|            return _size;
   71|    776|        }
_ZNK4Simd17InputMemoryStream4DataEv:
   74|  2.32k|        {
   75|  2.32k|            return _data;
   76|  2.32k|        }
_ZNK4Simd17InputMemoryStream3EofEv:
   89|  58.3M|        {
   90|  58.3M|            return _pos >= _size;
   91|  58.3M|        }
_ZN4Simd17InputMemoryStream4ReadEmPv:
   99|  8.00M|        {
  100|  8.00M|            size = Min(_size - _pos, size);
  101|  8.00M|            memcpy(data, _data + _pos, size);
  102|  8.00M|            _pos += size;
  103|  8.00M|            return size;
  104|  8.00M|        }
_ZN4Simd17InputMemoryStream5Get8uEv:
  123|   667M|        {
  124|   667M|            return _pos < _size ? _data[_pos++] : 0;
  ------------------
  |  Branch (124:20): [True: 156M, False: 511M]
  ------------------
  125|   667M|        }
_ZN4Simd17InputMemoryStream8GetBe16uEv:
  167|  7.57M|        {
  168|  7.57M|            uint32_t hi = Get8u();
  169|  7.57M|            uint32_t lo = Get8u();
  170|       |#if defined(SIMD_BIG_ENDIAN)
  171|       |            return (uint16_t)(hi | (lo >> 8));
  172|       |#else
  173|  7.57M|            return (uint16_t)((hi << 8) | lo);
  174|  7.57M|#endif
  175|  7.57M|        }
_ZN4Simd17InputMemoryStream4SkipEm:
  211|  2.23M|        {
  212|  2.23M|            if (_pos + size < _size)
  ------------------
  |  Branch (212:17): [True: 2.22M, False: 2.01k]
  ------------------
  213|  2.22M|            {
  214|  2.22M|                _pos += size;
  215|  2.22M|                return true;
  216|  2.22M|            }
  217|  2.01k|            return false;
  218|  2.23M|        }
_ZN4Simd17InputMemoryStream7SkipGapEv:
  235|  15.1M|        {
  236|  30.3M|            while (IsGap(_data[_pos]) && _pos < _size)
  ------------------
  |  Branch (236:20): [True: 15.1M, False: 15.1M]
  |  Branch (236:42): [True: 15.1M, False: 0]
  ------------------
  237|  15.1M|                _pos++;
  238|  15.1M|            return _pos < _size;
  239|  15.1M|        }
_ZN4Simd17InputMemoryStream5IsGapEh:
  242|  68.7M|        {
  243|  68.7M|            return value == ' ' || value == '\t' || value == '\n' || value == '\r';
  ------------------
  |  Branch (243:20): [True: 884, False: 68.7M]
  |  Branch (243:36): [True: 1.76k, False: 68.7M]
  |  Branch (243:53): [True: 30.3M, False: 38.4M]
  |  Branch (243:70): [True: 140, False: 38.4M]
  ------------------
  244|  68.7M|        }
_ZN4Simd17InputMemoryStream12ReadUnsignedIjEEbRT_:
  195|  2.27k|        {
  196|  2.27k|            if (!SkipGap())
  ------------------
  |  Branch (196:17): [True: 0, False: 2.27k]
  ------------------
  197|      0|                return false;
  198|  2.27k|            value = 0;
  199|  8.06M|            while (!IsGap(_data[_pos]) && _pos < _size)
  ------------------
  |  Branch (199:20): [True: 8.06M, False: 2.23k]
  |  Branch (199:43): [True: 8.06M, False: 0]
  ------------------
  200|  8.06M|            {
  201|  8.06M|                if (_data[_pos] >= '0' && _data[_pos] <= '9')
  ------------------
  |  Branch (201:21): [True: 8.06M, False: 32]
  |  Branch (201:43): [True: 8.06M, False: 8]
  ------------------
  202|  8.06M|                    value = value * 10 + Unsigned(_data[_pos] - '0');
  203|     40|                else
  204|     40|                    return false;
  205|  8.06M|                _pos++;
  206|  8.06M|            }
  207|  2.23k|            return true;
  208|  2.27k|        }
_ZN4Simd17InputMemoryStream4ReadIhEEbRT_:
  107|    716|        {
  108|    716|            return Read(sizeof(Value), &value) == sizeof(Value);
  109|    716|        }
_ZN4Simd17InputMemoryStream12ReadUnsignedIhEEbRT_:
  195|  15.1M|        {
  196|  15.1M|            if (!SkipGap())
  ------------------
  |  Branch (196:17): [True: 0, False: 15.1M]
  ------------------
  197|      0|                return false;
  198|  15.1M|            value = 0;
  199|  30.3M|            while (!IsGap(_data[_pos]) && _pos < _size)
  ------------------
  |  Branch (199:20): [True: 15.2M, False: 15.1M]
  |  Branch (199:43): [True: 15.2M, False: 0]
  ------------------
  200|  15.2M|            {
  201|  15.2M|                if (_data[_pos] >= '0' && _data[_pos] <= '9')
  ------------------
  |  Branch (201:21): [True: 15.2M, False: 52]
  |  Branch (201:43): [True: 15.2M, False: 92]
  ------------------
  202|  15.2M|                    value = value * 10 + Unsigned(_data[_pos] - '0');
  203|    144|                else
  204|    144|                    return false;
  205|  15.2M|                _pos++;
  206|  15.2M|            }
  207|  15.1M|            return true;
  208|  15.1M|        }

_ZN4Simd5Sse419RgbToBgraEPKhmmmPhmh:
  156|    107|        {
  157|    107|            if (Aligned(bgra) && Aligned(bgraStride) && Aligned(rgb) && Aligned(rgbStride))
  ------------------
  |  Branch (157:17): [True: 107, False: 0]
  |  Branch (157:34): [True: 107, False: 0]
  |  Branch (157:57): [True: 107, False: 0]
  |  Branch (157:73): [True: 68, False: 39]
  ------------------
  158|     68|                RgbToBgra<true>(rgb, width, height, rgbStride, bgra, bgraStride, alpha);
  159|     39|            else
  160|     39|                RgbToBgra<false>(rgb, width, height, rgbStride, bgra, bgraStride, alpha);
  161|    107|        }
_ZN4Simd5Sse419RgbToBgraILb1EEEvPKhmmmPhmh:
  134|     68|        {
  135|     68|            assert(width >= A);
  136|     68|            if (align)
  ------------------
  |  Branch (136:17): [Folded - Ignored]
  ------------------
  137|     68|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(rgb) && Aligned(rgbStride));
  138|       |
  139|     68|            size_t alignedWidth = AlignLo(width, A);
  140|       |
  141|     68|            __m128i _alpha = _mm_slli_si128(_mm_set1_epi32(alpha), 3);
  142|     68|            __m128i _shuffle = _mm_setr_epi8(0x2, 0x1, 0x0, -1, 0x5, 0x4, 0x3, -1, 0x8, 0x7, 0x6, -1, 0xB, 0xA, 0x9, -1);
  143|       |
  144|  45.3k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (144:34): [True: 45.2k, False: 68]
  ------------------
  145|  45.2k|            {
  146|  90.5k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (146:38): [True: 45.2k, False: 45.2k]
  ------------------
  147|  45.2k|                    RgbToBgra<align>(rgb + 3 * col, bgra + 4 * col, _alpha, _shuffle);
  148|  45.2k|                if (width != alignedWidth)
  ------------------
  |  Branch (148:21): [True: 0, False: 45.2k]
  ------------------
  149|      0|                    RgbToBgra<false>(rgb + 3 * (width - A), bgra + 4 * (width - A), _alpha, _shuffle);
  150|  45.2k|                 rgb += rgbStride;
  151|  45.2k|                bgra += bgraStride;
  152|  45.2k|            }
  153|     68|        }
_ZN4Simd5Sse419RgbToBgraILb1EEEvPKhPhDv2_xS5_:
  126|  45.2k|        {
  127|  45.2k|            Store<align>((__m128i*)bgra + 0, _mm_or_si128(alpha, _mm_shuffle_epi8(Load<align>((__m128i*)(rgb + 0)), shuffle)));
  128|  45.2k|            Store<align>((__m128i*)bgra + 1, _mm_or_si128(alpha, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 12)), shuffle)));
  129|  45.2k|            Store<align>((__m128i*)bgra + 2, _mm_or_si128(alpha, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 24)), shuffle)));
  130|  45.2k|            Store<align>((__m128i*)bgra + 3, _mm_or_si128(alpha, _mm_shuffle_epi8(_mm_srli_si128(Load<align>((__m128i*)(rgb + 32)), 4), shuffle)));
  131|  45.2k|        }
_ZN4Simd5Sse419RgbToBgraILb0EEEvPKhPhDv2_xS5_:
  126|  34.7k|        {
  127|  34.7k|            Store<align>((__m128i*)bgra + 0, _mm_or_si128(alpha, _mm_shuffle_epi8(Load<align>((__m128i*)(rgb + 0)), shuffle)));
  128|  34.7k|            Store<align>((__m128i*)bgra + 1, _mm_or_si128(alpha, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 12)), shuffle)));
  129|  34.7k|            Store<align>((__m128i*)bgra + 2, _mm_or_si128(alpha, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 24)), shuffle)));
  130|  34.7k|            Store<align>((__m128i*)bgra + 3, _mm_or_si128(alpha, _mm_shuffle_epi8(_mm_srli_si128(Load<align>((__m128i*)(rgb + 32)), 4), shuffle)));
  131|  34.7k|        }
_ZN4Simd5Sse419RgbToBgraILb0EEEvPKhmmmPhmh:
  134|     39|        {
  135|     39|            assert(width >= A);
  136|     39|            if (align)
  ------------------
  |  Branch (136:17): [Folded - Ignored]
  ------------------
  137|      0|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(rgb) && Aligned(rgbStride));
  138|       |
  139|     39|            size_t alignedWidth = AlignLo(width, A);
  140|       |
  141|     39|            __m128i _alpha = _mm_slli_si128(_mm_set1_epi32(alpha), 3);
  142|     39|            __m128i _shuffle = _mm_setr_epi8(0x2, 0x1, 0x0, -1, 0x5, 0x4, 0x3, -1, 0x8, 0x7, 0x6, -1, 0xB, 0xA, 0x9, -1);
  143|       |
  144|  17.3k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (144:34): [True: 17.3k, False: 39]
  ------------------
  145|  17.3k|            {
  146|  34.7k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (146:38): [True: 17.3k, False: 17.3k]
  ------------------
  147|  17.3k|                    RgbToBgra<align>(rgb + 3 * col, bgra + 4 * col, _alpha, _shuffle);
  148|  17.3k|                if (width != alignedWidth)
  ------------------
  |  Branch (148:21): [True: 17.3k, False: 0]
  ------------------
  149|  17.3k|                    RgbToBgra<false>(rgb + 3 * (width - A), bgra + 4 * (width - A), _alpha, _shuffle);
  150|  17.3k|                 rgb += rgbStride;
  151|  17.3k|                bgra += bgraStride;
  152|  17.3k|            }
  153|     39|        }

_ZN4Simd5Sse419RgbToGrayEPKhmmmPhm:
  140|    107|        {
  141|    107|            if (Aligned(gray) && Aligned(grayStride) && Aligned(rgb) && Aligned(rgbStride))
  ------------------
  |  Branch (141:17): [True: 107, False: 0]
  |  Branch (141:34): [True: 107, False: 0]
  |  Branch (141:57): [True: 107, False: 0]
  |  Branch (141:73): [True: 68, False: 39]
  ------------------
  142|     68|                RgbToGray<true>(rgb, width, height, rgbStride, gray, grayStride);
  143|     39|            else
  144|     39|                RgbToGray<false>(rgb, width, height, rgbStride, gray, grayStride);
  145|    107|        }
_ZN4Simd5Sse419RgbToGrayILb1EEEvPKhmmmPhm:
  119|     68|        {
  120|     68|            assert(width >= A);
  121|     68|            if (align)
  ------------------
  |  Branch (121:17): [Folded - Ignored]
  ------------------
  122|     68|                assert(Aligned(gray) && Aligned(grayStride) && Aligned(rgb) && Aligned(rgbStride));
  123|       |
  124|     68|            size_t alignedWidth = AlignLo(width, A);
  125|       |
  126|     68|            __m128i _shuffle = _mm_setr_epi8(0x0, 0x1, 0x2, -1, 0x3, 0x4, 0x5, -1, 0x6, 0x7, 0x8, -1, 0x9, 0xA, 0xB, -1);
  127|       |
  128|  45.3k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (128:34): [True: 45.2k, False: 68]
  ------------------
  129|  45.2k|            {
  130|  90.5k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (130:38): [True: 45.2k, False: 45.2k]
  ------------------
  131|  45.2k|                    Store<align>((__m128i*)(gray + col), RgbToGray<align>(rgb + 3 * col, _shuffle));
  132|  45.2k|                if (width != alignedWidth)
  ------------------
  |  Branch (132:21): [True: 0, False: 45.2k]
  ------------------
  133|      0|                    Store<false>((__m128i*)(gray + width - A), RgbToGray<false>(rgb + 3 * (width - A), _shuffle));
  134|  45.2k|                rgb += rgbStride;
  135|  45.2k|                gray += grayStride;
  136|  45.2k|            }
  137|     68|        }
_ZN4Simd5Sse419RgbToGrayILb1EEEDv2_xPKhS2_:
  109|  45.2k|        {
  110|  45.2k|            __m128i rgba[4];
  111|  45.2k|            rgba[0] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(Load<align>((__m128i*)(rgb + 0)), shuffle));
  112|  45.2k|            rgba[1] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 12)), shuffle));
  113|  45.2k|            rgba[2] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 24)), shuffle));
  114|  45.2k|            rgba[3] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(_mm_srli_si128(Load<align>((__m128i*)(rgb + 32)), 4), shuffle));
  115|  45.2k|            return RgbaToGray(rgba);
  116|  45.2k|        }
_ZN4Simd5Sse4110RgbaToGrayEPDv2_x:
  102|  79.9k|        {
  103|  79.9k|            const __m128i lo = _mm_packs_epi32(RgbaToGray32(rgba[0]), RgbaToGray32(rgba[1]));
  104|  79.9k|            const __m128i hi = _mm_packs_epi32(RgbaToGray32(rgba[2]), RgbaToGray32(rgba[3]));
  105|  79.9k|            return _mm_packus_epi16(lo, hi);
  106|  79.9k|        }
_ZN4Simd5Sse4112RgbaToGray32EDv2_x:
   94|   319k|        {
   95|   319k|            const __m128i g0a0 = _mm_and_si128(_mm_srli_si128(rgba, 1), K16_00FF);
   96|   319k|            const __m128i r0b0 = _mm_and_si128(rgba, K16_00FF);
   97|   319k|            const __m128i weightedSum = _mm_add_epi32(_mm_madd_epi16(g0a0, K16_GREEN_ROUND), _mm_madd_epi16(r0b0, K16_RED_BLUE));
   98|   319k|            return _mm_srli_epi32(weightedSum, Base::BGR_TO_GRAY_AVERAGING_SHIFT);
   99|   319k|        }
_ZN4Simd5Sse419RgbToGrayILb0EEEDv2_xPKhS2_:
  109|  34.7k|        {
  110|  34.7k|            __m128i rgba[4];
  111|  34.7k|            rgba[0] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(Load<align>((__m128i*)(rgb + 0)), shuffle));
  112|  34.7k|            rgba[1] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 12)), shuffle));
  113|  34.7k|            rgba[2] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(Load<false>((__m128i*)(rgb + 24)), shuffle));
  114|  34.7k|            rgba[3] = _mm_or_si128(K32_01000000, _mm_shuffle_epi8(_mm_srli_si128(Load<align>((__m128i*)(rgb + 32)), 4), shuffle));
  115|  34.7k|            return RgbaToGray(rgba);
  116|  34.7k|        }
_ZN4Simd5Sse419RgbToGrayILb0EEEvPKhmmmPhm:
  119|     39|        {
  120|     39|            assert(width >= A);
  121|     39|            if (align)
  ------------------
  |  Branch (121:17): [Folded - Ignored]
  ------------------
  122|      0|                assert(Aligned(gray) && Aligned(grayStride) && Aligned(rgb) && Aligned(rgbStride));
  123|       |
  124|     39|            size_t alignedWidth = AlignLo(width, A);
  125|       |
  126|     39|            __m128i _shuffle = _mm_setr_epi8(0x0, 0x1, 0x2, -1, 0x3, 0x4, 0x5, -1, 0x6, 0x7, 0x8, -1, 0x9, 0xA, 0xB, -1);
  127|       |
  128|  17.3k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (128:34): [True: 17.3k, False: 39]
  ------------------
  129|  17.3k|            {
  130|  34.7k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (130:38): [True: 17.3k, False: 17.3k]
  ------------------
  131|  17.3k|                    Store<align>((__m128i*)(gray + col), RgbToGray<align>(rgb + 3 * col, _shuffle));
  132|  17.3k|                if (width != alignedWidth)
  ------------------
  |  Branch (132:21): [True: 17.3k, False: 0]
  ------------------
  133|  17.3k|                    Store<false>((__m128i*)(gray + width - A), RgbToGray<false>(rgb + 3 * (width - A), _shuffle));
  134|  17.3k|                rgb += rgbStride;
  135|  17.3k|                gray += grayStride;
  136|  17.3k|            }
  137|     39|        }

_ZN4Simd5Sse418BgrToRgbEPKhmmmPhm:
   72|    107|        {
   73|    107|            if (Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride))
  ------------------
  |  Branch (73:17): [True: 107, False: 0]
  |  Branch (73:33): [True: 68, False: 39]
  |  Branch (73:55): [True: 68, False: 0]
  |  Branch (73:71): [True: 68, False: 0]
  ------------------
   74|     68|                BgrToRgb<true>(bgr, width, height, bgrStride, rgb, rgbStride);
   75|     39|            else
   76|     39|                BgrToRgb<false>(bgr, width, height, bgrStride, rgb, rgbStride);
   77|    107|        }
_ZN4Simd5Sse418BgrToRgbILb1EEEvPKhmmmPhm:
   51|     68|        {
   52|     68|            assert(width >= A);
   53|     68|            if (align)
  ------------------
  |  Branch (53:17): [Folded - Ignored]
  ------------------
   54|     68|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride));
   55|       |
   56|     68|            const size_t A3 = A * 3;
   57|     68|            size_t size = width * 3;
   58|     68|            size_t aligned = AlignLo(width, A) * 3;
   59|       |
   60|  45.3k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (60:34): [True: 45.2k, False: 68]
  ------------------
   61|  45.2k|            {
   62|  90.5k|                for (size_t i = 0; i < aligned; i += A3)
  ------------------
  |  Branch (62:36): [True: 45.2k, False: 45.2k]
  ------------------
   63|  45.2k|                    BgrToRgb<align>(bgr + i, rgb + i);
   64|  45.2k|                if (aligned < size)
  ------------------
  |  Branch (64:21): [True: 0, False: 45.2k]
  ------------------
   65|      0|                    BgrToRgb<false>(bgr + size - A3, rgb + size - A3);
   66|  45.2k|                bgr += bgrStride;
   67|  45.2k|                rgb += rgbStride;
   68|  45.2k|            }
   69|     68|        }
_ZN4Simd5Sse418BgrToRgbILb1EEEvPKhPh:
   41|  45.2k|        {
   42|  45.2k|            __m128i s0 = Load<align>((__m128i*)src + 0);
   43|  45.2k|            __m128i s1 = Load<align>((__m128i*)src + 1);
   44|  45.2k|            __m128i s2 = Load<align>((__m128i*)src + 2);
   45|  45.2k|            Store<align>((__m128i*)dst + 0, _mm_or_si128(_mm_shuffle_epi8(s0, K8_CVT_00), _mm_shuffle_epi8(s1, K8_CVT_01)));
   46|  45.2k|            Store<align>((__m128i*)dst + 1, _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(s0, K8_CVT_10), _mm_shuffle_epi8(s1, K8_CVT_11)), _mm_shuffle_epi8(s2, K8_CVT_12)));
   47|  45.2k|            Store<align>((__m128i*)dst + 2, _mm_or_si128(_mm_shuffle_epi8(s1, K8_CVT_21), _mm_shuffle_epi8(s2, K8_CVT_22)));
   48|  45.2k|        }
_ZN4Simd5Sse418BgrToRgbILb0EEEvPKhPh:
   41|  34.7k|        {
   42|  34.7k|            __m128i s0 = Load<align>((__m128i*)src + 0);
   43|  34.7k|            __m128i s1 = Load<align>((__m128i*)src + 1);
   44|  34.7k|            __m128i s2 = Load<align>((__m128i*)src + 2);
   45|  34.7k|            Store<align>((__m128i*)dst + 0, _mm_or_si128(_mm_shuffle_epi8(s0, K8_CVT_00), _mm_shuffle_epi8(s1, K8_CVT_01)));
   46|  34.7k|            Store<align>((__m128i*)dst + 1, _mm_or_si128(_mm_or_si128(_mm_shuffle_epi8(s0, K8_CVT_10), _mm_shuffle_epi8(s1, K8_CVT_11)), _mm_shuffle_epi8(s2, K8_CVT_12)));
   47|  34.7k|            Store<align>((__m128i*)dst + 2, _mm_or_si128(_mm_shuffle_epi8(s1, K8_CVT_21), _mm_shuffle_epi8(s2, K8_CVT_22)));
   48|  34.7k|        }
_ZN4Simd5Sse418BgrToRgbILb0EEEvPKhmmmPhm:
   51|     39|        {
   52|     39|            assert(width >= A);
   53|     39|            if (align)
  ------------------
  |  Branch (53:17): [Folded - Ignored]
  ------------------
   54|      0|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(rgb) && Aligned(rgbStride));
   55|       |
   56|     39|            const size_t A3 = A * 3;
   57|     39|            size_t size = width * 3;
   58|     39|            size_t aligned = AlignLo(width, A) * 3;
   59|       |
   60|  17.3k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (60:34): [True: 17.3k, False: 39]
  ------------------
   61|  17.3k|            {
   62|  34.7k|                for (size_t i = 0; i < aligned; i += A3)
  ------------------
  |  Branch (62:36): [True: 17.3k, False: 17.3k]
  ------------------
   63|  17.3k|                    BgrToRgb<align>(bgr + i, rgb + i);
   64|  17.3k|                if (aligned < size)
  ------------------
  |  Branch (64:21): [True: 17.3k, False: 0]
  ------------------
   65|  17.3k|                    BgrToRgb<false>(bgr + size - A3, rgb + size - A3);
   66|  17.3k|                bgr += bgrStride;
   67|  17.3k|                rgb += rgbStride;
   68|  17.3k|            }
   69|     39|        }

_ZN4Simd5Sse419GetEnableEv:
   66|     30|        {
   67|     30|            return SupportedByCPU() && SupportedByOS();
  ------------------
  |  Branch (67:20): [True: 30, False: 0]
  |  Branch (67:40): [True: 30, False: 0]
  ------------------
   68|     30|        }
_ZN4Simd5Sse4114SupportedByCPUEv:
   37|     30|        {
   38|     30|            return 
   39|     30|                Base::CheckBit(1, 0, Cpuid::Edx, Cpuid::SSE) &&
  ------------------
  |  Branch (39:17): [True: 30, False: 0]
  ------------------
   40|     30|                Base::CheckBit(1, 0, Cpuid::Edx, Cpuid::SSE2) &&
  ------------------
  |  Branch (40:17): [True: 30, False: 0]
  ------------------
   41|     30|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::SSE3) &&
  ------------------
  |  Branch (41:17): [True: 30, False: 0]
  ------------------
   42|     30|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::SSSE3) &&
  ------------------
  |  Branch (42:17): [True: 30, False: 0]
  ------------------
   43|     30|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::SSE41) &&
  ------------------
  |  Branch (43:17): [True: 30, False: 0]
  ------------------
   44|     30|                Base::CheckBit(1, 0, Cpuid::Ecx, Cpuid::SSE42);
  ------------------
  |  Branch (44:17): [True: 30, False: 0]
  ------------------
   45|     30|        }
_ZN4Simd5Sse4113SupportedByOSEv:
   48|     30|        {
   49|       |#if defined(_MSC_VER)
   50|       |            __try
   51|       |            {
   52|       |                int value = _mm_testz_si128(_mm_set1_epi8(0), _mm_set1_epi8(-1)); // try to execute of SSE41 instructions;
   53|       |                uint32_t crc = _mm_crc32_u8(0, 1); // try to execute of SSE42 instructions;
   54|       |                return true;
   55|       |            }
   56|       |            __except (EXCEPTION_EXECUTE_HANDLER)
   57|       |            {
   58|       |                return false;
   59|       |            }
   60|       |#else
   61|     30|            return true;
   62|     30|#endif
   63|     30|        }

_ZN4Simd5Sse419GrayToBgrEPKhmmmPhm:
   64|    532|        {
   65|    532|            if (Aligned(bgr) && Aligned(gray) && Aligned(bgrStride) && Aligned(grayStride))
  ------------------
  |  Branch (65:17): [True: 532, False: 0]
  |  Branch (65:33): [True: 532, False: 0]
  |  Branch (65:50): [True: 532, False: 0]
  |  Branch (65:72): [True: 398, False: 134]
  ------------------
   66|    398|                GrayToBgr<true>(gray, width, height, grayStride, bgr, bgrStride);
   67|    134|            else
   68|    134|                GrayToBgr<false>(gray, width, height, grayStride, bgr, bgrStride);
   69|    532|        }
_ZN4Simd5Sse419GrayToBgrILb1EEEvPKhmmmPhm:
   40|    398|        {
   41|    398|            assert(width >= A);
   42|    398|            if (align)
  ------------------
  |  Branch (42:17): [Folded - Ignored]
  ------------------
   43|    398|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(gray) && Aligned(grayStride));
   44|       |
   45|    398|            size_t alignedWidth = AlignLo(width, A);
   46|   813k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (46:34): [True: 813k, False: 398]
  ------------------
   47|   813k|            {
   48|  1.62M|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (48:38): [True: 813k, False: 813k]
  ------------------
   49|   813k|                {
   50|   813k|                    __m128i _gray = Load<align>((__m128i*)(gray + col));
   51|   813k|                    GrayToBgr<align>(bgr + 3 * col, _gray);
   52|   813k|                }
   53|   813k|                if (alignedWidth != width)
  ------------------
  |  Branch (53:21): [True: 0, False: 813k]
  ------------------
   54|      0|                {
   55|      0|                    __m128i _gray = Load<false>((__m128i*)(gray + width - A));
   56|      0|                    GrayToBgr<false>(bgr + 3 * (width - A), _gray);
   57|      0|                }
   58|   813k|                gray += grayStride;
   59|   813k|                bgr += bgrStride;
   60|   813k|            }
   61|    398|        }
_ZN4Simd5Sse419GrayToBgrILb1EEEvPhDv2_x:
   33|   813k|        {
   34|   813k|            Store<align>((__m128i*)bgr + 0, _mm_shuffle_epi8(gray, K8_SHUFFLE_GRAY_TO_BGR0));
   35|   813k|            Store<align>((__m128i*)bgr + 1, _mm_shuffle_epi8(gray, K8_SHUFFLE_GRAY_TO_BGR1));
   36|   813k|            Store<align>((__m128i*)bgr + 2, _mm_shuffle_epi8(gray, K8_SHUFFLE_GRAY_TO_BGR2));
   37|   813k|        }
_ZN4Simd5Sse419GrayToBgrILb0EEEvPhDv2_x:
   33|   325k|        {
   34|   325k|            Store<align>((__m128i*)bgr + 0, _mm_shuffle_epi8(gray, K8_SHUFFLE_GRAY_TO_BGR0));
   35|   325k|            Store<align>((__m128i*)bgr + 1, _mm_shuffle_epi8(gray, K8_SHUFFLE_GRAY_TO_BGR1));
   36|   325k|            Store<align>((__m128i*)bgr + 2, _mm_shuffle_epi8(gray, K8_SHUFFLE_GRAY_TO_BGR2));
   37|   325k|        }
_ZN4Simd5Sse419GrayToBgrILb0EEEvPKhmmmPhm:
   40|    134|        {
   41|    134|            assert(width >= A);
   42|    134|            if (align)
  ------------------
  |  Branch (42:17): [Folded - Ignored]
  ------------------
   43|      0|                assert(Aligned(bgr) && Aligned(bgrStride) && Aligned(gray) && Aligned(grayStride));
   44|       |
   45|    134|            size_t alignedWidth = AlignLo(width, A);
   46|   162k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (46:34): [True: 162k, False: 134]
  ------------------
   47|   162k|            {
   48|   325k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (48:38): [True: 162k, False: 162k]
  ------------------
   49|   162k|                {
   50|   162k|                    __m128i _gray = Load<align>((__m128i*)(gray + col));
   51|   162k|                    GrayToBgr<align>(bgr + 3 * col, _gray);
   52|   162k|                }
   53|   162k|                if (alignedWidth != width)
  ------------------
  |  Branch (53:21): [True: 162k, False: 0]
  ------------------
   54|   162k|                {
   55|   162k|                    __m128i _gray = Load<false>((__m128i*)(gray + width - A));
   56|   162k|                    GrayToBgr<false>(bgr + 3 * (width - A), _gray);
   57|   162k|                }
   58|   162k|                gray += grayStride;
   59|   162k|                bgr += bgrStride;
   60|   162k|            }
   61|    134|        }

_ZN4Simd5Sse4110GrayToBgraEPKhmmmPhmh:
   71|    266|        {
   72|    266|            if (Aligned(bgra) && Aligned(gray) && Aligned(bgraStride) && Aligned(grayStride))
  ------------------
  |  Branch (72:17): [True: 266, False: 0]
  |  Branch (72:34): [True: 266, False: 0]
  |  Branch (72:51): [True: 266, False: 0]
  |  Branch (72:74): [True: 199, False: 67]
  ------------------
   73|    199|                GrayToBgra<true>(gray, width, height, grayStride, bgra, bgraStride, alpha);
   74|     67|            else
   75|     67|                GrayToBgra<false>(gray, width, height, grayStride, bgra, bgraStride, alpha);
   76|    266|        }
_ZN4Simd5Sse4110GrayToBgraILb1EEEvPKhmmmPhmh:
   46|    199|        {
   47|    199|            assert(width >= A);
   48|    199|            if (align)
  ------------------
  |  Branch (48:17): [Folded - Ignored]
  ------------------
   49|    199|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(gray) && Aligned(grayStride));
   50|       |
   51|    199|            __m128i _alpha = _mm_set1_epi8(alpha);
   52|    199|            size_t alignedWidth = AlignLo(width, A);
   53|   406k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (53:34): [True: 406k, False: 199]
  ------------------
   54|   406k|            {
   55|   813k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (55:38): [True: 406k, False: 406k]
  ------------------
   56|   406k|                {
   57|   406k|                    __m128i _gray = Load<align>((__m128i*)(gray + col));
   58|   406k|                    GrayToBgra<align>(bgra + 4 * col, _gray, _alpha);
   59|   406k|                }
   60|   406k|                if (alignedWidth != width)
  ------------------
  |  Branch (60:21): [True: 0, False: 406k]
  ------------------
   61|      0|                {
   62|      0|                    __m128i _gray = Load<false>((__m128i*)(gray + width - A));
   63|      0|                    GrayToBgra<false>(bgra + 4 * (width - A), _gray, _alpha);
   64|      0|                }
   65|   406k|                gray += grayStride;
   66|   406k|                bgra += bgraStride;
   67|   406k|            }
   68|    199|        }
_ZN4Simd5Sse4110GrayToBgraILb1EEEvPhDv2_xS3_:
   33|   406k|        {
   34|   406k|            __m128i bgLo = _mm_unpacklo_epi8(gray, gray);
   35|   406k|            __m128i bgHi = _mm_unpackhi_epi8(gray, gray);
   36|   406k|            __m128i raLo = _mm_unpacklo_epi8(gray, alpha);
   37|   406k|            __m128i raHi = _mm_unpackhi_epi8(gray, alpha);
   38|       |
   39|   406k|            Store<align>((__m128i*)bgra + 0, _mm_unpacklo_epi16(bgLo, raLo));
   40|   406k|            Store<align>((__m128i*)bgra + 1, _mm_unpackhi_epi16(bgLo, raLo));
   41|   406k|            Store<align>((__m128i*)bgra + 2, _mm_unpacklo_epi16(bgHi, raHi));
   42|   406k|            Store<align>((__m128i*)bgra + 3, _mm_unpackhi_epi16(bgHi, raHi));
   43|   406k|        }
_ZN4Simd5Sse4110GrayToBgraILb0EEEvPhDv2_xS3_:
   33|   162k|        {
   34|   162k|            __m128i bgLo = _mm_unpacklo_epi8(gray, gray);
   35|   162k|            __m128i bgHi = _mm_unpackhi_epi8(gray, gray);
   36|   162k|            __m128i raLo = _mm_unpacklo_epi8(gray, alpha);
   37|   162k|            __m128i raHi = _mm_unpackhi_epi8(gray, alpha);
   38|       |
   39|   162k|            Store<align>((__m128i*)bgra + 0, _mm_unpacklo_epi16(bgLo, raLo));
   40|   162k|            Store<align>((__m128i*)bgra + 1, _mm_unpackhi_epi16(bgLo, raLo));
   41|   162k|            Store<align>((__m128i*)bgra + 2, _mm_unpacklo_epi16(bgHi, raHi));
   42|   162k|            Store<align>((__m128i*)bgra + 3, _mm_unpackhi_epi16(bgHi, raHi));
   43|   162k|        }
_ZN4Simd5Sse4110GrayToBgraILb0EEEvPKhmmmPhmh:
   46|     67|        {
   47|     67|            assert(width >= A);
   48|     67|            if (align)
  ------------------
  |  Branch (48:17): [Folded - Ignored]
  ------------------
   49|      0|                assert(Aligned(bgra) && Aligned(bgraStride) && Aligned(gray) && Aligned(grayStride));
   50|       |
   51|     67|            __m128i _alpha = _mm_set1_epi8(alpha);
   52|     67|            size_t alignedWidth = AlignLo(width, A);
   53|  81.4k|            for (size_t row = 0; row < height; ++row)
  ------------------
  |  Branch (53:34): [True: 81.4k, False: 67]
  ------------------
   54|  81.4k|            {
   55|   162k|                for (size_t col = 0; col < alignedWidth; col += A)
  ------------------
  |  Branch (55:38): [True: 81.4k, False: 81.4k]
  ------------------
   56|  81.4k|                {
   57|  81.4k|                    __m128i _gray = Load<align>((__m128i*)(gray + col));
   58|  81.4k|                    GrayToBgra<align>(bgra + 4 * col, _gray, _alpha);
   59|  81.4k|                }
   60|  81.4k|                if (alignedWidth != width)
  ------------------
  |  Branch (60:21): [True: 81.4k, False: 0]
  ------------------
   61|  81.4k|                {
   62|  81.4k|                    __m128i _gray = Load<false>((__m128i*)(gray + width - A));
   63|  81.4k|                    GrayToBgra<false>(bgra + 4 * (width - A), _gray, _alpha);
   64|  81.4k|                }
   65|  81.4k|                gray += grayStride;
   66|  81.4k|                bgra += bgraStride;
   67|  81.4k|            }
   68|     67|        }

_ZN4Simd5Sse4117ImagePgmTxtLoaderC2ERKNS_16ImageLoaderParamE:
   38|     80|        {
   39|     80|        }
_ZN4Simd5Sse4117ImagePgmTxtLoader13SetConvertersEv:
   42|     56|        {
   43|     56|            Base::ImagePgmTxtLoader::SetConverters();
   44|     56|            if (_image.width >= A)
  ------------------
  |  Branch (44:17): [True: 56, False: 0]
  ------------------
   45|     56|            {
   46|     56|                switch (_param.format)
   47|     56|                {
   48|     14|                case SimdPixelFormatBgr24: _toAny = Sse41::GrayToBgr; break;
  ------------------
  |  Branch (48:17): [True: 14, False: 42]
  ------------------
   49|     14|                case SimdPixelFormatBgra32: _toBgra = Sse41::GrayToBgra; break;
  ------------------
  |  Branch (49:17): [True: 14, False: 42]
  ------------------
   50|     14|                case SimdPixelFormatRgb24: _toAny = Sse41::GrayToBgr; break;
  ------------------
  |  Branch (50:17): [True: 14, False: 42]
  ------------------
   51|      0|                case SimdPixelFormatRgba32: _toBgra = Sse41::GrayToBgra; break;
  ------------------
  |  Branch (51:17): [True: 0, False: 56]
  ------------------
   52|     14|                default: break;
  ------------------
  |  Branch (52:17): [True: 14, False: 42]
  ------------------
   53|     56|                }
   54|     56|            }
   55|     56|        }
_ZN4Simd5Sse4117ImagePgmBinLoaderC2ERKNS_16ImageLoaderParamE:
   61|    292|        {
   62|    292|        }
_ZN4Simd5Sse4117ImagePgmBinLoader13SetConvertersEv:
   65|    292|        {
   66|    292|            Base::ImagePgmBinLoader::SetConverters();
   67|    292|            if (_image.width >= A)
  ------------------
  |  Branch (67:17): [True: 180, False: 112]
  ------------------
   68|    180|            {
   69|    180|                switch (_param.format)
   70|    180|                {
   71|     45|                case SimdPixelFormatBgr24: _toAny = Sse41::GrayToBgr; break;
  ------------------
  |  Branch (71:17): [True: 45, False: 135]
  ------------------
   72|     45|                case SimdPixelFormatBgra32: _toBgra = Sse41::GrayToBgra; break;
  ------------------
  |  Branch (72:17): [True: 45, False: 135]
  ------------------
   73|     45|                case SimdPixelFormatRgb24: _toAny = Sse41::GrayToBgr; break;
  ------------------
  |  Branch (73:17): [True: 45, False: 135]
  ------------------
   74|      0|                case SimdPixelFormatRgba32: _toBgra = Sse41::GrayToBgra; break;
  ------------------
  |  Branch (74:17): [True: 0, False: 180]
  ------------------
   75|     45|                default: break;
  ------------------
  |  Branch (75:17): [True: 45, False: 135]
  ------------------
   76|    180|                }
   77|    180|            }
   78|    292|        }
_ZN4Simd5Sse4117ImagePpmTxtLoaderC2ERKNS_16ImageLoaderParamE:
   84|    120|        {
   85|    120|        }
_ZN4Simd5Sse4117ImagePpmTxtLoader13SetConvertersEv:
   88|     96|        {
   89|     96|            Base::ImagePpmTxtLoader::SetConverters();
   90|     96|            if (_image.width >= A)
  ------------------
  |  Branch (90:17): [True: 96, False: 0]
  ------------------
   91|     96|            {
   92|     96|                switch (_param.format)
   93|     96|                {
   94|     24|                case SimdPixelFormatGray8: _toAny = Sse41::RgbToGray; break;
  ------------------
  |  Branch (94:17): [True: 24, False: 72]
  ------------------
   95|     24|                case SimdPixelFormatBgr24: _toAny = Sse41::BgrToRgb; break;
  ------------------
  |  Branch (95:17): [True: 24, False: 72]
  ------------------
   96|     24|                case SimdPixelFormatBgra32: _toBgra = Sse41::RgbToBgra; break;
  ------------------
  |  Branch (96:17): [True: 24, False: 72]
  ------------------
   97|      0|                case SimdPixelFormatRgba32: _toBgra = Sse41::BgrToBgra; break;
  ------------------
  |  Branch (97:17): [True: 0, False: 96]
  ------------------
   98|     24|                default: break;
  ------------------
  |  Branch (98:17): [True: 24, False: 72]
  ------------------
   99|     96|                }
  100|     96|            }
  101|     96|        }
_ZN4Simd5Sse4117ImagePpmBinLoaderC2ERKNS_16ImageLoaderParamE:
  107|    284|        {
  108|    284|        }
_ZN4Simd5Sse4117ImagePpmBinLoader13SetConvertersEv:
  111|    272|        {
  112|    272|            Base::ImagePpmBinLoader::SetConverters();
  113|    272|            if (_image.width >= A)
  ------------------
  |  Branch (113:17): [True: 208, False: 64]
  ------------------
  114|    208|            {
  115|    208|                switch (_param.format)
  116|    208|                {
  117|     52|                case SimdPixelFormatGray8: _toAny = Sse41::RgbToGray; break;
  ------------------
  |  Branch (117:17): [True: 52, False: 156]
  ------------------
  118|     52|                case SimdPixelFormatBgr24: _toAny = Sse41::BgrToRgb; break;
  ------------------
  |  Branch (118:17): [True: 52, False: 156]
  ------------------
  119|     52|                case SimdPixelFormatBgra32: _toBgra = Sse41::RgbToBgra; break;
  ------------------
  |  Branch (119:17): [True: 52, False: 156]
  ------------------
  120|      0|                case SimdPixelFormatRgba32: _toBgra = Sse41::BgrToBgra; break;
  ------------------
  |  Branch (120:17): [True: 0, False: 208]
  ------------------
  121|     52|                default: break;
  ------------------
  |  Branch (121:17): [True: 52, False: 156]
  ------------------
  122|    208|                }
  123|    208|            }
  124|    272|        }

_ZN4Simd5Sse4114ImagePngLoaderC2ERKNS_16ImageLoaderParamE:
 1717|    168|        {
 1718|    168|            if (_param.format == SimdPixelFormatNone)
  ------------------
  |  Branch (1718:17): [True: 0, False: 168]
  ------------------
 1719|      0|                _param.format = SimdPixelFormatRgb24;
 1720|    168|        }
_ZN4Simd5Sse4114ImagePngLoader10FromStreamEv:
 1723|    168|        {
 1724|    168|            const int req_comp = 4;
 1725|    168|            int x, y, comp;
 1726|    168|            png__context s;
 1727|    168|            s.io.eof = png__stdio_eof;
 1728|    168|            s.io.read = png__stdio_read;
 1729|    168|            s.io.skip = png__stdio_skip;
 1730|    168|            s.io_user_data = &_stream;
 1731|    168|            s.buflen = sizeof(s.buffer_start);
 1732|    168|            s.read_from_callbacks = 1;
 1733|    168|            s.callback_already_read = 0;
 1734|    168|            s.img_buffer = s.img_buffer_original = s.buffer_start;
 1735|    168|            png__refill_buffer(&s);
 1736|    168|            s.img_buffer_original_end = s.img_buffer_end;
 1737|    168|            png__result_info ri;
 1738|    168|            uint8_t* data = (uint8_t*)png__png_load(&s, &x, &y, &comp, req_comp, &ri);
 1739|    168|            if (data)
  ------------------
  |  Branch (1739:17): [True: 0, False: 168]
  ------------------
 1740|      0|            {
 1741|      0|                if (ri.bits_per_channel == 16)
  ------------------
  |  Branch (1741:21): [True: 0, False: 0]
  ------------------
 1742|      0|                {
 1743|      0|                    const uint16_t* src = (uint16_t*)data;
 1744|      0|                    size_t size = x * y * req_comp;
 1745|      0|                    uint8_t* dst = (uint8_t*)PNG_MALLOC(size);
  ------------------
  |  |   43|      0|#define PNG_MALLOC(sz)           malloc(sz)
  ------------------
 1746|      0|                    for (size_t i = 0; i < size; ++i)
  ------------------
  |  Branch (1746:40): [True: 0, False: 0]
  ------------------
 1747|      0|                        dst[i] = uint8_t(src[i] >> 8);
 1748|      0|                    PNG_FREE(data);
  ------------------
  |  |   45|      0|#define PNG_FREE(p)              free(p)
  ------------------
 1749|      0|                    data = dst;
 1750|      0|                }
 1751|      0|                size_t stride = 4 * x;
 1752|      0|                _image.Recreate(x, y, (Image::Format)_param.format);
 1753|      0|                if (x < A)
  ------------------
  |  Branch (1753:21): [True: 0, False: 0]
  ------------------
 1754|      0|                {
 1755|      0|                    switch (_param.format)
 1756|      0|                    {
 1757|      0|                    case SimdPixelFormatGray8:
  ------------------
  |  Branch (1757:21): [True: 0, False: 0]
  ------------------
 1758|      0|                        Base::RgbaToGray(data, x, y, stride, _image.data, _image.stride);
 1759|      0|                        break;
 1760|      0|                    case SimdPixelFormatBgr24:
  ------------------
  |  Branch (1760:21): [True: 0, False: 0]
  ------------------
 1761|      0|                        Base::BgraToRgb(data, x, y, stride, _image.data, _image.stride);
 1762|      0|                        break;
 1763|      0|                    case SimdPixelFormatBgra32:
  ------------------
  |  Branch (1763:21): [True: 0, False: 0]
  ------------------
 1764|      0|                        Base::BgraToRgba(data, x, y, stride, _image.data, _image.stride);
 1765|      0|                        break;
 1766|      0|                    case SimdPixelFormatRgb24:
  ------------------
  |  Branch (1766:21): [True: 0, False: 0]
  ------------------
 1767|      0|                        Base::BgraToBgr(data, x, y, stride, _image.data, _image.stride);
 1768|      0|                        break;
 1769|      0|                    case SimdPixelFormatRgba32:
  ------------------
  |  Branch (1769:21): [True: 0, False: 0]
  ------------------
 1770|      0|                        Base::Copy(data, stride, x, y, 4, _image.data, _image.stride);
 1771|      0|                        break;
 1772|      0|                    default:
  ------------------
  |  Branch (1772:21): [True: 0, False: 0]
  ------------------
 1773|      0|                        break;
 1774|      0|                    }
 1775|      0|                }
 1776|      0|                else
 1777|      0|                {
 1778|      0|                    switch (_param.format)
 1779|      0|                    {
 1780|      0|                    case SimdPixelFormatGray8:
  ------------------
  |  Branch (1780:21): [True: 0, False: 0]
  ------------------
 1781|      0|                        Sse41::RgbaToGray(data, x, y, stride, _image.data, _image.stride);
 1782|      0|                        break;
 1783|      0|                    case SimdPixelFormatBgr24:
  ------------------
  |  Branch (1783:21): [True: 0, False: 0]
  ------------------
 1784|      0|                        Sse41::BgraToRgb(data, x, y, stride, _image.data, _image.stride);
 1785|      0|                        break;
 1786|      0|                    case SimdPixelFormatBgra32:
  ------------------
  |  Branch (1786:21): [True: 0, False: 0]
  ------------------
 1787|      0|                        Sse41::BgraToRgba(data, x, y, stride, _image.data, _image.stride);
 1788|      0|                        break;
 1789|      0|                    case SimdPixelFormatRgb24:
  ------------------
  |  Branch (1789:21): [True: 0, False: 0]
  ------------------
 1790|      0|                        Sse41::BgraToBgr(data, x, y, stride, _image.data, _image.stride);
 1791|      0|                        break;
 1792|      0|                    case SimdPixelFormatRgba32:
  ------------------
  |  Branch (1792:21): [True: 0, False: 0]
  ------------------
 1793|      0|                        Base::Copy(data, stride, x, y, 4, _image.data, _image.stride);
 1794|      0|                        break;
 1795|      0|                    default:
  ------------------
  |  Branch (1795:21): [True: 0, False: 0]
  ------------------
 1796|      0|                        break;
 1797|      0|                    }
 1798|      0|                }
 1799|      0|                PNG_FREE(data);
  ------------------
  |  |   45|      0|#define PNG_FREE(p)              free(p)
  ------------------
 1800|      0|                return true;
 1801|      0|            }
 1802|    168|            return false;
 1803|    168|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L15png__stdio_readEPvPci:
 1695|  4.89k|        {
 1696|  4.89k|            InputMemoryStream* stream = (InputMemoryStream*)user;
 1697|  4.89k|            return (int)stream->Read(size, data);
 1698|  4.89k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L15png__stdio_skipEPvi:
 1701|     12|        {
 1702|     12|            InputMemoryStream* stream = (InputMemoryStream*)user;
 1703|     12|            stream->Skip(n);
 1704|     12|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L18png__refill_bufferEPNS0_12png__contextE:
  124|  2.61k|        {
  125|  2.61k|            int n = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);
  126|  2.61k|            s->callback_already_read += (int)(s->img_buffer - s->img_buffer_original);
  127|  2.61k|            if (n == 0) {
  ------------------
  |  Branch (127:17): [True: 0, False: 2.61k]
  ------------------
  128|       |                // at end of file, treat same as if from memory, but need to handle case
  129|       |                // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
  130|      0|                s->read_from_callbacks = 0;
  131|      0|                s->img_buffer = s->buffer_start;
  132|      0|                s->img_buffer_end = s->buffer_start + 1;
  133|      0|                *s->img_buffer = 0;
  134|      0|            }
  135|  2.61k|            else {
  136|  2.61k|                s->img_buffer = s->buffer_start;
  137|  2.61k|                s->img_buffer_end = s->buffer_start + n;
  138|  2.61k|            }
  139|  2.61k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L13png__png_loadEPNS0_12png__contextEPiS3_S3_iPNS0_16png__result_infoE:
 1576|    168|        {
 1577|    168|            png__png p;
 1578|    168|            p.s = s;
 1579|    168|            return png__do_png(&p, x, y, comp, req_comp, ri);
 1580|    168|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L11png__do_pngEPNS0_8png__pngEPiS3_S3_iPNS0_16png__result_infoE:
 1544|    168|        {
 1545|    168|            void* result = NULL;
 1546|    168|            if (req_comp < 0 || req_comp > 4) return png__errpuc("bad req_comp", "Internal error");
  ------------------
  |  |   63|      0|#define png__errpuc(x,y)  ((unsigned char *)(size_t) (png__err(x,y)?NULL:NULL))
  |  |  ------------------
  |  |  |  Branch (63:55): [True: 0, False: 0]
  |  |  ------------------
  ------------------
  |  Branch (1546:17): [True: 0, False: 168]
  |  Branch (1546:33): [True: 0, False: 168]
  ------------------
 1547|    168|            if (png__parse_png_file(p, PNG__SCAN_load, req_comp)) {
  ------------------
  |  Branch (1547:17): [True: 0, False: 168]
  ------------------
 1548|      0|                if (p->depth <= 8)
  ------------------
  |  Branch (1548:21): [True: 0, False: 0]
  ------------------
 1549|      0|                    ri->bits_per_channel = 8;
 1550|      0|                else if (p->depth == 16)
  ------------------
  |  Branch (1550:26): [True: 0, False: 0]
  ------------------
 1551|      0|                    ri->bits_per_channel = 16;
 1552|      0|                else
 1553|      0|                    return png__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
  ------------------
  |  |   63|      0|#define png__errpuc(x,y)  ((unsigned char *)(size_t) (png__err(x,y)?NULL:NULL))
  |  |  ------------------
  |  |  |  Branch (63:55): [True: 0, False: 0]
  |  |  ------------------
  ------------------
 1554|      0|                result = p->out;
 1555|      0|                p->out = NULL;
 1556|      0|                if (req_comp && req_comp != p->s->img_out_n) {
  ------------------
  |  Branch (1556:21): [True: 0, False: 0]
  |  Branch (1556:33): [True: 0, False: 0]
  ------------------
 1557|      0|                    if (ri->bits_per_channel == 8)
  ------------------
  |  Branch (1557:25): [True: 0, False: 0]
  ------------------
 1558|      0|                        result = png__convert_format((unsigned char*)result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
 1559|      0|                    else
 1560|      0|                        result = png__convert_format16((png__uint16*)result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
 1561|      0|                    p->s->img_out_n = req_comp;
 1562|      0|                    if (result == NULL) return result;
  ------------------
  |  Branch (1562:25): [True: 0, False: 0]
  ------------------
 1563|      0|                }
 1564|      0|                *x = p->s->img_x;
 1565|      0|                *y = p->s->img_y;
 1566|      0|                if (n) *n = p->s->img_n;
  ------------------
  |  Branch (1566:21): [True: 0, False: 0]
  ------------------
 1567|      0|            }
 1568|    168|            PNG_FREE(p->out);      p->out = NULL;
  ------------------
  |  |   45|    168|#define PNG_FREE(p)              free(p)
  ------------------
 1569|    168|            PNG_FREE(p->expanded); p->expanded = NULL;
  ------------------
  |  |   45|    168|#define PNG_FREE(p)              free(p)
  ------------------
 1570|    168|            PNG_FREE(p->idata);    p->idata = NULL;
  ------------------
  |  |   45|    168|#define PNG_FREE(p)              free(p)
  ------------------
 1571|       |
 1572|    168|            return result;
 1573|    168|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L8png__errEPKcS2_:
   59|    148|        {
   60|    148|            return 0;
   61|    148|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L19png__parse_png_fileEPNS0_8png__pngEii:
 1363|    168|        {
 1364|    168|            png_uc palette[1024], pal_img_n = 0;
 1365|    168|            png_uc has_trans = 0, tc[3] = { 0 };
 1366|    168|            png__uint16 tc16[3];
 1367|    168|            png__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0;
 1368|    168|            int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
 1369|    168|            png__context* s = z->s;
 1370|       |
 1371|    168|            z->expanded = NULL;
 1372|    168|            z->idata = NULL;
 1373|    168|            z->out = NULL;
 1374|       |
 1375|    168|            if (!png__check_png_header(s)) return 0;
  ------------------
  |  Branch (1375:17): [True: 0, False: 168]
  ------------------
 1376|       |
 1377|    168|            if (scan == PNG__SCAN_type) return 1;
  ------------------
  |  Branch (1377:17): [True: 0, False: 168]
  ------------------
 1378|       |
 1379|  4.16k|            for (;;) {
 1380|  4.16k|                png__pngchunk c = png__get_chunk_header(s);
 1381|  4.16k|                switch (c.type) {
 1382|      4|                case PNG__PNG_TYPE('C', 'g', 'B', 'I'):
  ------------------
  |  | 1360|      4|#define PNG__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
  ------------------
  |  Branch (1382:17): [True: 4, False: 4.15k]
  ------------------
 1383|      4|                    is_iphone = 1;
 1384|      4|                    png__skip(s, c.length);
 1385|      4|                    break;
 1386|    164|                case PNG__PNG_TYPE('I', 'H', 'D', 'R'): {
  ------------------
  |  | 1360|    164|#define PNG__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
  ------------------
  |  Branch (1386:17): [True: 164, False: 3.99k]
  ------------------
 1387|    164|                    int comp, filter;
 1388|    164|                    if (!first) return png__err("multiple IHDR", "Corrupt PNG");
  ------------------
  |  Branch (1388:25): [True: 0, False: 164]
  ------------------
 1389|    164|                    first = 0;
 1390|    164|                    if (c.length != 13) return png__err("bad IHDR len", "Corrupt PNG");
  ------------------
  |  Branch (1390:25): [True: 0, False: 164]
  ------------------
 1391|    164|                    s->img_x = png__get32be(s);
 1392|    164|                    s->img_y = png__get32be(s);
 1393|    164|                    if (s->img_y > PNG_MAX_DIMENSIONS) return png__err("too large", "Very large image (corrupt?)");
  ------------------
  |  |   56|    164|#define PNG_MAX_DIMENSIONS (1 << 24)
  ------------------
  |  Branch (1393:25): [True: 0, False: 164]
  ------------------
 1394|    164|                    if (s->img_x > PNG_MAX_DIMENSIONS) return png__err("too large", "Very large image (corrupt?)");
  ------------------
  |  |   56|    164|#define PNG_MAX_DIMENSIONS (1 << 24)
  ------------------
  |  Branch (1394:25): [True: 0, False: 164]
  ------------------
 1395|    164|                    z->depth = png__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return png__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
  ------------------
  |  Branch (1395:51): [True: 132, False: 32]
  |  Branch (1395:68): [True: 132, False: 0]
  |  Branch (1395:85): [True: 132, False: 0]
  |  Branch (1395:102): [True: 44, False: 88]
  |  Branch (1395:119): [True: 0, False: 44]
  ------------------
 1396|    164|                    color = png__get8(s);  if (color > 6)         return png__err("bad ctype", "Corrupt PNG");
  ------------------
  |  Branch (1396:48): [True: 0, False: 164]
  ------------------
 1397|    164|                    if (color == 3 && z->depth == 16)                  return png__err("bad ctype", "Corrupt PNG");
  ------------------
  |  Branch (1397:25): [True: 4, False: 160]
  |  Branch (1397:39): [True: 0, False: 4]
  ------------------
 1398|    164|                    if (color == 3) pal_img_n = 3; else if (color & 1) return png__err("bad ctype", "Corrupt PNG");
  ------------------
  |  Branch (1398:25): [True: 4, False: 160]
  |  Branch (1398:61): [True: 0, False: 160]
  ------------------
 1399|    164|                    comp = png__get8(s);  if (comp) return png__err("bad comp method", "Corrupt PNG");
  ------------------
  |  Branch (1399:47): [True: 0, False: 164]
  ------------------
 1400|    164|                    filter = png__get8(s);  if (filter) return png__err("bad filter method", "Corrupt PNG");
  ------------------
  |  Branch (1400:49): [True: 0, False: 164]
  ------------------
 1401|    164|                    interlace = png__get8(s); if (interlace > 1) return png__err("bad interlace method", "Corrupt PNG");
  ------------------
  |  Branch (1401:51): [True: 0, False: 164]
  ------------------
 1402|    164|                    if (!s->img_x || !s->img_y) return png__err("0-pixel image", "Corrupt PNG");
  ------------------
  |  Branch (1402:25): [True: 0, False: 164]
  |  Branch (1402:38): [True: 0, False: 164]
  ------------------
 1403|    164|                    if (!pal_img_n) {
  ------------------
  |  Branch (1403:25): [True: 160, False: 4]
  ------------------
 1404|    160|                        s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
  ------------------
  |  Branch (1404:37): [True: 104, False: 56]
  |  Branch (1404:59): [True: 20, False: 140]
  ------------------
 1405|    160|                        if ((1 << 30) / s->img_x / s->img_n < s->img_y) return png__err("too large", "Image too large to decode");
  ------------------
  |  Branch (1405:29): [True: 0, False: 160]
  ------------------
 1406|    160|                        if (scan == PNG__SCAN_header) return 1;
  ------------------
  |  Branch (1406:29): [True: 0, False: 160]
  ------------------
 1407|    160|                    }
 1408|      4|                    else {
 1409|       |                        // if paletted, then pal_n is our final components, and
 1410|       |                        // img_n is # components to decompress/filter.
 1411|      4|                        s->img_n = 1;
 1412|      4|                        if ((1 << 30) / s->img_x / 4 < s->img_y) return png__err("too large", "Corrupt PNG");
  ------------------
  |  Branch (1412:29): [True: 0, False: 4]
  ------------------
 1413|       |                        // if SCAN_header, have to scan to see if we have a tRNS
 1414|      4|                    }
 1415|    164|                    break;
 1416|    164|                }
 1417|       |
 1418|    164|                case PNG__PNG_TYPE('P', 'L', 'T', 'E'): {
  ------------------
  |  | 1360|     24|#define PNG__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
  ------------------
  |  Branch (1418:17): [True: 24, False: 4.13k]
  ------------------
 1419|     24|                    if (first) return png__err("first not IHDR", "Corrupt PNG");
  ------------------
  |  Branch (1419:25): [True: 0, False: 24]
  ------------------
 1420|     24|                    if (c.length > 256 * 3) return png__err("invalid PLTE", "Corrupt PNG");
  ------------------
  |  Branch (1420:25): [True: 0, False: 24]
  ------------------
 1421|     24|                    pal_len = c.length / 3;
 1422|     24|                    if (pal_len * 3 != c.length) return png__err("invalid PLTE", "Corrupt PNG");
  ------------------
  |  Branch (1422:25): [True: 0, False: 24]
  ------------------
 1423|  5.76k|                    for (i = 0; i < pal_len; ++i) {
  ------------------
  |  Branch (1423:33): [True: 5.73k, False: 24]
  ------------------
 1424|  5.73k|                        palette[i * 4 + 0] = png__get8(s);
 1425|  5.73k|                        palette[i * 4 + 1] = png__get8(s);
 1426|  5.73k|                        palette[i * 4 + 2] = png__get8(s);
 1427|  5.73k|                        palette[i * 4 + 3] = 255;
 1428|  5.73k|                    }
 1429|     24|                    break;
 1430|     24|                }
 1431|       |
 1432|     20|                case PNG__PNG_TYPE('t', 'R', 'N', 'S'): {
  ------------------
  |  | 1360|     20|#define PNG__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
  ------------------
  |  Branch (1432:17): [True: 20, False: 4.14k]
  ------------------
 1433|     20|                    if (first) return png__err("first not IHDR", "Corrupt PNG");
  ------------------
  |  Branch (1433:25): [True: 0, False: 20]
  ------------------
 1434|     20|                    if (z->idata) return png__err("tRNS after IDAT", "Corrupt PNG");
  ------------------
  |  Branch (1434:25): [True: 0, False: 20]
  ------------------
 1435|     20|                    if (pal_img_n) {
  ------------------
  |  Branch (1435:25): [True: 0, False: 20]
  ------------------
 1436|      0|                        if (scan == PNG__SCAN_header) { s->img_n = 4; return 1; }
  ------------------
  |  Branch (1436:29): [True: 0, False: 0]
  ------------------
 1437|      0|                        if (pal_len == 0) return png__err("tRNS before PLTE", "Corrupt PNG");
  ------------------
  |  Branch (1437:29): [True: 0, False: 0]
  ------------------
 1438|      0|                        if (c.length > pal_len) return png__err("bad tRNS len", "Corrupt PNG");
  ------------------
  |  Branch (1438:29): [True: 0, False: 0]
  ------------------
 1439|      0|                        pal_img_n = 4;
 1440|      0|                        for (i = 0; i < c.length; ++i)
  ------------------
  |  Branch (1440:37): [True: 0, False: 0]
  ------------------
 1441|      0|                            palette[i * 4 + 3] = png__get8(s);
 1442|      0|                    }
 1443|     20|                    else {
 1444|     20|                        if (!(s->img_n & 1)) return png__err("tRNS with alpha", "Corrupt PNG");
  ------------------
  |  Branch (1444:29): [True: 0, False: 20]
  ------------------
 1445|     20|                        if (c.length != (png__uint32)s->img_n * 2) return png__err("bad tRNS len", "Corrupt PNG");
  ------------------
  |  Branch (1445:29): [True: 0, False: 20]
  ------------------
 1446|     20|                        has_trans = 1;
 1447|     20|                        if (z->depth == 16) {
  ------------------
  |  Branch (1447:29): [True: 0, False: 20]
  ------------------
 1448|      0|                            for (k = 0; k < s->img_n; ++k) tc16[k] = (png__uint16)png__get16be(s); // copy the values as-is
  ------------------
  |  Branch (1448:41): [True: 0, False: 0]
  ------------------
 1449|      0|                        }
 1450|     20|                        else {
 1451|     80|                            for (k = 0; k < s->img_n; ++k) tc[k] = (png_uc)(png__get16be(s) & 255) * png__depth_scale_table[z->depth]; // non 8-bit images will be larger
  ------------------
  |  Branch (1451:41): [True: 60, False: 20]
  ------------------
 1452|     20|                        }
 1453|     20|                    }
 1454|     20|                    break;
 1455|     20|                }
 1456|       |
 1457|  3.63k|                case PNG__PNG_TYPE('I', 'D', 'A', 'T'): {
  ------------------
  |  | 1360|  3.63k|#define PNG__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
  ------------------
  |  Branch (1457:17): [True: 3.63k, False: 528]
  ------------------
 1458|  3.63k|                    if (first) return png__err("first not IHDR", "Corrupt PNG");
  ------------------
  |  Branch (1458:25): [True: 0, False: 3.63k]
  ------------------
 1459|  3.63k|                    if (pal_img_n && !pal_len) return png__err("no PLTE", "Corrupt PNG");
  ------------------
  |  Branch (1459:25): [True: 4, False: 3.62k]
  |  Branch (1459:38): [True: 0, False: 4]
  ------------------
 1460|  3.63k|                    if (scan == PNG__SCAN_header) { s->img_n = pal_img_n; return 1; }
  ------------------
  |  Branch (1460:25): [True: 0, False: 3.63k]
  ------------------
 1461|  3.63k|                    if ((int)(ioff + c.length) < (int)ioff) return 0;
  ------------------
  |  Branch (1461:25): [True: 0, False: 3.63k]
  ------------------
 1462|  3.63k|                    if (ioff + c.length > idata_limit) {
  ------------------
  |  Branch (1462:25): [True: 264, False: 3.36k]
  ------------------
 1463|    264|                        png__uint32 idata_limit_old = idata_limit;
 1464|    264|                        png_uc* p;
 1465|    264|                        if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
  ------------------
  |  Branch (1465:29): [True: 160, False: 104]
  |  Branch (1465:61): [True: 136, False: 24]
  ------------------
 1466|    392|                        while (ioff + c.length > idata_limit)
  ------------------
  |  Branch (1466:32): [True: 128, False: 264]
  ------------------
 1467|    128|                            idata_limit *= 2;
 1468|    264|                        PNG_NOTUSED(idata_limit_old);
  ------------------
  |  |   52|    264|#define PNG_NOTUSED(v)  (void)sizeof(v)
  ------------------
 1469|    264|                        p = (png_uc*)PNG_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return png__err("outofmem", "Out of memory");
  ------------------
  |  |   46|    264|#define PNG_REALLOC_SIZED(p,oldsz,newsz) PNG_REALLOC(p,newsz)
  |  |  ------------------
  |  |  |  |   44|    264|#define PNG_REALLOC(p,newsz)     realloc(p,newsz)
  |  |  ------------------
  ------------------
  |  Branch (1469:101): [True: 0, False: 264]
  ------------------
 1470|    264|                        z->idata = p;
 1471|    264|                    }
 1472|  3.63k|                    if (!png__getn(s, z->idata + ioff, c.length)) return png__err("outofdata", "Corrupt PNG");
  ------------------
  |  Branch (1472:25): [True: 8, False: 3.62k]
  ------------------
 1473|  3.62k|                    ioff += c.length;
 1474|  3.62k|                    break;
 1475|  3.63k|                }
 1476|       |
 1477|    136|                case PNG__PNG_TYPE('I', 'E', 'N', 'D'): {
  ------------------
  |  | 1360|    136|#define PNG__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
  ------------------
  |  Branch (1477:17): [True: 136, False: 4.02k]
  ------------------
 1478|    136|                    png__uint32 raw_len, bpl;
 1479|    136|                    if (first) return png__err("first not IHDR", "Corrupt PNG");
  ------------------
  |  Branch (1479:25): [True: 0, False: 136]
  ------------------
 1480|    136|                    if (scan != PNG__SCAN_load) return 1;
  ------------------
  |  Branch (1480:25): [True: 0, False: 136]
  ------------------
 1481|    136|                    if (z->idata == NULL) return png__err("no IDAT", "Corrupt PNG");
  ------------------
  |  Branch (1481:25): [True: 0, False: 136]
  ------------------
 1482|       |                    // initial guess for decoded data size to avoid unnecessary reallocs
 1483|    136|                    bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
 1484|    136|                    raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
 1485|    136|                    z->expanded = (png_uc*)png_zlib_decode_malloc_guesssize_headerflag((char*)z->idata, ioff, raw_len, (int*)&raw_len, !is_iphone);
 1486|    136|                    if (z->expanded == NULL) return 0; // zlib should set error
  ------------------
  |  Branch (1486:25): [True: 80, False: 56]
  ------------------
 1487|     56|                    PNG_FREE(z->idata); z->idata = NULL;
  ------------------
  |  |   45|     56|#define PNG_FREE(p)              free(p)
  ------------------
 1488|     56|                    if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
  ------------------
  |  Branch (1488:26): [True: 52, False: 4]
  |  Branch (1488:54): [True: 52, False: 0]
  |  Branch (1488:71): [True: 52, False: 0]
  |  Branch (1488:86): [True: 0, False: 4]
  ------------------
 1489|     52|                        s->img_out_n = s->img_n + 1;
 1490|      4|                    else
 1491|      4|                        s->img_out_n = s->img_n;
 1492|     56|                    if (!png__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
  ------------------
  |  Branch (1492:25): [True: 56, False: 0]
  ------------------
 1493|      0|                    if (has_trans) {
  ------------------
  |  Branch (1493:25): [True: 0, False: 0]
  ------------------
 1494|      0|                        if (z->depth == 16) {
  ------------------
  |  Branch (1494:29): [True: 0, False: 0]
  ------------------
 1495|      0|                            if (!png__compute_transparency16(z, tc16, s->img_out_n)) return 0;
  ------------------
  |  Branch (1495:33): [True: 0, False: 0]
  ------------------
 1496|      0|                        }
 1497|      0|                        else {
 1498|      0|                            if (!png__compute_transparency(z, tc, s->img_out_n)) return 0;
  ------------------
  |  Branch (1498:33): [True: 0, False: 0]
  ------------------
 1499|      0|                        }
 1500|      0|                    }
 1501|      0|                    if (is_iphone && png__de_iphone_flag && s->img_out_n > 2)
  ------------------
  |  Branch (1501:25): [True: 0, False: 0]
  |  Branch (1501:38): [True: 0, False: 0]
  |  Branch (1501:61): [True: 0, False: 0]
  ------------------
 1502|      0|                        png__de_iphone(z);
 1503|      0|                    if (pal_img_n) {
  ------------------
  |  Branch (1503:25): [True: 0, False: 0]
  ------------------
 1504|       |                        // pal_img_n == 3 or 4
 1505|      0|                        s->img_n = pal_img_n; // record the actual colors we had
 1506|      0|                        s->img_out_n = pal_img_n;
 1507|      0|                        if (req_comp >= 3) s->img_out_n = req_comp;
  ------------------
  |  Branch (1507:29): [True: 0, False: 0]
  ------------------
 1508|      0|                        if (!png__expand_png_palette(z, palette, pal_len, s->img_out_n))
  ------------------
  |  Branch (1508:29): [True: 0, False: 0]
  ------------------
 1509|      0|                            return 0;
 1510|      0|                    }
 1511|      0|                    else if (has_trans) {
  ------------------
  |  Branch (1511:30): [True: 0, False: 0]
  ------------------
 1512|       |                        // non-paletted image with tRNS -> source image has (constant) alpha
 1513|      0|                        ++s->img_n;
 1514|      0|                    }
 1515|      0|                    PNG_FREE(z->expanded); z->expanded = NULL;
  ------------------
  |  |   45|      0|#define PNG_FREE(p)              free(p)
  ------------------
 1516|       |                    // end of PNG chunk, read and skip CRC
 1517|      0|                    png__get32be(s);
 1518|      0|                    return 1;
 1519|      0|                }
 1520|       |
 1521|    180|                default:
  ------------------
  |  Branch (1521:17): [True: 180, False: 3.98k]
  ------------------
 1522|       |                    // if critical, fail
 1523|    180|                    if (first) return png__err("first not IHDR", "Corrupt PNG");
  ------------------
  |  Branch (1523:25): [True: 4, False: 176]
  ------------------
 1524|    176|                    if ((c.type & (1 << 29)) == 0) {
  ------------------
  |  Branch (1524:25): [True: 20, False: 156]
  ------------------
 1525|     20|#ifndef PNG_NO_FAILURE_STRINGS
 1526|       |                        // not threadsafe
 1527|     20|                        static char invalid_chunk[] = "XXXX PNG chunk not known";
 1528|     20|                        invalid_chunk[0] = PNG__BYTECAST(c.type >> 24);
  ------------------
  |  |   55|     20|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1529|     20|                        invalid_chunk[1] = PNG__BYTECAST(c.type >> 16);
  ------------------
  |  |   55|     20|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1530|     20|                        invalid_chunk[2] = PNG__BYTECAST(c.type >> 8);
  ------------------
  |  |   55|     20|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1531|     20|                        invalid_chunk[3] = PNG__BYTECAST(c.type >> 0);
  ------------------
  |  |   55|     20|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1532|     20|#endif
 1533|     20|                        return png__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
 1534|     20|                    }
 1535|    156|                    png__skip(s, c.length);
 1536|    156|                    break;
 1537|  4.16k|                }
 1538|       |                // end of PNG chunk, read and skip CRC
 1539|  3.99k|                png__get32be(s);
 1540|  3.99k|            }
 1541|    168|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L21png__check_png_headerEPNS0_12png__contextE:
  898|    168|        {
  899|    168|            static const png_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
  900|    168|            int i;
  901|  1.51k|            for (i = 0; i < 8; ++i)
  ------------------
  |  Branch (901:25): [True: 1.34k, False: 168]
  ------------------
  902|  1.34k|                if (png__get8(s) != png_sig[i]) return png__err("bad png sig", "Not a PNG");
  ------------------
  |  Branch (902:21): [True: 0, False: 1.34k]
  ------------------
  903|    168|            return 1;
  904|    168|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L21png__get_chunk_headerEPNS0_12png__contextE:
  890|  4.16k|        {
  891|  4.16k|            png__pngchunk c;
  892|  4.16k|            c.length = png__get32be(s);
  893|  4.16k|            c.type = png__get32be(s);
  894|  4.16k|            return c;
  895|  4.16k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L9png__skipEPNS0_12png__contextEi:
  177|    160|        {
  178|    160|            if (n == 0) return;  // already there!
  ------------------
  |  Branch (178:17): [True: 4, False: 156]
  ------------------
  179|    156|            if (n < 0) {
  ------------------
  |  Branch (179:17): [True: 8, False: 148]
  ------------------
  180|      8|                s->img_buffer = s->img_buffer_end;
  181|      8|                return;
  182|      8|            }
  183|    148|            if (s->io.read) {
  ------------------
  |  Branch (183:17): [True: 148, False: 0]
  ------------------
  184|    148|                int blen = (int)(s->img_buffer_end - s->img_buffer);
  185|    148|                if (blen < n) {
  ------------------
  |  Branch (185:21): [True: 12, False: 136]
  ------------------
  186|     12|                    s->img_buffer = s->img_buffer_end;
  187|     12|                    (s->io.skip)(s->io_user_data, n - blen);
  188|     12|                    return;
  189|     12|                }
  190|    148|            }
  191|    136|            s->img_buffer += n;
  192|    136|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L12png__get32beEPNS0_12png__contextE:
  159|  12.6k|        {
  160|  12.6k|            png__uint32 z = png__get16be(s);
  161|  12.6k|            return (z << 16) + png__get16be(s);
  162|  12.6k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L9png__get8EPNS0_12png__contextE:
  142|  70.0k|        {
  143|  70.0k|            if (s->img_buffer < s->img_buffer_end)
  ------------------
  |  Branch (143:17): [True: 67.6k, False: 2.44k]
  ------------------
  144|  67.6k|                return *s->img_buffer++;
  145|  2.44k|            if (s->read_from_callbacks) {
  ------------------
  |  Branch (145:17): [True: 2.44k, False: 0]
  ------------------
  146|  2.44k|                png__refill_buffer(s);
  147|  2.44k|                return *s->img_buffer++;
  148|  2.44k|            }
  149|      0|            return 0;
  150|  2.44k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L12png__get16beEPNS0_12png__contextE:
  153|  25.3k|        {
  154|  25.3k|            int z = png__get8(s);
  155|  25.3k|            return (z << 8) + png__get8(s);
  156|  25.3k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L9png__getnEPNS0_12png__contextEPhi:
  195|  3.63k|        {
  196|  3.63k|            if (s->io.read) {
  ------------------
  |  Branch (196:17): [True: 3.63k, False: 0]
  ------------------
  197|  3.63k|                int blen = (int)(s->img_buffer_end - s->img_buffer);
  198|  3.63k|                if (blen < n) {
  ------------------
  |  Branch (198:21): [True: 2.28k, False: 1.35k]
  ------------------
  199|  2.28k|                    int res, count;
  200|       |
  201|  2.28k|                    memcpy(buffer, s->img_buffer, blen);
  202|       |
  203|  2.28k|                    count = (s->io.read)(s->io_user_data, (char*)buffer + blen, n - blen);
  204|  2.28k|                    res = (count == (n - blen));
  205|  2.28k|                    s->img_buffer = s->img_buffer_end;
  206|  2.28k|                    return res;
  207|  2.28k|                }
  208|  3.63k|            }
  209|       |
  210|  1.35k|            if (s->img_buffer + n <= s->img_buffer_end) {
  ------------------
  |  Branch (210:17): [True: 1.35k, False: 0]
  ------------------
  211|  1.35k|                memcpy(buffer, s->img_buffer, n);
  212|  1.35k|                s->img_buffer += n;
  213|  1.35k|                return 1;
  214|  1.35k|            }
  215|      0|            else
  216|      0|                return 0;
  217|  1.35k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L43png_zlib_decode_malloc_guesssize_headerflagEPKciiPii:
  817|    136|        {
  818|    136|            png__zbuf a;
  819|    136|            char* p = (char*)png__malloc(initial_size);
  820|    136|            if (p == NULL) return NULL;
  ------------------
  |  Branch (820:17): [True: 0, False: 136]
  ------------------
  821|    136|            a.zbuffer = (png_uc*)buffer;
  822|    136|            a.zbuffer_end = (png_uc*)buffer + len;
  823|    136|            if (png__do_zlib(&a, p, initial_size, 1, parse_header)) {
  ------------------
  |  Branch (823:17): [True: 56, False: 80]
  ------------------
  824|     56|                if (outlen) *outlen = (int)(a.zout - a.zout_start);
  ------------------
  |  Branch (824:21): [True: 56, False: 0]
  ------------------
  825|     56|                return a.zout_start;
  826|     56|            }
  827|     80|            else {
  828|     80|                PNG_FREE(a.zout_start);
  ------------------
  |  |   45|     80|#define PNG_FREE(p)              free(p)
  ------------------
  829|     80|                return NULL;
  830|     80|            }
  831|    136|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L11png__mallocEm:
   66|    200|        {
   67|    200|            return PNG_MALLOC(size);
  ------------------
  |  |   43|    200|#define PNG_MALLOC(sz)           malloc(sz)
  ------------------
   68|    200|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L12png__do_zlibEPNS0_9png__zbufEPciii:
  785|    136|        {
  786|    136|            a->zout_start = obuf;
  787|    136|            a->zout = obuf;
  788|    136|            a->zout_end = obuf + olen;
  789|    136|            a->z_expandable = exp;
  790|       |
  791|    136|            return png__parse_zlib(a, parse_header);
  792|    136|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L15png__parse_zlibEPNS0_9png__zbufEi:
  754|    136|        {
  755|    136|            int final, type;
  756|    136|            if (parse_header)
  ------------------
  |  Branch (756:17): [True: 136, False: 0]
  ------------------
  757|    136|                if (!png__parse_zlib_header(a)) return 0;
  ------------------
  |  Branch (757:21): [True: 0, False: 136]
  ------------------
  758|    136|            a->num_bits = 0;
  759|    136|            a->code_buffer = 0;
  760|  1.68k|            do {
  761|  1.68k|                final = png__zreceive(a, 1);
  762|  1.68k|                type = png__zreceive(a, 2);
  763|  1.68k|                if (type == 0) {
  ------------------
  |  Branch (763:21): [True: 68, False: 1.61k]
  ------------------
  764|     68|                    if (!png__parse_uncompressed_block(a)) return 0;
  ------------------
  |  Branch (764:25): [True: 4, False: 64]
  ------------------
  765|     68|                }
  766|  1.61k|                else if (type == 3) {
  ------------------
  |  Branch (766:26): [True: 20, False: 1.59k]
  ------------------
  767|     20|                    return 0;
  768|     20|                }
  769|  1.59k|                else {
  770|  1.59k|                    if (type == 1) {
  ------------------
  |  Branch (770:25): [True: 0, False: 1.59k]
  ------------------
  771|       |                        // use fixed code lengths
  772|      0|                        if (!png__zbuild_huffman(&a->z_length, png__zdefault_length, 288)) return 0;
  ------------------
  |  Branch (772:29): [True: 0, False: 0]
  ------------------
  773|      0|                        if (!png__zbuild_huffman(&a->z_distance, png__zdefault_distance, 32)) return 0;
  ------------------
  |  Branch (773:29): [True: 0, False: 0]
  ------------------
  774|      0|                    }
  775|  1.59k|                    else {
  776|  1.59k|                        if (!png__compute_huffman_codes(a)) return 0;
  ------------------
  |  Branch (776:29): [True: 28, False: 1.56k]
  ------------------
  777|  1.59k|                    }
  778|  1.56k|                    if (!png__parse_huffman_block(a)) return 0;
  ------------------
  |  Branch (778:25): [True: 28, False: 1.54k]
  ------------------
  779|  1.56k|                }
  780|  1.68k|            } while (!final);
  ------------------
  |  Branch (780:22): [True: 1.54k, False: 56]
  ------------------
  781|     56|            return 1;
  782|    136|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L22png__parse_zlib_headerEPNS0_9png__zbufE:
  711|    136|        {
  712|    136|            int cmf = png__zget8(a);
  713|    136|            int cm = cmf & 15;
  714|       |            /* int cinfo = cmf >> 4; */
  715|    136|            int flg = png__zget8(a);
  716|    136|            if (png__zeof(a)) return png__err("bad zlib header", "Corrupt PNG"); // zlib spec
  ------------------
  |  Branch (716:17): [True: 0, False: 136]
  ------------------
  717|    136|            if ((cmf * 256 + flg) % 31 != 0) return png__err("bad zlib header", "Corrupt PNG"); // zlib spec
  ------------------
  |  Branch (717:17): [True: 0, False: 136]
  ------------------
  718|    136|            if (flg & 32) return png__err("no preset dict", "Corrupt PNG"); // preset dictionary not allowed in png
  ------------------
  |  Branch (718:17): [True: 0, False: 136]
  ------------------
  719|    136|            if (cm != 8) return png__err("bad compression", "Corrupt PNG"); // DEFLATE required for png
  ------------------
  |  Branch (719:17): [True: 0, False: 136]
  ------------------
  720|       |            // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
  721|    136|            return 1;
  722|    136|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L10png__zget8EPNS0_9png__zbufE:
  482|  4.03M|        {
  483|  4.03M|            return png__zeof(z) ? 0 : *z->zbuffer++;
  ------------------
  |  Branch (483:20): [True: 8, False: 4.03M]
  ------------------
  484|  4.03M|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L9png__zeofEPNS0_9png__zbufE:
  477|  5.86M|        {
  478|  5.86M|            return (z->zbuffer >= z->zbuffer_end);
  479|  5.86M|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L13png__zreceiveEPNS0_9png__zbufEi:
  499|  1.31M|        {
  500|  1.31M|            unsigned int k;
  501|  1.31M|            if (z->num_bits < n) png__fill_bits(z);
  ------------------
  |  Branch (501:17): [True: 11.0k, False: 1.30M]
  ------------------
  502|  1.31M|            k = z->code_buffer & ((1 << n) - 1);
  503|  1.31M|            z->code_buffer >>= n;
  504|  1.31M|            z->num_bits -= n;
  505|  1.31M|            return k;
  506|  1.31M|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L14png__fill_bitsEPNS0_9png__zbufE:
  487|  1.84M|        {
  488|  4.03M|            do {
  489|  4.03M|                if (z->code_buffer >= (1U << z->num_bits)) {
  ------------------
  |  Branch (489:21): [True: 0, False: 4.03M]
  ------------------
  490|      0|                    z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */
  491|      0|                    return;
  492|      0|                }
  493|  4.03M|                z->code_buffer |= (unsigned int)png__zget8(z) << z->num_bits;
  494|  4.03M|                z->num_bits += 8;
  495|  4.03M|            } while (z->num_bits <= 24);
  ------------------
  |  Branch (495:22): [True: 2.19M, False: 1.84M]
  ------------------
  496|  1.84M|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L29png__parse_uncompressed_blockEPNS0_9png__zbufE:
  682|     68|        {
  683|     68|            png_uc header[4];
  684|     68|            int len, nlen, k;
  685|     68|            if (a->num_bits & 7)
  ------------------
  |  Branch (685:17): [True: 68, False: 0]
  ------------------
  686|     68|                png__zreceive(a, a->num_bits & 7); // discard
  687|       |             // drain the bit-packed data into header
  688|     68|            k = 0;
  689|    188|            while (a->num_bits > 0) {
  ------------------
  |  Branch (689:20): [True: 120, False: 68]
  ------------------
  690|    120|                header[k++] = (png_uc)(a->code_buffer & 255); // suppress MSVC run-time check
  691|    120|                a->code_buffer >>= 8;
  692|    120|                a->num_bits -= 8;
  693|    120|            }
  694|     68|            if (a->num_bits < 0) return png__err("zlib corrupt", "Corrupt PNG");
  ------------------
  |  Branch (694:17): [True: 0, False: 68]
  ------------------
  695|       |            // now fill header the normal way
  696|    220|            while (k < 4)
  ------------------
  |  Branch (696:20): [True: 152, False: 68]
  ------------------
  697|    152|                header[k++] = png__zget8(a);
  698|     68|            len = header[1] * 256 + header[0];
  699|     68|            nlen = header[3] * 256 + header[2];
  700|     68|            if (nlen != (len ^ 0xffff)) return png__err("zlib corrupt", "Corrupt PNG");
  ------------------
  |  Branch (700:17): [True: 4, False: 64]
  ------------------
  701|     64|            if (a->zbuffer + len > a->zbuffer_end) return png__err("read past buffer", "Corrupt PNG");
  ------------------
  |  Branch (701:17): [True: 0, False: 64]
  ------------------
  702|     64|            if (a->zout + len > a->zout_end)
  ------------------
  |  Branch (702:17): [True: 64, False: 0]
  ------------------
  703|     64|                if (!png__zexpand(a, a->zout, len)) return 0;
  ------------------
  |  Branch (703:21): [True: 0, False: 64]
  ------------------
  704|     64|            memcpy(a->zout, a->zbuffer, len);
  705|     64|            a->zbuffer += len;
  706|     64|            a->zout += len;
  707|     64|            return 1;
  708|     64|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L12png__zexpandEPNS0_9png__zbufEPci:
  547|    576|        {
  548|    576|            char* q;
  549|    576|            unsigned int cur, limit, old_limit;
  550|    576|            z->zout = zout;
  551|    576|            if (!z->z_expandable) return png__err("output buffer limit", "Corrupt PNG");
  ------------------
  |  Branch (551:17): [True: 0, False: 576]
  ------------------
  552|    576|            cur = (unsigned int)(z->zout - z->zout_start);
  553|    576|            limit = old_limit = (unsigned)(z->zout_end - z->zout_start);
  554|    576|            if (UINT_MAX - cur < (unsigned)n) return png__err("outofmem", "Out of memory");
  ------------------
  |  Branch (554:17): [True: 0, False: 576]
  ------------------
  555|  1.16k|            while (cur + n > limit) {
  ------------------
  |  Branch (555:20): [True: 588, False: 576]
  ------------------
  556|    588|                if (limit > UINT_MAX / 2) return png__err("outofmem", "Out of memory");
  ------------------
  |  Branch (556:21): [True: 0, False: 588]
  ------------------
  557|    588|                limit *= 2;
  558|    588|            }
  559|    576|            q = (char*)PNG_REALLOC_SIZED(z->zout_start, old_limit, limit);
  ------------------
  |  |   46|    576|#define PNG_REALLOC_SIZED(p,oldsz,newsz) PNG_REALLOC(p,newsz)
  |  |  ------------------
  |  |  |  |   44|    576|#define PNG_REALLOC(p,newsz)     realloc(p,newsz)
  |  |  ------------------
  ------------------
  560|    576|            PNG_NOTUSED(old_limit);
  ------------------
  |  |   52|    576|#define PNG_NOTUSED(v)  (void)sizeof(v)
  ------------------
  561|    576|            if (q == NULL) return png__err("outofmem", "Out of memory");
  ------------------
  |  Branch (561:17): [True: 0, False: 576]
  ------------------
  562|    576|            z->zout_start = q;
  563|    576|            z->zout = q + cur;
  564|    576|            z->zout_end = q + limit;
  565|    576|            return 1;
  566|    576|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L19png__zbuild_huffmanEPNS0_13png__zhuffmanEPKhi:
  410|  4.73k|        {
  411|  4.73k|            int i, k = 0;
  412|  4.73k|            int code, next_code[16], sizes[17];
  413|       |
  414|       |            // DEFLATE spec for generating codes
  415|  4.73k|            memset(sizes, 0, sizeof(sizes));
  416|  4.73k|            memset(z->fast, 0, sizeof(z->fast));
  417|   516k|            for (i = 0; i < num; ++i)
  ------------------
  |  Branch (417:25): [True: 511k, False: 4.73k]
  ------------------
  418|   511k|                ++sizes[sizelist[i]];
  419|  4.73k|            sizes[0] = 0;
  420|  75.7k|            for (i = 1; i < 16; ++i)
  ------------------
  |  Branch (420:25): [True: 70.9k, False: 4.73k]
  ------------------
  421|  70.9k|                if (sizes[i] > (1 << i))
  ------------------
  |  Branch (421:21): [True: 0, False: 70.9k]
  ------------------
  422|      0|                    return png__err("bad sizes", "Corrupt PNG");
  423|  4.73k|            code = 0;
  424|  75.6k|            for (i = 1; i < 16; ++i) {
  ------------------
  |  Branch (424:25): [True: 70.9k, False: 4.72k]
  ------------------
  425|  70.9k|                next_code[i] = code;
  426|  70.9k|                z->firstcode[i] = (png__uint16)code;
  427|  70.9k|                z->firstsymbol[i] = (png__uint16)k;
  428|  70.9k|                code = (code + sizes[i]);
  429|  70.9k|                if (sizes[i])
  ------------------
  |  Branch (429:21): [True: 36.2k, False: 34.6k]
  ------------------
  430|  36.2k|                    if (code - 1 >= (1 << i)) return png__err("bad codelengths", "Corrupt PNG");
  ------------------
  |  Branch (430:25): [True: 4, False: 36.2k]
  ------------------
  431|  70.9k|                z->maxcode[i] = code << (16 - i); // preshift for inner loop
  432|  70.9k|                code <<= 1;
  433|  70.9k|                k += sizes[i];
  434|  70.9k|            }
  435|  4.72k|            z->maxcode[16] = 0x10000; // sentinel
  436|   516k|            for (i = 0; i < num; ++i) {
  ------------------
  |  Branch (436:25): [True: 511k, False: 4.72k]
  ------------------
  437|   511k|                int s = sizelist[i];
  438|   511k|                if (s) {
  ------------------
  |  Branch (438:21): [True: 325k, False: 185k]
  ------------------
  439|   325k|                    int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
  440|   325k|                    png__uint16 fastv = (png__uint16)((s << 9) | i);
  441|   325k|                    z->size[c] = (png_uc)s;
  442|   325k|                    z->value[c] = (png__uint16)i;
  443|   325k|                    if (s <= PNG__ZFAST_BITS) {
  ------------------
  |  |  377|   325k|#define PNG__ZFAST_BITS  9 // accelerate all cases in default tables
  ------------------
  |  Branch (443:25): [True: 241k, False: 83.9k]
  ------------------
  444|   241k|                        int j = png__bit_reverse(next_code[s], s);
  445|  2.62M|                        while (j < (1 << PNG__ZFAST_BITS)) {
  ------------------
  |  |  377|  2.62M|#define PNG__ZFAST_BITS  9 // accelerate all cases in default tables
  ------------------
  |  Branch (445:32): [True: 2.38M, False: 241k]
  ------------------
  446|  2.38M|                            z->fast[j] = fastv;
  447|  2.38M|                            j += (1 << s);
  448|  2.38M|                        }
  449|   241k|                    }
  450|   325k|                    ++next_code[s];
  451|   325k|                }
  452|   511k|            }
  453|  4.72k|            return 1;
  454|  4.73k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L16png__bit_reverseEii:
  402|   441k|        {
  403|   441k|            PNG_ASSERT(bits <= 16);
  ------------------
  |  |   42|   441k|#define PNG_ASSERT assert
  ------------------
  404|       |            // to bit reverse n bits, reverse 16 and shift
  405|       |            // e.g. 11 bits, bit reverse and shift away 5
  406|   441k|            return png__bitreverse16(v) >> (16 - bits);
  407|   441k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L17png__bitreverse16Ei:
  393|   441k|        {
  394|   441k|            n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1);
  395|   441k|            n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2);
  396|   441k|            n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4);
  397|   441k|            n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8);
  398|   441k|            return n;
  399|   441k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L26png__compute_huffman_codesEPNS0_9png__zbufE:
  629|  1.59k|        {
  630|  1.59k|            static const png_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
  631|  1.59k|            png__zhuffman z_codelength;
  632|  1.59k|            png_uc lencodes[286 + 32 + 137];//padding for maximum single op
  633|  1.59k|            png_uc codelength_sizes[19];
  634|  1.59k|            int i, n;
  635|       |
  636|  1.59k|            int hlit = png__zreceive(a, 5) + 257;
  637|  1.59k|            int hdist = png__zreceive(a, 5) + 1;
  638|  1.59k|            int hclen = png__zreceive(a, 4) + 4;
  639|  1.59k|            int ntot = hlit + hdist;
  640|       |
  641|  1.59k|            memset(codelength_sizes, 0, sizeof(codelength_sizes));
  642|  27.4k|            for (i = 0; i < hclen; ++i) {
  ------------------
  |  Branch (642:25): [True: 25.8k, False: 1.59k]
  ------------------
  643|  25.8k|                int s = png__zreceive(a, 3);
  644|  25.8k|                codelength_sizes[length_dezigzag[i]] = (png_uc)s;
  645|  25.8k|            }
  646|  1.59k|            if (!png__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
  ------------------
  |  Branch (646:17): [True: 4, False: 1.59k]
  ------------------
  647|       |
  648|  1.59k|            n = 0;
  649|   303k|            while (n < ntot) {
  ------------------
  |  Branch (649:20): [True: 301k, False: 1.56k]
  ------------------
  650|   301k|                int c = png__zhuffman_decode(a, &z_codelength);
  651|   301k|                if (c < 0 || c >= 19) return png__err("bad codelengths", "Corrupt PNG");
  ------------------
  |  Branch (651:21): [True: 8, False: 301k]
  |  Branch (651:30): [True: 0, False: 301k]
  ------------------
  652|   301k|                if (c < 16)
  ------------------
  |  Branch (652:21): [True: 267k, False: 34.1k]
  ------------------
  653|   267k|                    lencodes[n++] = (png_uc)c;
  654|  34.1k|                else {
  655|  34.1k|                    png_uc fill = 0;
  656|  34.1k|                    if (c == 16) {
  ------------------
  |  Branch (656:25): [True: 13.7k, False: 20.3k]
  ------------------
  657|  13.7k|                        c = png__zreceive(a, 2) + 3;
  658|  13.7k|                        if (n == 0) return png__err("bad codelengths", "Corrupt PNG");
  ------------------
  |  Branch (658:29): [True: 0, False: 13.7k]
  ------------------
  659|  13.7k|                        fill = lencodes[n - 1];
  660|  13.7k|                    }
  661|  20.3k|                    else if (c == 17) {
  ------------------
  |  Branch (661:30): [True: 19.4k, False: 964]
  ------------------
  662|  19.4k|                        c = png__zreceive(a, 3) + 3;
  663|  19.4k|                    }
  664|    964|                    else if (c == 18) {
  ------------------
  |  Branch (664:30): [True: 964, False: 0]
  ------------------
  665|    964|                        c = png__zreceive(a, 7) + 11;
  666|    964|                    }
  667|      0|                    else {
  668|      0|                        return png__err("bad codelengths", "Corrupt PNG");
  669|      0|                    }
  670|  34.1k|                    if (ntot - n < c) return png__err("bad codelengths", "Corrupt PNG");
  ------------------
  |  Branch (670:25): [True: 16, False: 34.1k]
  ------------------
  671|  34.1k|                    memset(lencodes + n, fill, c);
  672|  34.1k|                    n += c;
  673|  34.1k|                }
  674|   301k|            }
  675|  1.56k|            if (n != ntot) return png__err("bad codelengths", "Corrupt PNG");
  ------------------
  |  Branch (675:17): [True: 0, False: 1.56k]
  ------------------
  676|  1.56k|            if (!png__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
  ------------------
  |  Branch (676:17): [True: 0, False: 1.56k]
  ------------------
  677|  1.56k|            if (!png__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) return 0;
  ------------------
  |  Branch (677:17): [True: 0, False: 1.56k]
  ------------------
  678|  1.56k|            return 1;
  679|  1.56k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L20png__zhuffman_decodeEPNS0_9png__zbufEPNS0_13png__zhuffmanE:
  528|  5.11M|        {
  529|  5.11M|            int b, s;
  530|  5.11M|            if (a->num_bits < 16) {
  ------------------
  |  Branch (530:17): [True: 1.83M, False: 3.28M]
  ------------------
  531|  1.83M|                if (png__zeof(a)) {
  ------------------
  |  Branch (531:21): [True: 24, False: 1.83M]
  ------------------
  532|     24|                    return -1;   /* report error for unexpected end of data. */
  533|     24|                }
  534|  1.83M|                png__fill_bits(a);
  535|  1.83M|            }
  536|  5.11M|            b = z->fast[a->code_buffer & PNG__ZFAST_MASK];
  ------------------
  |  |  378|  5.11M|#define PNG__ZFAST_MASK  ((1 << PNG__ZFAST_BITS) - 1)
  |  |  ------------------
  |  |  |  |  377|  5.11M|#define PNG__ZFAST_BITS  9 // accelerate all cases in default tables
  |  |  ------------------
  ------------------
  537|  5.11M|            if (b) {
  ------------------
  |  Branch (537:17): [True: 4.91M, False: 199k]
  ------------------
  538|  4.91M|                s = b >> 9;
  539|  4.91M|                a->code_buffer >>= s;
  540|  4.91M|                a->num_bits -= s;
  541|  4.91M|                return b & 511;
  542|  4.91M|            }
  543|   199k|            return png__zhuffman_decode_slowpath(a, z);
  544|  5.11M|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L29png__zhuffman_decode_slowpathEPNS0_9png__zbufEPNS0_13png__zhuffmanE:
  509|   199k|        {
  510|   199k|            int b, s, k;
  511|       |            // not resolved by fast table, so compute it the slow way
  512|       |            // use jpeg approach, which requires MSbits at top
  513|   199k|            k = png__bit_reverse(a->code_buffer, 16);
  514|   199k|            for (s = PNG__ZFAST_BITS + 1; ; ++s)
  ------------------
  |  |  377|   199k|#define PNG__ZFAST_BITS  9 // accelerate all cases in default tables
  ------------------
  515|   393k|                if (k < z->maxcode[s])
  ------------------
  |  Branch (515:21): [True: 199k, False: 193k]
  ------------------
  516|   199k|                    break;
  517|   199k|            if (s >= 16) return -1; // invalid code!
  ------------------
  |  Branch (517:17): [True: 12, False: 199k]
  ------------------
  518|       |            // code size is s, so:
  519|   199k|            b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
  520|   199k|            if (b >= sizeof(z->size)) return -1; // some data was corrupt somewhere!
  ------------------
  |  Branch (520:17): [True: 0, False: 199k]
  ------------------
  521|   199k|            if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
  ------------------
  |  Branch (521:17): [True: 0, False: 199k]
  ------------------
  522|   199k|            a->code_buffer >>= s;
  523|   199k|            a->num_bits -= s;
  524|   199k|            return z->value[b];
  525|   199k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L24png__parse_huffman_blockEPNS0_9png__zbufE:
  583|  1.56k|        {
  584|  1.56k|            SIMD_PERF_FUNC();
  585|       |
  586|  1.56k|            char* zout = a->zout;
  587|  3.66M|            for (;;) {
  588|  3.66M|                int z = png__zhuffman_decode(a, &a->z_length);
  589|  3.66M|                if (z < 256) {
  ------------------
  |  Branch (589:21): [True: 2.52M, False: 1.14M]
  ------------------
  590|  2.52M|                    if (z < 0) return png__err("bad huffman code", "Corrupt PNG"); // error in huffman codes
  ------------------
  |  Branch (590:25): [True: 16, False: 2.52M]
  ------------------
  591|  2.52M|                    if (zout >= a->zout_end) {
  ------------------
  |  Branch (591:25): [True: 160, False: 2.52M]
  ------------------
  592|    160|                        if (!png__zexpand(a, zout, 1)) return 0;
  ------------------
  |  Branch (592:29): [True: 0, False: 160]
  ------------------
  593|    160|                        zout = a->zout;
  594|    160|                    }
  595|  2.52M|                    *zout++ = (char)z;
  596|  2.52M|                }
  597|  1.14M|                else {
  598|  1.14M|                    png_uc* p;
  599|  1.14M|                    int len, dist;
  600|  1.14M|                    if (z == 256) {
  ------------------
  |  Branch (600:25): [True: 1.54k, False: 1.14M]
  ------------------
  601|  1.54k|                        a->zout = zout;
  602|  1.54k|                        return 1;
  603|  1.54k|                    }
  604|  1.14M|                    z -= 257;
  605|  1.14M|                    len = png__zlength_base[z];
  606|  1.14M|                    if (png__zlength_extra[z]) len += png__zreceive(a, png__zlength_extra[z]);
  ------------------
  |  Branch (606:25): [True: 381k, False: 762k]
  ------------------
  607|  1.14M|                    z = png__zhuffman_decode(a, &a->z_distance);
  608|  1.14M|                    if (z < 0) return png__err("bad huffman code", "Corrupt PNG");
  ------------------
  |  Branch (608:25): [True: 12, False: 1.14M]
  ------------------
  609|  1.14M|                    dist = png__zdist_base[z];
  610|  1.14M|                    if (png__zdist_extra[z]) dist += png__zreceive(a, png__zdist_extra[z]);
  ------------------
  |  Branch (610:25): [True: 865k, False: 278k]
  ------------------
  611|  1.14M|                    if (zout - a->zout_start < dist) return png__err("bad dist", "Corrupt PNG");
  ------------------
  |  Branch (611:25): [True: 0, False: 1.14M]
  ------------------
  612|  1.14M|                    if (zout + len > a->zout_end) {
  ------------------
  |  Branch (612:25): [True: 352, False: 1.14M]
  ------------------
  613|    352|                        if (!png__zexpand(a, zout, len)) return 0;
  ------------------
  |  Branch (613:29): [True: 0, False: 352]
  ------------------
  614|    352|                        zout = a->zout;
  615|    352|                    }
  616|  1.14M|                    p = (png_uc*)(zout - dist);
  617|  1.14M|                    if (dist == 1) { // run of one byte; common in images.
  ------------------
  |  Branch (617:25): [True: 35.0k, False: 1.10M]
  ------------------
  618|  35.0k|                        png_uc v = *p;
  619|  6.60M|                        if (len) { do *zout++ = v; while (--len); }
  ------------------
  |  Branch (619:29): [True: 35.0k, False: 0]
  |  Branch (619:59): [True: 6.56M, False: 35.0k]
  ------------------
  620|  35.0k|                    }
  621|  1.10M|                    else {
  622|  88.9M|                        if (len) { do *zout++ = *p++; while (--len); }
  ------------------
  |  Branch (622:29): [True: 1.10M, False: 0]
  |  Branch (622:62): [True: 87.7M, False: 1.10M]
  ------------------
  623|  1.10M|                    }
  624|  1.14M|                }
  625|  3.66M|            }
  626|  1.56k|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L21png__create_png_imageEPNS0_8png__pngEPhjiiii:
 1168|     56|        {
 1169|     56|            int bytes = (depth == 16 ? 2 : 1);
  ------------------
  |  Branch (1169:26): [True: 4, False: 52]
  ------------------
 1170|     56|            int out_bytes = out_n * bytes;
 1171|     56|            png_uc* final;
 1172|     56|            int p;
 1173|     56|            if (!interlaced)
  ------------------
  |  Branch (1173:17): [True: 52, False: 4]
  ------------------
 1174|     52|                return png__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
 1175|       |
 1176|       |            // de-interlacing
 1177|      4|            final = (png_uc*)png__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
 1178|      8|            for (p = 0; p < 7; ++p) {
  ------------------
  |  Branch (1178:25): [True: 8, False: 0]
  ------------------
 1179|      8|                int xorig[] = { 0,4,0,2,0,1,0 };
 1180|      8|                int yorig[] = { 0,0,4,0,2,0,1 };
 1181|      8|                int xspc[] = { 8,8,4,4,2,2,1 };
 1182|      8|                int yspc[] = { 8,8,8,4,4,2,2 };
 1183|      8|                int i, j, x, y;
 1184|       |                // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
 1185|      8|                x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
 1186|      8|                y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
 1187|      8|                if (x && y) {
  ------------------
  |  Branch (1187:21): [True: 8, False: 0]
  |  Branch (1187:26): [True: 8, False: 0]
  ------------------
 1188|      8|                    png__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
 1189|      8|                    if (!png__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
  ------------------
  |  Branch (1189:25): [True: 4, False: 4]
  ------------------
 1190|      4|                        PNG_FREE(final);
  ------------------
  |  |   45|      4|#define PNG_FREE(p)              free(p)
  ------------------
 1191|      4|                        return 0;
 1192|      4|                    }
 1193|    260|                    for (j = 0; j < y; ++j) {
  ------------------
  |  Branch (1193:33): [True: 256, False: 4]
  ------------------
 1194|  6.29M|                        for (i = 0; i < x; ++i) {
  ------------------
  |  Branch (1194:37): [True: 6.29M, False: 256]
  ------------------
 1195|  6.29M|                            int out_y = j * yspc[p] + yorig[p];
 1196|  6.29M|                            int out_x = i * xspc[p] + xorig[p];
 1197|  6.29M|                            memcpy(final + out_y * a->s->img_x * out_bytes + out_x * out_bytes,
 1198|  6.29M|                                a->out + (j * x + i) * out_bytes, out_bytes);
 1199|  6.29M|                        }
 1200|    256|                    }
 1201|      4|                    PNG_FREE(a->out);
  ------------------
  |  |   45|      4|#define PNG_FREE(p)              free(p)
  ------------------
 1202|      4|                    image_data += img_len;
 1203|      4|                    image_data_len -= img_len;
 1204|      4|                }
 1205|      8|            }
 1206|      0|            a->out = final;
 1207|       |
 1208|      0|            return 1;
 1209|      4|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L25png__create_png_image_rawEPNS0_8png__pngEPhjijjii:
  949|     60|        {
  950|     60|            int bytes = (depth == 16 ? 2 : 1);
  ------------------
  |  Branch (950:26): [True: 8, False: 52]
  ------------------
  951|     60|            png__context* s = a->s;
  952|     60|            png__uint32 i, j, stride = x * out_n * bytes;
  953|     60|            png__uint32 img_len, img_width_bytes;
  954|     60|            int k;
  955|     60|            int img_n = s->img_n; // copy it into a local for later
  956|       |
  957|     60|            int output_bytes = out_n * bytes;
  958|     60|            int filter_bytes = img_n * bytes;
  959|     60|            int width = x;
  960|       |
  961|     60|            PNG_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
  ------------------
  |  |   42|     60|#define PNG_ASSERT assert
  ------------------
  962|     60|            a->out = (png_uc*)png__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
  963|     60|            if (!a->out) return png__err("outofmem", "Out of memory");
  ------------------
  |  Branch (963:17): [True: 0, False: 60]
  ------------------
  964|       |
  965|     60|            if (!png__mad3sizes_valid(img_n, x, depth, 7)) return png__err("too large", "Corrupt PNG");
  ------------------
  |  Branch (965:17): [True: 0, False: 60]
  ------------------
  966|     60|            img_width_bytes = (((img_n * x * depth) + 7) >> 3);
  967|     60|            img_len = (img_width_bytes + 1) * y;
  968|       |
  969|       |            // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
  970|       |            // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
  971|       |            // so just check for raw_len < img_len always.
  972|     60|            if (raw_len < img_len) return png__err("not enough pixels", "Corrupt PNG");
  ------------------
  |  Branch (972:17): [True: 0, False: 60]
  ------------------
  973|       |
  974|    764|            for (j = 0; j < y; ++j) {
  ------------------
  |  Branch (974:25): [True: 760, False: 4]
  ------------------
  975|    760|                png_uc* cur = a->out + stride * j;
  976|    760|                png_uc* prior;
  977|    760|                int filter = *raw++;
  978|       |
  979|    760|                if (filter > 4)
  ------------------
  |  Branch (979:21): [True: 56, False: 704]
  ------------------
  980|     56|                    return png__err("invalid filter", "Corrupt PNG");
  981|       |
  982|    704|                if (depth < 8) {
  ------------------
  |  Branch (982:21): [True: 0, False: 704]
  ------------------
  983|      0|                    if (img_width_bytes > x) return png__err("invalid width", "Corrupt PNG");
  ------------------
  |  Branch (983:25): [True: 0, False: 0]
  ------------------
  984|      0|                    cur += x * out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
  985|      0|                    filter_bytes = 1;
  986|      0|                    width = img_width_bytes;
  987|      0|                }
  988|    704|                prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
  989|       |
  990|       |                // if first row, use special filter that doesn't sample previous row
  991|    704|                if (j == 0) filter = first_row_filter[filter];
  ------------------
  |  Branch (991:21): [True: 60, False: 644]
  ------------------
  992|       |
  993|       |                // handle first byte explicitly
  994|  2.41k|                for (k = 0; k < filter_bytes; ++k) {
  ------------------
  |  Branch (994:29): [True: 1.70k, False: 704]
  ------------------
  995|  1.70k|                    switch (filter) {
  ------------------
  |  Branch (995:29): [True: 0, False: 1.70k]
  ------------------
  996|    548|                    case PNG__F_none: cur[k] = raw[k]; break;
  ------------------
  |  Branch (996:21): [True: 548, False: 1.16k]
  ------------------
  997|    168|                    case PNG__F_sub: cur[k] = raw[k]; break;
  ------------------
  |  Branch (997:21): [True: 168, False: 1.54k]
  ------------------
  998|     84|                    case PNG__F_up: cur[k] = PNG__BYTECAST(raw[k] + prior[k]); break;
  ------------------
  |  |   55|     84|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
  |  Branch (998:21): [True: 84, False: 1.62k]
  ------------------
  999|     40|                    case PNG__F_avg: cur[k] = PNG__BYTECAST(raw[k] + (prior[k] >> 1)); break;
  ------------------
  |  |   55|     40|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
  |  Branch (999:21): [True: 40, False: 1.66k]
  ------------------
 1000|    860|                    case PNG__F_paeth: cur[k] = PNG__BYTECAST(raw[k] + png__paeth(0, prior[k], 0)); break;
  ------------------
  |  |   55|    860|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
  |  Branch (1000:21): [True: 860, False: 848]
  ------------------
 1001|      0|                    case PNG__F_avg_first: cur[k] = raw[k]; break;
  ------------------
  |  Branch (1001:21): [True: 0, False: 1.70k]
  ------------------
 1002|      8|                    case PNG__F_paeth_first: cur[k] = raw[k]; break;
  ------------------
  |  Branch (1002:21): [True: 8, False: 1.70k]
  ------------------
 1003|  1.70k|                    }
 1004|  1.70k|                }
 1005|       |
 1006|    704|                if (depth == 8) {
  ------------------
  |  Branch (1006:21): [True: 300, False: 404]
  ------------------
 1007|    300|                    if (img_n != out_n)
  ------------------
  |  Branch (1007:25): [True: 300, False: 0]
  ------------------
 1008|    300|                        cur[img_n] = 255; // first pixel
 1009|    300|                    raw += img_n;
 1010|    300|                    cur += out_n;
 1011|    300|                    prior += out_n;
 1012|    300|                }
 1013|    404|                else if (depth == 16) {
  ------------------
  |  Branch (1013:26): [True: 404, False: 0]
  ------------------
 1014|    404|                    if (img_n != out_n) {
  ------------------
  |  Branch (1014:25): [True: 0, False: 404]
  ------------------
 1015|      0|                        cur[filter_bytes] = 255; // first pixel top byte
 1016|      0|                        cur[filter_bytes + 1] = 255; // first pixel bottom byte
 1017|      0|                    }
 1018|    404|                    raw += filter_bytes;
 1019|    404|                    cur += output_bytes;
 1020|    404|                    prior += output_bytes;
 1021|    404|                }
 1022|      0|                else {
 1023|      0|                    raw += 1;
 1024|      0|                    cur += 1;
 1025|      0|                    prior += 1;
 1026|      0|                }
 1027|       |
 1028|       |                // this is a little gross, so that we don't switch per-pixel or per-component
 1029|    704|                if (depth < 8 || img_n == out_n) {
  ------------------
  |  Branch (1029:21): [True: 0, False: 704]
  |  Branch (1029:34): [True: 404, False: 300]
  ------------------
 1030|    404|                    int nk = (width - 1) * filter_bytes;
 1031|    404|#define PNG__CASE(f) \
 1032|    404|             case f:     \
 1033|    404|                for (k=0; k < nk; ++k)
 1034|    404|                    switch (filter) {
  ------------------
  |  Branch (1034:29): [True: 0, False: 404]
  ------------------
 1035|       |                        // "none" filter turns into a memcpy here; make that explicit.
 1036|    148|                    case PNG__F_none:         memcpy(cur, raw, nk); break;
  ------------------
  |  Branch (1036:21): [True: 148, False: 256]
  ------------------
 1037|  2.94M|                        PNG__CASE(PNG__F_sub) { cur[k] = PNG__BYTECAST(raw[k] + cur[k - filter_bytes]); } break;
  ------------------
  |  | 1032|     60|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1032:14): [True: 60, False: 344]
  |  |  ------------------
  |  | 1033|  2.94M|                for (k=0; k < nk; ++k)
  |  |  ------------------
  |  |  |  Branch (1033:27): [True: 2.94M, False: 60]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_sub) { cur[k] = PNG__BYTECAST(raw[k] + cur[k - filter_bytes]); } break;
  ------------------
  |  |   55|  2.94M|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1038|  1.17M|                        PNG__CASE(PNG__F_up) { cur[k] = PNG__BYTECAST(raw[k] + prior[k]); } break;
  ------------------
  |  | 1032|     24|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1032:14): [True: 24, False: 380]
  |  |  ------------------
  |  | 1033|  1.17M|                for (k=0; k < nk; ++k)
  |  |  ------------------
  |  |  |  Branch (1033:27): [True: 1.17M, False: 24]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_up) { cur[k] = PNG__BYTECAST(raw[k] + prior[k]); } break;
  ------------------
  |  |   55|  1.17M|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1039|   983k|                        PNG__CASE(PNG__F_avg) { cur[k] = PNG__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); } break;
  ------------------
  |  | 1032|     20|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1032:14): [True: 20, False: 384]
  |  |  ------------------
  |  | 1033|   983k|                for (k=0; k < nk; ++k)
  |  |  ------------------
  |  |  |  Branch (1033:27): [True: 983k, False: 20]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_avg) { cur[k] = PNG__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); } break;
  ------------------
  |  |   55|   983k|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1040|  7.27M|                        PNG__CASE(PNG__F_paeth) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); } break;
  ------------------
  |  | 1032|    148|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1032:14): [True: 148, False: 256]
  |  |  ------------------
  |  | 1033|  7.27M|                for (k=0; k < nk; ++k)
  |  |  ------------------
  |  |  |  Branch (1033:27): [True: 7.27M, False: 148]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_paeth) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); } break;
  ------------------
  |  |   55|  7.27M|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1041|      0|                        PNG__CASE(PNG__F_avg_first) { cur[k] = PNG__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); } break;
  ------------------
  |  | 1032|      0|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1032:14): [True: 0, False: 404]
  |  |  ------------------
  |  | 1033|      0|                for (k=0; k < nk; ++k)
  |  |  ------------------
  |  |  |  Branch (1033:27): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_avg_first) { cur[k] = PNG__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); } break;
  ------------------
  |  |   55|      0|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1042|   196k|                        PNG__CASE(PNG__F_paeth_first) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - filter_bytes], 0, 0)); } break;
  ------------------
  |  | 1032|      4|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1032:14): [True: 4, False: 400]
  |  |  ------------------
  |  | 1033|   196k|                for (k=0; k < nk; ++k)
  |  |  ------------------
  |  |  |  Branch (1033:27): [True: 196k, False: 4]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_paeth_first) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - filter_bytes], 0, 0)); } break;
  ------------------
  |  |   55|   196k|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1043|    404|                    }
 1044|    404|#undef PNG__CASE
 1045|    404|                    raw += nk;
 1046|    404|                }
 1047|    300|                else {
 1048|    300|                    PNG_ASSERT(img_n + 1 == out_n);
  ------------------
  |  |   42|    300|#define PNG_ASSERT assert
  ------------------
 1049|    300|#define PNG__CASE(f) \
 1050|    300|             case f:     \
 1051|    300|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
 1052|    300|                   for (k=0; k < filter_bytes; ++k)
 1053|    300|                    switch (filter) {
  ------------------
  |  Branch (1053:29): [True: 0, False: 300]
  ------------------
 1054|  47.4k|                        PNG__CASE(PNG__F_none) { cur[k] = raw[k]; } break;
  ------------------
  |  | 1050|     84|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1050:14): [True: 84, False: 216]
  |  |  ------------------
  |  | 1051|  15.8k|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
  |  |  ------------------
  |  |  |  Branch (1051:29): [True: 15.8k, False: 84]
  |  |  ------------------
  |  | 1052|  63.2k|                   for (k=0; k < filter_bytes; ++k)
  |  |  ------------------
  |  |  |  Branch (1052:30): [True: 47.4k, False: 15.8k]
  |  |  ------------------
  ------------------
 1055|  7.15k|                        PNG__CASE(PNG__F_sub) { cur[k] = PNG__BYTECAST(raw[k] + cur[k - output_bytes]); } break;
  ------------------
  |  | 1050|     16|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1050:14): [True: 16, False: 284]
  |  |  ------------------
  |  | 1051|  2.40k|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
  |  |  ------------------
  |  |  |  Branch (1051:29): [True: 2.38k, False: 16]
  |  |  ------------------
  |  | 1052|  9.53k|                   for (k=0; k < filter_bytes; ++k)
  |  |  ------------------
  |  |  |  Branch (1052:30): [True: 7.15k, False: 2.38k]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_sub) { cur[k] = PNG__BYTECAST(raw[k] + cur[k - output_bytes]); } break;
  ------------------
  |  |   55|  7.15k|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1056|  5.36k|                        PNG__CASE(PNG__F_up) { cur[k] = PNG__BYTECAST(raw[k] + prior[k]); } break;
  ------------------
  |  | 1050|     12|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1050:14): [True: 12, False: 288]
  |  |  ------------------
  |  | 1051|  1.80k|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
  |  |  ------------------
  |  |  |  Branch (1051:29): [True: 1.78k, False: 12]
  |  |  ------------------
  |  | 1052|  7.15k|                   for (k=0; k < filter_bytes; ++k)
  |  |  ------------------
  |  |  |  Branch (1052:30): [True: 5.36k, False: 1.78k]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_up) { cur[k] = PNG__BYTECAST(raw[k] + prior[k]); } break;
  ------------------
  |  |   55|  5.36k|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1057|      0|                        PNG__CASE(PNG__F_avg) { cur[k] = PNG__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); } break;
  ------------------
  |  | 1050|      0|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1050:14): [True: 0, False: 300]
  |  |  ------------------
  |  | 1051|      0|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
  |  |  ------------------
  |  |  |  Branch (1051:29): [True: 0, False: 0]
  |  |  ------------------
  |  | 1052|      0|                   for (k=0; k < filter_bytes; ++k)
  |  |  ------------------
  |  |  |  Branch (1052:30): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_avg) { cur[k] = PNG__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); } break;
  ------------------
  |  |   55|      0|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1058|  84.0k|                        PNG__CASE(PNG__F_paeth) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes])); } break;
  ------------------
  |  | 1050|    188|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1050:14): [True: 188, False: 112]
  |  |  ------------------
  |  | 1051|  28.2k|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
  |  |  ------------------
  |  |  |  Branch (1051:29): [True: 28.0k, False: 188]
  |  |  ------------------
  |  | 1052|   112k|                   for (k=0; k < filter_bytes; ++k)
  |  |  ------------------
  |  |  |  Branch (1052:30): [True: 84.0k, False: 28.0k]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_paeth) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes])); } break;
  ------------------
  |  |   55|  84.0k|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1059|      0|                        PNG__CASE(PNG__F_avg_first) { cur[k] = PNG__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); } break;
  ------------------
  |  | 1050|      0|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1050:14): [True: 0, False: 300]
  |  |  ------------------
  |  | 1051|      0|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
  |  |  ------------------
  |  |  |  Branch (1051:29): [True: 0, False: 0]
  |  |  ------------------
  |  | 1052|      0|                   for (k=0; k < filter_bytes; ++k)
  |  |  ------------------
  |  |  |  Branch (1052:30): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_avg_first) { cur[k] = PNG__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); } break;
  ------------------
  |  |   55|      0|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1060|      0|                        PNG__CASE(PNG__F_paeth_first) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - output_bytes], 0, 0)); } break;
  ------------------
  |  | 1050|      0|             case f:     \
  |  |  ------------------
  |  |  |  Branch (1050:14): [True: 0, False: 300]
  |  |  ------------------
  |  | 1051|      0|                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
  |  |  ------------------
  |  |  |  Branch (1051:29): [True: 0, False: 0]
  |  |  ------------------
  |  | 1052|      0|                   for (k=0; k < filter_bytes; ++k)
  |  |  ------------------
  |  |  |  Branch (1052:30): [True: 0, False: 0]
  |  |  ------------------
  ------------------
                                      PNG__CASE(PNG__F_paeth_first) { cur[k] = PNG__BYTECAST(raw[k] + png__paeth(cur[k - output_bytes], 0, 0)); } break;
  ------------------
  |  |   55|      0|#define PNG__BYTECAST(x)  ((png_uc) ((x) & 255))  // truncate int to byte without warnings
  ------------------
 1061|    300|                    }
 1062|    300|#undef PNG__CASE
 1063|       |
 1064|       |                    // the loop above sets the high byte of the pixels' alpha, but for
 1065|       |                    // 16 bit png files we also need the low byte set. we'll do that here.
 1066|    300|                    if (depth == 16) {
  ------------------
  |  Branch (1066:25): [True: 0, False: 300]
  ------------------
 1067|      0|                        cur = a->out + stride * j; // start at the beginning of the row again
 1068|      0|                        for (i = 0; i < x; ++i, cur += output_bytes) {
  ------------------
  |  Branch (1068:37): [True: 0, False: 0]
  ------------------
 1069|      0|                            cur[filter_bytes + 1] = 255;
 1070|      0|                        }
 1071|      0|                    }
 1072|    300|                }
 1073|    704|            }
 1074|       |
 1075|       |            // we make a separate pass to expand bits to pixels; for performance,
 1076|       |            // this could run two scanlines behind the above code, so it won't
 1077|       |            // intefere with filtering but will still be in the cache.
 1078|      4|            if (depth < 8) {
  ------------------
  |  Branch (1078:17): [True: 0, False: 4]
  ------------------
 1079|      0|                for (j = 0; j < y; ++j) {
  ------------------
  |  Branch (1079:29): [True: 0, False: 0]
  ------------------
 1080|      0|                    png_uc* cur = a->out + stride * j;
 1081|      0|                    png_uc* in = a->out + stride * j + x * out_n - img_width_bytes;
 1082|       |                    // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
 1083|       |                    // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
 1084|      0|                    png_uc scale = (color == 0) ? png__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
  ------------------
  |  Branch (1084:36): [True: 0, False: 0]
  ------------------
 1085|       |
 1086|       |                    // note that the final byte might overshoot and write more data than desired.
 1087|       |                    // we can allocate enough data that this never writes out of memory, but it
 1088|       |                    // could also overwrite the next scanline. can it overwrite non-empty data
 1089|       |                    // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
 1090|       |                    // so we need to explicitly clamp the final ones
 1091|       |
 1092|      0|                    if (depth == 4) {
  ------------------
  |  Branch (1092:25): [True: 0, False: 0]
  ------------------
 1093|      0|                        for (k = x * img_n; k >= 2; k -= 2, ++in) {
  ------------------
  |  Branch (1093:45): [True: 0, False: 0]
  ------------------
 1094|      0|                            *cur++ = scale * ((*in >> 4));
 1095|      0|                            *cur++ = scale * ((*in) & 0x0f);
 1096|      0|                        }
 1097|      0|                        if (k > 0) *cur++ = scale * ((*in >> 4));
  ------------------
  |  Branch (1097:29): [True: 0, False: 0]
  ------------------
 1098|      0|                    }
 1099|      0|                    else if (depth == 2) {
  ------------------
  |  Branch (1099:30): [True: 0, False: 0]
  ------------------
 1100|      0|                        for (k = x * img_n; k >= 4; k -= 4, ++in) {
  ------------------
  |  Branch (1100:45): [True: 0, False: 0]
  ------------------
 1101|      0|                            *cur++ = scale * ((*in >> 6));
 1102|      0|                            *cur++ = scale * ((*in >> 4) & 0x03);
 1103|      0|                            *cur++ = scale * ((*in >> 2) & 0x03);
 1104|      0|                            *cur++ = scale * ((*in) & 0x03);
 1105|      0|                        }
 1106|      0|                        if (k > 0) *cur++ = scale * ((*in >> 6));
  ------------------
  |  Branch (1106:29): [True: 0, False: 0]
  ------------------
 1107|      0|                        if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
  ------------------
  |  Branch (1107:29): [True: 0, False: 0]
  ------------------
 1108|      0|                        if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
  ------------------
  |  Branch (1108:29): [True: 0, False: 0]
  ------------------
 1109|      0|                    }
 1110|      0|                    else if (depth == 1) {
  ------------------
  |  Branch (1110:30): [True: 0, False: 0]
  ------------------
 1111|      0|                        for (k = x * img_n; k >= 8; k -= 8, ++in) {
  ------------------
  |  Branch (1111:45): [True: 0, False: 0]
  ------------------
 1112|      0|                            *cur++ = scale * ((*in >> 7));
 1113|      0|                            *cur++ = scale * ((*in >> 6) & 0x01);
 1114|      0|                            *cur++ = scale * ((*in >> 5) & 0x01);
 1115|      0|                            *cur++ = scale * ((*in >> 4) & 0x01);
 1116|      0|                            *cur++ = scale * ((*in >> 3) & 0x01);
 1117|      0|                            *cur++ = scale * ((*in >> 2) & 0x01);
 1118|      0|                            *cur++ = scale * ((*in >> 1) & 0x01);
 1119|      0|                            *cur++ = scale * ((*in) & 0x01);
 1120|      0|                        }
 1121|      0|                        if (k > 0) *cur++ = scale * ((*in >> 7));
  ------------------
  |  Branch (1121:29): [True: 0, False: 0]
  ------------------
 1122|      0|                        if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
  ------------------
  |  Branch (1122:29): [True: 0, False: 0]
  ------------------
 1123|      0|                        if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
  ------------------
  |  Branch (1123:29): [True: 0, False: 0]
  ------------------
 1124|      0|                        if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
  ------------------
  |  Branch (1124:29): [True: 0, False: 0]
  ------------------
 1125|      0|                        if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
  ------------------
  |  Branch (1125:29): [True: 0, False: 0]
  ------------------
 1126|      0|                        if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
  ------------------
  |  Branch (1126:29): [True: 0, False: 0]
  ------------------
 1127|      0|                        if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
  ------------------
  |  Branch (1127:29): [True: 0, False: 0]
  ------------------
 1128|      0|                    }
 1129|      0|                    if (img_n != out_n) {
  ------------------
  |  Branch (1129:25): [True: 0, False: 0]
  ------------------
 1130|      0|                        int q;
 1131|       |                        // insert alpha = 255
 1132|      0|                        cur = a->out + stride * j;
 1133|      0|                        if (img_n == 1) {
  ------------------
  |  Branch (1133:29): [True: 0, False: 0]
  ------------------
 1134|      0|                            for (q = x - 1; q >= 0; --q) {
  ------------------
  |  Branch (1134:45): [True: 0, False: 0]
  ------------------
 1135|      0|                                cur[q * 2 + 1] = 255;
 1136|      0|                                cur[q * 2 + 0] = cur[q];
 1137|      0|                            }
 1138|      0|                        }
 1139|      0|                        else {
 1140|      0|                            PNG_ASSERT(img_n == 3);
  ------------------
  |  |   42|      0|#define PNG_ASSERT assert
  ------------------
 1141|      0|                            for (q = x - 1; q >= 0; --q) {
  ------------------
  |  Branch (1141:45): [True: 0, False: 0]
  ------------------
 1142|      0|                                cur[q * 4 + 3] = 255;
 1143|      0|                                cur[q * 4 + 2] = cur[q * 3 + 2];
 1144|      0|                                cur[q * 4 + 1] = cur[q * 3 + 1];
 1145|      0|                                cur[q * 4 + 0] = cur[q * 3 + 0];
 1146|      0|                            }
 1147|      0|                        }
 1148|      0|                    }
 1149|      0|                }
 1150|      0|            }
 1151|      4|            else if (depth == 16) {
  ------------------
  |  Branch (1151:22): [True: 4, False: 0]
  ------------------
 1152|       |                // force the image data from big-endian to platform-native.
 1153|       |                // this is done in a separate pass due to the decoding relying
 1154|       |                // on the data being untouched, but could probably be done
 1155|       |                // per-line during decode if care is taken.
 1156|      4|                png_uc* cur = a->out;
 1157|      4|                png__uint16* cur16 = (png__uint16*)cur;
 1158|       |
 1159|  6.29M|                for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {
  ------------------
  |  Branch (1159:29): [True: 6.29M, False: 4]
  ------------------
 1160|  6.29M|                    *cur16 = (cur[0] << 8) | cur[1];
 1161|  6.29M|                }
 1162|      4|            }
 1163|       |
 1164|      4|            return 1;
 1165|     60|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L20png__mad3sizes_validEiiii:
  247|    124|        {
  248|    124|            return png__mul2sizes_valid(a, b) && png__mul2sizes_valid(a * b, c) &&
  ------------------
  |  Branch (248:20): [True: 124, False: 0]
  |  Branch (248:50): [True: 124, False: 0]
  ------------------
  249|    124|                png__addsizes_valid(a * b * c, add);
  ------------------
  |  Branch (249:17): [True: 124, False: 0]
  ------------------
  250|    124|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L20png__mul2sizes_validEii:
  232|    248|        {
  233|    248|            if (a < 0 || b < 0) return 0;
  ------------------
  |  Branch (233:17): [True: 0, False: 248]
  |  Branch (233:26): [True: 0, False: 248]
  ------------------
  234|    248|            if (b == 0) return 1; // mul-by-0 is always safe
  ------------------
  |  Branch (234:17): [True: 0, False: 248]
  ------------------
  235|       |            // portable way to check for no overflows in a*b
  236|    248|            return a <= INT_MAX / b;
  237|    248|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L19png__addsizes_validEii:
  220|    124|        {
  221|    124|            if (b < 0) return 0;
  ------------------
  |  Branch (221:17): [True: 0, False: 124]
  ------------------
  222|       |            // now 0 <= b <= INT_MAX, hence also
  223|       |            // 0 <= INT_MAX - b <= INTMAX.
  224|       |            // And "a + b <= INT_MAX" (which might overflow) is the
  225|       |            // same as a <= INT_MAX - b (no overflow)
  226|    124|            return a <= INT_MAX - b;
  227|    124|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L10png__paethEiii:
  935|  7.55M|        {
  936|  7.55M|            int p = a + b - c;
  937|  7.55M|            int pa = abs(p - a);
  938|  7.55M|            int pb = abs(p - b);
  939|  7.55M|            int pc = abs(p - c);
  940|  7.55M|            if (pa <= pb && pa <= pc) return a;
  ------------------
  |  Branch (940:17): [True: 6.45M, False: 1.10M]
  |  Branch (940:29): [True: 6.38M, False: 64.1k]
  ------------------
  941|  1.16M|            if (pb <= pc) return b;
  ------------------
  |  Branch (941:17): [True: 1.09M, False: 74.0k]
  ------------------
  942|  74.0k|            return c;
  943|  1.16M|        }
SimdSse41ImageLoadPng.cpp:_ZN4Simd5Sse41L16png__malloc_mad3Eiiii:
  267|     64|        {
  268|     64|            if (!png__mad3sizes_valid(a, b, c, add)) return NULL;
  ------------------
  |  Branch (268:17): [True: 0, False: 64]
  ------------------
  269|     64|            return png__malloc(a * b * c + add);
  270|     64|        }

_ZN4Simd5Sse415StoreILb0EEEvPDv2_xS2_:
   90|  1.90M|        {
   91|  1.90M|            _mm_storeu_si128(p, a);
   92|  1.90M|        }
_ZN4Simd5Sse415StoreILb1EEEvPDv2_xS2_:
   95|  4.42M|        {
   96|  4.42M|            _mm_store_si128(p, a);
   97|  4.42M|        }
_ZN4Simd4Avx25StoreILb1EEEvPDv4_xS2_:
  189|   919k|        {
  190|   919k|            _mm256_store_si256(p, a);
  191|   919k|        }
_ZN4Simd4Avx25StoreILb0EEEvPDv4_xS2_:
  184|  1.54M|        {
  185|  1.54M|            _mm256_storeu_si256(p, a);
  186|  1.54M|        }
_ZN4Simd4Avx211PackI16ToU8EDv4_xS1_:
  216|  82.1k|        {
  217|  82.1k|            return _mm256_permute4x64_epi64(_mm256_packus_epi16(lo, hi), 0xD8);
  218|  82.1k|        }
_ZN4Simd4Avx212PackI32ToI16EDv4_xS1_:
  221|   164k|        {
  222|   164k|            return _mm256_permute4x64_epi64(_mm256_packs_epi32(lo, hi), 0xD8);
  223|   164k|        }

_ZN4Simd4ViewINS_9AllocatorEEC2Ev:
  737|  4.80k|    {
  738|  4.80k|    }
_ZN4Simd4ViewINS_9AllocatorEED2Ev:
  901|  4.80k|    {
  902|  4.80k|        if (_owner && data)
  ------------------
  |  Branch (902:13): [True: 876, False: 3.92k]
  |  Branch (902:23): [True: 876, False: 0]
  ------------------
  903|    876|        {
  904|    876|            Allocator::Free(data);
  905|    876|        }
  906|  4.80k|    }
_ZN4Simd4ViewINS_9AllocatorEE4LoadEPKhmNS2_6FormatE:
 1281|  2.40k|    {
 1282|  2.40k|        Clear();
 1283|  2.40k|        (Format&)format = format_;
 1284|  2.40k|        *(uint8_t**)&data = SimdImageLoadFromMemory(src, size, (size_t*)&stride, (size_t*)&width, (size_t*)&height, (SimdPixelFormatType*)&format);
 1285|  2.40k|        if (data)
  ------------------
  |  Branch (1285:13): [True: 336, False: 2.06k]
  ------------------
 1286|    336|            _owner = true;
 1287|  2.06k|        else
 1288|  2.06k|            (Format&)format = None;
 1289|  2.40k|        return _owner;
 1290|  2.40k|    }
_ZN4Simd4ViewINS_9AllocatorEE5ClearEv:
 1298|  2.74k|    {
 1299|  2.74k|        if (_owner && data)
  ------------------
  |  Branch (1299:13): [True: 0, False: 2.74k]
  |  Branch (1299:23): [True: 0, False: 0]
  ------------------
 1300|      0|            Allocator::Free(data);
 1301|  2.74k|#ifdef SIMD_CPP_2011_ENABLE
 1302|  2.74k|        *(void**)&data = nullptr;
 1303|       |#else
 1304|       |        *(void**)&data = NULL;
 1305|       |#endif
 1306|  2.74k|        _owner = false;
 1307|  2.74k|        *(size_t*)&width = 0;
 1308|  2.74k|        *(size_t*)&height = 0;
 1309|  2.74k|        *(ptrdiff_t *)&stride = 0;
 1310|  2.74k|#ifdef SIMD_CPP_2011_ENABLE
 1311|  2.74k|        *(Format*)&format = Format::None;
 1312|       |#else
 1313|       |        *(Format*)&format = (Format)(0); // Modified for c++ 98
 1314|       |#endif
 1315|  2.74k|    }
_ZN4Simd4ViewINS_9AllocatorEE7ReleaseEPm:
 1318|    336|    {
 1319|    336|        uint8_t* released = data;
 1320|    336|        if (size)
  ------------------
  |  Branch (1320:13): [True: 0, False: 336]
  ------------------
 1321|      0|            *size = DataSize();
 1322|    336|        _owner = false;
 1323|    336|        Clear();
 1324|    336|        return released;
 1325|    336|    }
_ZN4Simd4ViewINS_9AllocatorEE9PixelSizeENS2_6FormatE:
 1124|    876|    {
 1125|    876|        switch (format)
 1126|    876|        {
 1127|      0|        case None:      return 0;
  ------------------
  |  Branch (1127:9): [True: 0, False: 876]
  ------------------
 1128|    219|        case Gray8:     return 1;
  ------------------
  |  Branch (1128:9): [True: 219, False: 657]
  ------------------
 1129|      0|        case Uv16:      return 2;
  ------------------
  |  Branch (1129:9): [True: 0, False: 876]
  ------------------
 1130|    219|        case Bgr24:     return 3;
  ------------------
  |  Branch (1130:9): [True: 219, False: 657]
  ------------------
 1131|    219|        case Bgra32:    return 4;
  ------------------
  |  Branch (1131:9): [True: 219, False: 657]
  ------------------
 1132|      0|        case Int16:     return 2;
  ------------------
  |  Branch (1132:9): [True: 0, False: 876]
  ------------------
 1133|      0|        case Int32:     return 4;
  ------------------
  |  Branch (1133:9): [True: 0, False: 876]
  ------------------
 1134|      0|        case Int64:     return 8;
  ------------------
  |  Branch (1134:9): [True: 0, False: 876]
  ------------------
 1135|      0|        case Float:     return 4;
  ------------------
  |  Branch (1135:9): [True: 0, False: 876]
  ------------------
 1136|      0|        case Double:    return 8;
  ------------------
  |  Branch (1136:9): [True: 0, False: 876]
  ------------------
 1137|      0|        case BayerGrbg: return 1;
  ------------------
  |  Branch (1137:9): [True: 0, False: 876]
  ------------------
 1138|      0|        case BayerGbrg: return 1;
  ------------------
  |  Branch (1138:9): [True: 0, False: 876]
  ------------------
 1139|      0|        case BayerRggb: return 1;
  ------------------
  |  Branch (1139:9): [True: 0, False: 876]
  ------------------
 1140|      0|        case BayerBggr: return 1;
  ------------------
  |  Branch (1140:9): [True: 0, False: 876]
  ------------------
 1141|      0|        case Hsv24:     return 3;
  ------------------
  |  Branch (1141:9): [True: 0, False: 876]
  ------------------
 1142|      0|        case Hsl24:     return 3;
  ------------------
  |  Branch (1142:9): [True: 0, False: 876]
  ------------------
 1143|    219|        case Rgb24:     return 3;
  ------------------
  |  Branch (1143:9): [True: 219, False: 657]
  ------------------
 1144|      0|        case Rgba32:    return 4;
  ------------------
  |  Branch (1144:9): [True: 0, False: 876]
  ------------------
 1145|      0|        case Uyvy16:    return 2;
  ------------------
  |  Branch (1145:9): [True: 0, False: 876]
  ------------------
 1146|      0|        case Argb32:    return 4;
  ------------------
  |  Branch (1146:9): [True: 0, False: 876]
  ------------------
 1147|      0|        default: assert(0); return 0;
  ------------------
  |  Branch (1147:9): [True: 0, False: 876]
  ------------------
 1148|    876|        }
 1149|    876|    }
_ZN4Simd4ViewINS_9AllocatorEE8RecreateEmmNS2_6FormatEPvm:
  988|    876|    {
  989|    876|        if (_owner && data)
  ------------------
  |  Branch (989:13): [True: 0, False: 876]
  |  Branch (989:23): [True: 0, False: 0]
  ------------------
  990|      0|        {
  991|      0|            Allocator::Free(data);
  992|      0|            *(void**)&data = NULL;
  993|      0|            _owner = false;
  994|      0|        }
  995|    876|        *(size_t*)&width = w;
  996|    876|        *(size_t*)&height = h;
  997|    876|        *(Format*)&format = f;
  998|    876|        *(ptrdiff_t*)&stride = Allocator::Align(width*PixelSize(format), align);
  999|    876|        if (d)
  ------------------
  |  Branch (999:13): [True: 0, False: 876]
  ------------------
 1000|      0|        {
 1001|      0|            *(void**)&data = Allocator::Align(d, align);
 1002|      0|            _owner = false;
 1003|      0|        }
 1004|    876|        else if(height && stride)
  ------------------
  |  Branch (1004:17): [True: 876, False: 0]
  |  Branch (1004:27): [True: 876, False: 0]
  ------------------
 1005|    876|        {
 1006|    876|            *(void**)&data = Allocator::Allocate(height*stride, align);
 1007|    876|            _owner = true;
 1008|    876|        }
 1009|    876|    }
_ZN4Simd4ViewINS_9AllocatorEE3RowIhEEPT_m:
 1118|  4.23k|    {
 1119|  4.23k|        assert(row < height);
 1120|  4.23k|        return ((T*)(data + row*stride));
 1121|  4.23k|    }

LLVMFuzzerTestOneInput:
   19|    601|int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size){
   20|    601|  if (size<5) {
  ------------------
  |  Branch (20:7): [True: 0, False: 601]
  ------------------
   21|      0|    return 0;
   22|      0|  }
   23|    601|  Test::View::Format formats[4] = {Test::View::Gray8,
   24|    601|                                   Test::View::Bgr24,
   25|    601|                                   Test::View::Bgra32,
   26|    601|                                   Test::View::Rgb24};
   27|  3.00k|  for(int i=0; i<4; i++) {
  ------------------
  |  Branch (27:16): [True: 2.40k, False: 601]
  ------------------
   28|  2.40k|    Test::View dst1;
   29|  2.40k|    dst1.Load(data, size, formats[i]);
   30|  2.40k|  }
   31|    601|  return 0;
   32|    601|}

