LLVMFuzzerInitialize:
   12|      2|extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
   13|      2|    memset(&glb, 0, sizeof(glb));
   14|      2|    glb.version = XVID_VERSION;
  ------------------
  |  |   71|      2|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|      2|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   15|      2|    if ( xvid_global(nullptr, XVID_GBL_INIT, &glb, nullptr) ) {
  ------------------
  |  |  235|      2|#define XVID_GBL_INIT    0 /* initialize xvidcore; must be called before using xvid_decore, or xvid_encore) */
  ------------------
  |  Branch (15:10): [True: 0, False: 2]
  ------------------
   16|      0|        abort();
   17|      0|    }
   18|       |
   19|      2|    return 0;
   20|      2|}
LLVMFuzzerTestOneInput:
   22|  10.5k|extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   23|  10.5k|	xvid_dec_stats_t stats;
   24|  10.5k|    xvid_dec_create_t ctx;
   25|  10.5k|    xvid_dec_frame_t frame;
   26|       |
   27|  10.5k|    uint32_t width = 0;
   28|  10.5k|    uint32_t height = 0;
   29|  10.5k|    int remaining = size;
   30|       |
   31|  10.5k|    uint8_t* out = nullptr;
   32|  10.5k|    uint8_t* dataCopy = (uint8_t*)calloc(1, size + 10240);
   33|  10.5k|    memcpy(dataCopy, data, size);
   34|       |
   35|  10.5k|	uint8_t* inptr = dataCopy;
   36|       |
   37|  10.5k|    {
   38|  10.5k|        memset(&ctx, 0, sizeof(ctx));
   39|       |
   40|  10.5k|        ctx.version = XVID_VERSION;
  ------------------
  |  |   71|  10.5k|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|  10.5k|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   41|  10.5k|        ctx.width = width;
   42|  10.5k|        ctx.height = height;
   43|  10.5k|    }
   44|       |
   45|  10.5k|    if ( xvid_decore(nullptr, XVID_DEC_CREATE, &ctx, nullptr) ) {
  ------------------
  |  |  246|  10.5k|#define XVID_DEC_CREATE  0 /* create decore instance; return 0 on success */
  ------------------
  |  Branch (45:10): [True: 0, False: 10.5k]
  ------------------
   46|      0|        abort();
   47|      0|    }
   48|       |
   49|       |
   50|  10.5k|    int loops = 0;
   51|  72.0k|    do {
   52|  72.0k|        {
   53|  72.0k|            memset(&stats, 0, sizeof(xvid_dec_stats_t));
   54|  72.0k|            stats.version = XVID_VERSION;
  ------------------
  |  |   71|  72.0k|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|  72.0k|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   55|  72.0k|        }
   56|       |
   57|  72.0k|        {
   58|  72.0k|            memset(&frame, 0, sizeof(xvid_dec_frame_t));
   59|       |
   60|  72.0k|            frame.version = XVID_VERSION;
  ------------------
  |  |   71|  72.0k|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|  72.0k|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   61|  72.0k|            frame.general = 0;
   62|       |
   63|  72.0k|            frame.bitstream = inptr;
   64|  72.0k|            frame.length = remaining;
   65|       |
   66|  72.0k|            frame.output.plane[0]  = out;
   67|  72.0k|            frame.output.stride[0] = width * 3;
   68|       |
   69|  72.0k|            frame.output.csp = XVID_CSP_BGR;
  ------------------
  |  |  121|  72.0k|#define XVID_CSP_BGR      (1<< 9) /* 24-bit bgr packed */
  ------------------
   70|  72.0k|        }
   71|       |
   72|  72.0k|        const int used_bytes = xvid_decore(ctx.handle, XVID_DEC_DECODE, &frame, &stats);
  ------------------
  |  |  248|  72.0k|#define XVID_DEC_DECODE  2 /* decode a frame: returns number of bytes consumed >= 0 */
  ------------------
   73|  72.0k|        if ( stats.type == XVID_TYPE_VOL) {
  ------------------
  |  |  165|  72.0k|#define XVID_TYPE_VOL     -1 /* decoder only: vol was decoded */
  ------------------
  |  Branch (73:14): [True: 35.0k, False: 36.9k]
  ------------------
   74|       |            /* Resize buffer */
   75|       |
   76|  35.0k|            if ( (width != stats.data.vol.width) || (height != stats.data.vol.height) ) {
  ------------------
  |  Branch (76:18): [True: 12.6k, False: 22.4k]
  |  Branch (76:53): [True: 441, False: 21.9k]
  ------------------
   77|  13.1k|                if ( width * height < stats.data.vol.width * stats.data.vol.height ) {
  ------------------
  |  Branch (77:22): [True: 10.7k, False: 2.32k]
  ------------------
   78|  10.7k|                    if (out) {
  ------------------
  |  Branch (78:25): [True: 2.66k, False: 8.11k]
  ------------------
   79|  2.66k|                        free(out);
   80|  2.66k|                    }
   81|  10.7k|                    out = (uint8_t*)malloc(stats.data.vol.width * stats.data.vol.height * 4);
   82|  10.7k|                }
   83|  13.1k|                width = stats.data.vol.width;
   84|  13.1k|                height = stats.data.vol.height;
   85|  13.1k|            }
   86|  35.0k|        }
   87|       |
   88|  72.0k|        if ( used_bytes > 0 ) {
  ------------------
  |  Branch (88:14): [True: 71.2k, False: 762]
  ------------------
   89|  71.2k|            inptr += used_bytes;
   90|  71.2k|            remaining -= used_bytes;
   91|  71.2k|        } else {
   92|    762|            break;
   93|    762|        }
   94|       |
   95|  71.2k|        loops++;
   96|  71.2k|    } while (stats.type <= 0 && remaining > 1);
  ------------------
  |  Branch (96:14): [True: 65.1k, False: 6.15k]
  |  Branch (96:33): [True: 61.5k, False: 3.59k]
  ------------------
   97|       |
   98|  10.5k|end:
   99|  10.5k|    free(dataCopy);
  100|  10.5k|    free(out);
  101|       |
  102|  10.5k|    xvid_decore(ctx.handle, XVID_DEC_DESTROY, nullptr, nullptr);
  ------------------
  |  |  247|  10.5k|#define XVID_DEC_DESTROY 1 /* destroy decore instance: return 0 on success */
  ------------------
  103|  10.5k|    return 0;
  104|  10.5k|}

read_video_packet_header:
  105|  31.5k|{
  106|  31.5k|	int startcode_bits = NUMBITS_VP_RESYNC_MARKER + addbits;
  ------------------
  |  |  111|  31.5k|#define NUMBITS_VP_RESYNC_MARKER  17
  ------------------
  107|  31.5k|	int mbnum_bits = log2bin(dec->mb_width *  dec->mb_height - 1);
  108|  31.5k|	int mbnum;
  109|  31.5k|	int hec = 0;
  110|       |
  111|  31.5k|	BitstreamSkip(bs, BitstreamNumBitsToByteAlign(bs));
  112|  31.5k|	BitstreamSkip(bs, startcode_bits);
  113|       |
  114|  31.5k|	DPRINTF(XVID_DEBUG_STARTCODE, "<video_packet_header>\n");
  ------------------
  |  |  197|  31.5k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  115|       |
  116|  31.5k|	if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  31.5k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (116:6): [True: 16.8k, False: 14.7k]
  ------------------
  117|  16.8k|	{
  118|  16.8k|		hec = BitstreamGetBit(bs);		/* header_extension_code */
  119|  16.8k|		if (hec && !(dec->sprite_enable == SPRITE_STATIC /* && current_coding_type = I_VOP */))
  ------------------
  |  |   94|  13.4k|#define SPRITE_STATIC	1
  ------------------
  |  Branch (119:7): [True: 13.4k, False: 3.39k]
  |  Branch (119:14): [True: 9.95k, False: 3.49k]
  ------------------
  120|  9.95k|		{
  121|  9.95k|			BitstreamSkip(bs, 13);			/* vop_width */
  122|  9.95k|			READ_MARKER();
  ------------------
  |  |   99|  9.95k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  123|  9.95k|			BitstreamSkip(bs, 13);			/* vop_height */
  124|  9.95k|			READ_MARKER();
  ------------------
  |  |   99|  9.95k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  125|  9.95k|			BitstreamSkip(bs, 13);			/* vop_horizontal_mc_spatial_ref */
  126|  9.95k|			READ_MARKER();
  ------------------
  |  |   99|  9.95k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  127|  9.95k|			BitstreamSkip(bs, 13);			/* vop_vertical_mc_spatial_ref */
  128|  9.95k|			READ_MARKER();
  ------------------
  |  |   99|  9.95k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  129|  9.95k|		}
  130|  16.8k|	}
  131|       |
  132|  31.5k|	mbnum = (mbnum_bits == 0) ? 0 : BitstreamGetBits(bs, mbnum_bits);		/* macroblock_number */
  ------------------
  |  Branch (132:10): [True: 7, False: 31.5k]
  ------------------
  133|  31.5k|	DPRINTF(XVID_DEBUG_HEADER, "mbnum %i\n", mbnum);
  ------------------
  |  |  198|  31.5k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  134|       |
  135|  31.5k|	if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY)
  ------------------
  |  |   89|  31.5k|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (135:6): [True: 15.2k, False: 16.2k]
  ------------------
  136|  15.2k|	{
  137|  15.2k|		*quant = BitstreamGetBits(bs, dec->quant_bits);	/* quant_scale */
  138|  15.2k|		DPRINTF(XVID_DEBUG_HEADER, "quant %i\n", *quant);
  ------------------
  |  |  198|  15.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  139|  15.2k|	}
  140|       |
  141|  31.5k|	if (dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  31.5k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (141:6): [True: 14.7k, False: 16.8k]
  ------------------
  142|  14.7k|		hec = BitstreamGetBit(bs);		/* header_extension_code */
  143|       |
  144|       |
  145|  31.5k|	DPRINTF(XVID_DEBUG_HEADER, "header_extension_code %i\n", hec);
  ------------------
  |  |  198|  31.5k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  146|  31.5k|	if (hec)
  ------------------
  |  Branch (146:6): [True: 21.0k, False: 10.5k]
  ------------------
  147|  21.0k|	{
  148|  21.0k|		int time_base;
  149|  21.0k|		int time_increment;
  150|  21.0k|		int coding_type;
  151|       |
  152|  46.1k|		for (time_base=0; BitstreamGetBit(bs)!=0; time_base++);		/* modulo_time_base */
  ------------------
  |  Branch (152:21): [True: 25.1k, False: 21.0k]
  ------------------
  153|  21.0k|		READ_MARKER();
  ------------------
  |  |   99|  21.0k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  154|  21.0k|		if (dec->time_inc_bits)
  ------------------
  |  Branch (154:7): [True: 21.0k, False: 0]
  ------------------
  155|  21.0k|			time_increment = (BitstreamGetBits(bs, dec->time_inc_bits));	/* vop_time_increment */
  156|      0|		else
  157|      0|			time_increment = 0;
  158|  21.0k|		READ_MARKER();
  ------------------
  |  |   99|  21.0k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  159|  21.0k|		DPRINTF(XVID_DEBUG_HEADER,"time %i:%i\n", time_base, time_increment);
  ------------------
  |  |  198|  21.0k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  160|       |
  161|  21.0k|		coding_type = BitstreamGetBits(bs, 2);
  162|  21.0k|		DPRINTF(XVID_DEBUG_HEADER,"coding_type %i\n", coding_type);
  ------------------
  |  |  198|  21.0k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  163|       |
  164|  21.0k|		if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  21.0k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (164:7): [True: 13.4k, False: 7.55k]
  ------------------
  165|  13.4k|		{
  166|  13.4k|			BitstreamSkip(bs, 1);	/* change_conv_ratio_disable */
  167|  13.4k|			if (coding_type != I_VOP)
  ------------------
  |  |  104|  13.4k|#define I_VOP	0
  ------------------
  |  Branch (167:8): [True: 4.67k, False: 8.77k]
  ------------------
  168|  4.67k|				BitstreamSkip(bs, 1);	/* vop_shape_coding_type */
  169|  13.4k|		}
  170|       |
  171|  21.0k|		if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY)
  ------------------
  |  |   89|  21.0k|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (171:7): [True: 8.07k, False: 12.9k]
  ------------------
  172|  8.07k|		{
  173|  8.07k|			*intra_dc_threshold = intra_dc_threshold_table[BitstreamGetBits(bs, 3)];
  174|       |
  175|  8.07k|			if (dec->sprite_enable == SPRITE_GMC && coding_type == S_VOP &&
  ------------------
  |  |   95|  16.1k|#define SPRITE_GMC		2
  ------------------
              			if (dec->sprite_enable == SPRITE_GMC && coding_type == S_VOP &&
  ------------------
  |  |  107|  9.25k|#define S_VOP	3
  ------------------
  |  Branch (175:8): [True: 1.17k, False: 6.90k]
  |  Branch (175:44): [True: 155, False: 1.01k]
  ------------------
  176|  8.07k|				dec->sprite_warping_points > 0)
  ------------------
  |  Branch (176:5): [True: 151, False: 4]
  ------------------
  177|    151|			{
  178|       |				/* TODO: sprite trajectory */
  179|    151|			}
  180|  8.07k|			if (dec->reduced_resolution_enable &&
  ------------------
  |  Branch (180:8): [True: 4.14k, False: 3.93k]
  ------------------
  181|  8.07k|				dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR &&
  ------------------
  |  |   87|  12.2k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (181:5): [True: 3.65k, False: 489]
  ------------------
  182|  8.07k|				(coding_type == P_VOP || coding_type == I_VOP))
  ------------------
  |  |  105|  7.30k|#define P_VOP	1
  ------------------
              				(coding_type == P_VOP || coding_type == I_VOP))
  ------------------
  |  |  104|  2.47k|#define I_VOP	0
  ------------------
  |  Branch (182:6): [True: 1.17k, False: 2.47k]
  |  Branch (182:30): [True: 1.05k, False: 1.42k]
  ------------------
  183|  2.22k|			{
  184|  2.22k|				BitstreamSkip(bs, 1); /* XXX: vop_reduced_resolution */
  185|  2.22k|			}
  186|       |
  187|  8.07k|			if (coding_type != I_VOP && fcode_forward)
  ------------------
  |  |  104|  16.1k|#define I_VOP	0
  ------------------
  |  Branch (187:8): [True: 5.31k, False: 2.76k]
  |  Branch (187:32): [True: 3.14k, False: 2.16k]
  ------------------
  188|  3.14k|			{
  189|  3.14k|				*fcode_forward = BitstreamGetBits(bs, 3);
  190|  3.14k|				DPRINTF(XVID_DEBUG_HEADER,"fcode_forward %i\n", *fcode_forward);
  ------------------
  |  |  198|  3.14k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  191|  3.14k|			}
  192|       |
  193|  8.07k|			if (coding_type == B_VOP && fcode_backward)
  ------------------
  |  |  106|  16.1k|#define B_VOP	2
  ------------------
  |  Branch (193:8): [True: 3.15k, False: 4.92k]
  |  Branch (193:32): [True: 1.99k, False: 1.16k]
  ------------------
  194|  1.99k|			{
  195|  1.99k|				*fcode_backward = BitstreamGetBits(bs, 3);
  196|  1.99k|				DPRINTF(XVID_DEBUG_HEADER,"fcode_backward %i\n", *fcode_backward);
  ------------------
  |  |  198|  1.99k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  197|  1.99k|			}
  198|  8.07k|		}
  199|  21.0k|	}
  200|       |
  201|  31.5k|	if (dec->newpred_enable)
  ------------------
  |  Branch (201:6): [True: 14.8k, False: 16.6k]
  ------------------
  202|  14.8k|	{
  203|  14.8k|		int vop_id;
  204|  14.8k|		int vop_id_for_prediction;
  205|       |
  206|  14.8k|		vop_id = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|  14.8k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 14.2k, False: 675]
  |  |  ------------------
  ------------------
  207|  14.8k|		DPRINTF(XVID_DEBUG_HEADER, "vop_id %i\n", vop_id);
  ------------------
  |  |  198|  14.8k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  208|  14.8k|		if (BitstreamGetBit(bs))	/* vop_id_for_prediction_indication */
  ------------------
  |  Branch (208:7): [True: 7.47k, False: 7.40k]
  ------------------
  209|  7.47k|		{
  210|  7.47k|			vop_id_for_prediction = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|  7.47k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 6.87k, False: 600]
  |  |  ------------------
  ------------------
  211|  7.47k|			DPRINTF(XVID_DEBUG_HEADER, "vop_id_for_prediction %i\n", vop_id_for_prediction);
  ------------------
  |  |  198|  7.47k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  212|  7.47k|		}
  213|  14.8k|		READ_MARKER();
  ------------------
  |  |   99|  14.8k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  214|  14.8k|	}
  215|       |
  216|  31.5k|	return mbnum;
  217|  31.5k|}
BitstreamReadHeaders:
  397|  2.11M|{
  398|  2.11M|	uint32_t vol_ver_id;
  399|  2.11M|	uint32_t coding_type;
  400|  2.11M|	uint32_t start_code;
  401|  2.11M|	uint32_t time_incr = 0;
  402|  2.11M|	int32_t time_increment = 0;
  403|       |
  404|  24.3M|	while ((BitstreamPos(bs) >> 3) + 4 <= bs->length) {
  ------------------
  |  Branch (404:9): [True: 24.3M, False: 3.13k]
  ------------------
  405|       |
  406|  24.3M|		BitstreamByteAlign(bs);
  407|  24.3M|		start_code = BitstreamShowBits(bs, 32);
  408|       |
  409|  24.3M|		if (start_code == VISOBJSEQ_START_CODE) {
  ------------------
  |  |   42|  24.3M|#define VISOBJSEQ_START_CODE	0x000001b0
  ------------------
  |  Branch (409:7): [True: 1.77k, False: 24.3M]
  ------------------
  410|       |
  411|  1.77k|			int profile;
  412|       |
  413|  1.77k|			DPRINTF(XVID_DEBUG_STARTCODE, "<visual_object_sequence>\n");
  ------------------
  |  |  197|  1.77k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  414|       |
  415|  1.77k|			BitstreamSkip(bs, 32);	/* visual_object_sequence_start_code */
  416|  1.77k|			profile = BitstreamGetBits(bs, 8);	/* profile_and_level_indication */
  417|       |
  418|  1.77k|			DPRINTF(XVID_DEBUG_HEADER, "profile_and_level_indication %i\n", profile);
  ------------------
  |  |  198|  1.77k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  419|       |
  420|  24.3M|		} else if (start_code == VISOBJSEQ_STOP_CODE) {
  ------------------
  |  |   43|  24.3M|#define VISOBJSEQ_STOP_CODE		0x000001b1	/* ??? */
  ------------------
  |  Branch (420:14): [True: 1.13k, False: 24.3M]
  ------------------
  421|       |
  422|  1.13k|			BitstreamSkip(bs, 32);	/* visual_object_sequence_stop_code */
  423|       |
  424|  1.13k|			DPRINTF(XVID_DEBUG_STARTCODE, "</visual_object_sequence>\n");
  ------------------
  |  |  197|  1.13k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  425|       |
  426|  24.3M|		} else if (start_code == VISOBJ_START_CODE) {
  ------------------
  |  |   47|  24.3M|#define VISOBJ_START_CODE		0x000001b5
  ------------------
  |  Branch (426:14): [True: 8.83k, False: 24.3M]
  ------------------
  427|       |
  428|  8.83k|			DPRINTF(XVID_DEBUG_STARTCODE, "<visual_object>\n");
  ------------------
  |  |  197|  8.83k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  429|       |
  430|  8.83k|			BitstreamSkip(bs, 32);	/* visual_object_start_code */
  431|  8.83k|			if (BitstreamGetBit(bs))	/* is_visual_object_identified */
  ------------------
  |  Branch (431:8): [True: 2.93k, False: 5.90k]
  ------------------
  432|  2.93k|			{
  433|  2.93k|				dec->ver_id = BitstreamGetBits(bs, 4);	/* visual_object_ver_id */
  434|  2.93k|				DPRINTF(XVID_DEBUG_HEADER,"visobj_ver_id %i\n", dec->ver_id);
  ------------------
  |  |  198|  2.93k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  435|  2.93k|				BitstreamSkip(bs, 3);	/* visual_object_priority */
  436|  5.90k|			} else {
  437|  5.90k|				dec->ver_id = 1;
  438|  5.90k|			}
  439|       |
  440|  8.83k|			if (BitstreamShowBits(bs, 4) != VISOBJ_TYPE_VIDEO)	/* visual_object_type */
  ------------------
  |  |   52|  8.83k|#define VISOBJ_TYPE_VIDEO				1
  ------------------
  |  Branch (440:8): [True: 4.53k, False: 4.30k]
  ------------------
  441|  4.53k|			{
  442|  4.53k|				DPRINTF(XVID_DEBUG_ERROR, "visual_object_type != video\n");
  ------------------
  |  |  196|  4.53k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  443|  4.53k|				return -1;
  444|  4.53k|			}
  445|  4.30k|			BitstreamSkip(bs, 4);
  446|       |
  447|       |			/* video_signal_type */
  448|       |
  449|  4.30k|			if (BitstreamGetBit(bs))	/* video_signal_type */
  ------------------
  |  Branch (449:8): [True: 3.80k, False: 504]
  ------------------
  450|  3.80k|			{
  451|  3.80k|				DPRINTF(XVID_DEBUG_HEADER,"+ video_signal_type\n");
  ------------------
  |  |  198|  3.80k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  452|  3.80k|				BitstreamSkip(bs, 3);	/* video_format */
  453|  3.80k|				BitstreamSkip(bs, 1);	/* video_range */
  454|  3.80k|				if (BitstreamGetBit(bs))	/* color_description */
  ------------------
  |  Branch (454:9): [True: 2.98k, False: 817]
  ------------------
  455|  2.98k|				{
  456|  2.98k|					DPRINTF(XVID_DEBUG_HEADER,"+ color_description");
  ------------------
  |  |  198|  2.98k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  457|  2.98k|					BitstreamSkip(bs, 8);	/* color_primaries */
  458|  2.98k|					BitstreamSkip(bs, 8);	/* transfer_characteristics */
  459|  2.98k|					BitstreamSkip(bs, 8);	/* matrix_coefficients */
  460|  2.98k|				}
  461|  3.80k|			}
  462|  24.3M|		} else if ((start_code & ~VIDOBJ_START_CODE_MASK) == VIDOBJ_START_CODE) {
  ------------------
  |  |  385|  24.3M|#define VIDOBJ_START_CODE_MASK		0x0000001f
  ------------------
              		} else if ((start_code & ~VIDOBJ_START_CODE_MASK) == VIDOBJ_START_CODE) {
  ------------------
  |  |   40|  24.3M|#define VIDOBJ_START_CODE		0x00000100	/* ..0x0000011f  */
  ------------------
  |  Branch (462:14): [True: 9.14k, False: 24.3M]
  ------------------
  463|       |
  464|  9.14k|			DPRINTF(XVID_DEBUG_STARTCODE, "<video_object>\n");
  ------------------
  |  |  197|  9.14k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  465|  9.14k|			DPRINTF(XVID_DEBUG_HEADER, "vo id %i\n", start_code & VIDOBJ_START_CODE_MASK);
  ------------------
  |  |  198|  9.14k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
              			DPRINTF(XVID_DEBUG_HEADER, "vo id %i\n", start_code & VIDOBJ_START_CODE_MASK);
  ------------------
  |  |  385|  9.14k|#define VIDOBJ_START_CODE_MASK		0x0000001f
  ------------------
  466|       |
  467|  9.14k|			BitstreamSkip(bs, 32);	/* video_object_start_code */
  468|       |
  469|  24.3M|		} else if ((start_code & ~VIDOBJLAY_START_CODE_MASK) == VIDOBJLAY_START_CODE) {
  ------------------
  |  |  386|  24.3M|#define VIDOBJLAY_START_CODE_MASK	0x0000000f
  ------------------
              		} else if ((start_code & ~VIDOBJLAY_START_CODE_MASK) == VIDOBJLAY_START_CODE) {
  ------------------
  |  |   41|  24.3M|#define VIDOBJLAY_START_CODE	0x00000120	/* ..0x0000012f */
  ------------------
  |  Branch (469:14): [True: 46.2k, False: 24.2M]
  ------------------
  470|  46.2k|			uint32_t width = 0, height = 0;
  471|       |
  472|  46.2k|			DPRINTF(XVID_DEBUG_STARTCODE, "<video_object_layer>\n");
  ------------------
  |  |  197|  46.2k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  473|  46.2k|			DPRINTF(XVID_DEBUG_HEADER, "vol id %i\n", start_code & VIDOBJLAY_START_CODE_MASK);
  ------------------
  |  |  198|  46.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
              			DPRINTF(XVID_DEBUG_HEADER, "vol id %i\n", start_code & VIDOBJLAY_START_CODE_MASK);
  ------------------
  |  |  386|  46.2k|#define VIDOBJLAY_START_CODE_MASK	0x0000000f
  ------------------
  474|       |
  475|  46.2k|			BitstreamSkip(bs, 32);	/* video_object_layer_start_code */
  476|  46.2k|			BitstreamSkip(bs, 1);	/* random_accessible_vol */
  477|       |
  478|  46.2k|            BitstreamSkip(bs, 8);   /* video_object_type_indication */
  479|       |
  480|  46.2k|			if (BitstreamGetBit(bs))	/* is_object_layer_identifier */
  ------------------
  |  Branch (480:8): [True: 15.4k, False: 30.8k]
  ------------------
  481|  15.4k|			{
  482|  15.4k|				DPRINTF(XVID_DEBUG_HEADER, "+ is_object_layer_identifier\n");
  ------------------
  |  |  198|  15.4k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  483|  15.4k|				vol_ver_id = BitstreamGetBits(bs, 4);	/* video_object_layer_verid */
  484|  15.4k|				DPRINTF(XVID_DEBUG_HEADER,"ver_id %i\n", vol_ver_id);
  ------------------
  |  |  198|  15.4k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  485|  15.4k|				BitstreamSkip(bs, 3);	/* video_object_layer_priority */
  486|  30.8k|			} else {
  487|  30.8k|				vol_ver_id = dec->ver_id;
  488|  30.8k|			}
  489|       |
  490|  46.2k|			dec->aspect_ratio = BitstreamGetBits(bs, 4);
  491|       |
  492|  46.2k|			if (dec->aspect_ratio == VIDOBJLAY_AR_EXTPAR)	/* aspect_ratio_info */
  ------------------
  |  |   84|  46.2k|#define VIDOBJLAY_AR_EXTPAR				15
  ------------------
  |  Branch (492:8): [True: 2.10k, False: 44.1k]
  ------------------
  493|  2.10k|			{
  494|  2.10k|				DPRINTF(XVID_DEBUG_HEADER, "+ aspect_ratio_info\n");
  ------------------
  |  |  198|  2.10k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  495|  2.10k|				dec->par_width = BitstreamGetBits(bs, 8);	/* par_width */
  496|  2.10k|				dec->par_height = BitstreamGetBits(bs, 8);	/* par_height */
  497|  2.10k|			}
  498|       |
  499|  46.2k|			if (BitstreamGetBit(bs))	/* vol_control_parameters */
  ------------------
  |  Branch (499:8): [True: 13.3k, False: 32.9k]
  ------------------
  500|  13.3k|			{
  501|  13.3k|				DPRINTF(XVID_DEBUG_HEADER, "+ vol_control_parameters\n");
  ------------------
  |  |  198|  13.3k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  502|  13.3k|				BitstreamSkip(bs, 2);	/* chroma_format */
  503|  13.3k|				dec->low_delay = BitstreamGetBit(bs);	/* low_delay */
  504|  13.3k|				DPRINTF(XVID_DEBUG_HEADER, "low_delay %i\n", dec->low_delay);
  ------------------
  |  |  198|  13.3k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  505|  13.3k|				if (BitstreamGetBit(bs))	/* vbv_parameters */
  ------------------
  |  Branch (505:9): [True: 1.30k, False: 12.0k]
  ------------------
  506|  1.30k|				{
  507|  1.30k|					unsigned int bitrate;
  508|  1.30k|					unsigned int buffer_size;
  509|  1.30k|					unsigned int occupancy;
  510|       |
  511|  1.30k|					DPRINTF(XVID_DEBUG_HEADER,"+ vbv_parameters\n");
  ------------------
  |  |  198|  1.30k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  512|       |
  513|  1.30k|					bitrate = BitstreamGetBits(bs,15) << 15;	/* first_half_bit_rate */
  514|  1.30k|					READ_MARKER();
  ------------------
  |  |   99|  1.30k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  515|  1.30k|					bitrate |= BitstreamGetBits(bs,15);		/* latter_half_bit_rate */
  516|  1.30k|					READ_MARKER();
  ------------------
  |  |   99|  1.30k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  517|       |
  518|  1.30k|					buffer_size = BitstreamGetBits(bs, 15) << 3;	/* first_half_vbv_buffer_size */
  519|  1.30k|					READ_MARKER();
  ------------------
  |  |   99|  1.30k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  520|  1.30k|					buffer_size |= BitstreamGetBits(bs, 3);		/* latter_half_vbv_buffer_size */
  521|       |
  522|  1.30k|					occupancy = BitstreamGetBits(bs, 11) << 15;	/* first_half_vbv_occupancy */
  523|  1.30k|					READ_MARKER();
  ------------------
  |  |   99|  1.30k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  524|  1.30k|					occupancy |= BitstreamGetBits(bs, 15);	/* latter_half_vbv_occupancy */
  525|  1.30k|					READ_MARKER();
  ------------------
  |  |   99|  1.30k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  526|       |
  527|  1.30k|					DPRINTF(XVID_DEBUG_HEADER,"bitrate %d (unit=400 bps)\n", bitrate);
  ------------------
  |  |  198|  1.30k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  528|  1.30k|					DPRINTF(XVID_DEBUG_HEADER,"buffer_size %d (unit=16384 bits)\n", buffer_size);
  ------------------
  |  |  198|  1.30k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  529|  1.30k|					DPRINTF(XVID_DEBUG_HEADER,"occupancy %d (unit=64 bits)\n", occupancy);
  ------------------
  |  |  198|  1.30k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  530|  1.30k|				}
  531|  32.9k|			}else{
  532|  32.9k|				dec->low_delay = dec->low_delay_default;
  533|  32.9k|			}
  534|       |
  535|  46.2k|			dec->shape = BitstreamGetBits(bs, 2);	/* video_object_layer_shape */
  536|       |
  537|  46.2k|			DPRINTF(XVID_DEBUG_HEADER, "shape %i\n", dec->shape);
  ------------------
  |  |  198|  46.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  538|  46.2k|			if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  46.2k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (538:8): [True: 18.4k, False: 27.7k]
  ------------------
  539|  18.4k|			{
  540|  18.4k|				DPRINTF(XVID_DEBUG_ERROR,"non-rectangular shapes are not supported\n");
  ------------------
  |  |  196|  18.4k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  541|  18.4k|			}
  542|       |
  543|  46.2k|			if (dec->shape == VIDOBJLAY_SHAPE_GRAYSCALE && vol_ver_id != 1) {
  ------------------
  |  |   90|  92.5k|#define VIDOBJLAY_SHAPE_GRAYSCALE		3
  ------------------
  |  Branch (543:8): [True: 6.77k, False: 39.5k]
  |  Branch (543:51): [True: 1.44k, False: 5.33k]
  ------------------
  544|  1.44k|				BitstreamSkip(bs, 4);	/* video_object_layer_shape_extension */
  545|  1.44k|			}
  546|       |
  547|  46.2k|			READ_MARKER();
  ------------------
  |  |   99|  46.2k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  548|       |
  549|       |			/********************** for decode B-frame time ***********************/
  550|  46.2k|			dec->time_inc_resolution = BitstreamGetBits(bs, 16);	/* vop_time_increment_resolution */
  551|  46.2k|			DPRINTF(XVID_DEBUG_HEADER,"vop_time_increment_resolution %i\n", dec->time_inc_resolution);
  ------------------
  |  |  198|  46.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  552|       |
  553|  46.2k|			if (dec->time_inc_resolution > 0) {
  ------------------
  |  Branch (553:8): [True: 33.4k, False: 12.8k]
  ------------------
  554|  33.4k|				dec->time_inc_bits = MAX(log2bin(dec->time_inc_resolution-1), 1);
  ------------------
  |  |  258|  33.4k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 26.0k, False: 7.41k]
  |  |  ------------------
  ------------------
  555|  33.4k|			} else {
  556|       |				/* for "old" xvid compatibility, set time_inc_bits = 1 */
  557|  12.8k|				dec->time_inc_bits = 1;
  558|  12.8k|			}
  559|       |
  560|  46.2k|			READ_MARKER();
  ------------------
  |  |   99|  46.2k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  561|       |
  562|  46.2k|			if (BitstreamGetBit(bs))	/* fixed_vop_rate */
  ------------------
  |  Branch (562:8): [True: 10.2k, False: 36.0k]
  ------------------
  563|  10.2k|			{
  564|  10.2k|				DPRINTF(XVID_DEBUG_HEADER, "+ fixed_vop_rate\n");
  ------------------
  |  |  198|  10.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  565|  10.2k|				BitstreamSkip(bs, dec->time_inc_bits);	/* fixed_vop_time_increment */
  566|  10.2k|			}
  567|       |
  568|  46.2k|			if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY) {
  ------------------
  |  |   89|  46.2k|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (568:8): [True: 42.4k, False: 3.88k]
  ------------------
  569|       |
  570|  42.4k|				if (dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR) {
  ------------------
  |  |   87|  42.4k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (570:9): [True: 27.7k, False: 14.6k]
  ------------------
  571|  27.7k|					READ_MARKER();
  ------------------
  |  |   99|  27.7k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  572|  27.7k|					width = BitstreamGetBits(bs, 13);	/* video_object_layer_width */
  573|  27.7k|					READ_MARKER();
  ------------------
  |  |   99|  27.7k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  574|  27.7k|					height = BitstreamGetBits(bs, 13);	/* video_object_layer_height */
  575|  27.7k|					READ_MARKER();
  ------------------
  |  |   99|  27.7k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  576|       |
  577|  27.7k|					DPRINTF(XVID_DEBUG_HEADER, "width %i\n", width);
  ------------------
  |  |  198|  27.7k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  578|  27.7k|					DPRINTF(XVID_DEBUG_HEADER, "height %i\n", height);
  ------------------
  |  |  198|  27.7k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  579|  27.7k|				}
  580|       |
  581|  42.4k|				dec->interlacing = BitstreamGetBit(bs);
  582|  42.4k|				DPRINTF(XVID_DEBUG_HEADER, "interlacing %i\n", dec->interlacing);
  ------------------
  |  |  198|  42.4k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  583|       |
  584|  42.4k|				if (!BitstreamGetBit(bs))	/* obmc_disable */
  ------------------
  |  Branch (584:9): [True: 28.7k, False: 13.6k]
  ------------------
  585|  28.7k|				{
  586|  28.7k|					DPRINTF(XVID_DEBUG_ERROR, "obmc_disabled==false not supported\n");
  ------------------
  |  |  196|  28.7k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  587|       |					/* TODO */
  588|       |					/* fucking divx4.02 has this enabled */
  589|  28.7k|				}
  590|       |
  591|  42.4k|				dec->sprite_enable = BitstreamGetBits(bs, (vol_ver_id == 1 ? 1 : 2));	/* sprite_enable */
  ------------------
  |  Branch (591:48): [True: 28.1k, False: 14.2k]
  ------------------
  592|       |
  593|  42.4k|				if (dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable == SPRITE_GMC)
  ------------------
  |  |   94|  84.8k|#define SPRITE_STATIC	1
  ------------------
              				if (dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable == SPRITE_GMC)
  ------------------
  |  |   95|  36.1k|#define SPRITE_GMC		2
  ------------------
  |  Branch (593:9): [True: 6.21k, False: 36.1k]
  |  Branch (593:48): [True: 4.29k, False: 31.8k]
  ------------------
  594|  10.5k|				{
  595|  10.5k|					int low_latency_sprite_enable;
  596|       |
  597|  10.5k|					if (dec->sprite_enable != SPRITE_GMC)
  ------------------
  |  |   95|  10.5k|#define SPRITE_GMC		2
  ------------------
  |  Branch (597:10): [True: 6.21k, False: 4.29k]
  ------------------
  598|  6.21k|					{
  599|  6.21k|						int sprite_width;
  600|  6.21k|						int sprite_height;
  601|  6.21k|						int sprite_left_coord;
  602|  6.21k|						int sprite_top_coord;
  603|  6.21k|						sprite_width = BitstreamGetBits(bs, 13);		/* sprite_width */
  604|  6.21k|						READ_MARKER();
  ------------------
  |  |   99|  6.21k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  605|  6.21k|						sprite_height = BitstreamGetBits(bs, 13);	/* sprite_height */
  606|  6.21k|						READ_MARKER();
  ------------------
  |  |   99|  6.21k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  607|  6.21k|						sprite_left_coord = BitstreamGetBits(bs, 13);	/* sprite_left_coordinate */
  608|  6.21k|						READ_MARKER();
  ------------------
  |  |   99|  6.21k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  609|  6.21k|						sprite_top_coord = BitstreamGetBits(bs, 13);	/* sprite_top_coordinate */
  610|  6.21k|						READ_MARKER();
  ------------------
  |  |   99|  6.21k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  611|  6.21k|					}
  612|  10.5k|					dec->sprite_warping_points = BitstreamGetBits(bs, 6);		/* no_of_sprite_warping_points */
  613|  10.5k|					dec->sprite_warping_accuracy = BitstreamGetBits(bs, 2);		/* sprite_warping_accuracy */
  614|  10.5k|					dec->sprite_brightness_change = BitstreamGetBits(bs, 1);		/* brightness_change */
  615|  10.5k|					if (dec->sprite_enable != SPRITE_GMC)
  ------------------
  |  |   95|  10.5k|#define SPRITE_GMC		2
  ------------------
  |  Branch (615:10): [True: 6.21k, False: 4.29k]
  ------------------
  616|  6.21k|					{
  617|  6.21k|						low_latency_sprite_enable = BitstreamGetBits(bs, 1);		/* low_latency_sprite_enable */
  618|  6.21k|					}
  619|  10.5k|				}
  620|       |
  621|  42.4k|				if (vol_ver_id != 1 &&
  ------------------
  |  Branch (621:9): [True: 14.2k, False: 28.1k]
  ------------------
  622|  42.4k|					dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR) {
  ------------------
  |  |   87|  14.2k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (622:6): [True: 2.97k, False: 11.2k]
  ------------------
  623|  2.97k|					BitstreamSkip(bs, 1);	/* sadct_disable */
  624|  2.97k|				}
  625|       |
  626|  42.4k|				if (BitstreamGetBit(bs))	/* not_8_bit */
  ------------------
  |  Branch (626:9): [True: 11.7k, False: 30.6k]
  ------------------
  627|  11.7k|				{
  628|  11.7k|					DPRINTF(XVID_DEBUG_HEADER, "not_8_bit==true (ignored)\n");
  ------------------
  |  |  198|  11.7k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  629|  11.7k|					dec->quant_bits = BitstreamGetBits(bs, 4);	/* quant_precision */
  630|  11.7k|					BitstreamSkip(bs, 4);	/* bits_per_pixel */
  631|  30.6k|				} else {
  632|  30.6k|					dec->quant_bits = 5;
  633|  30.6k|				}
  634|       |
  635|  42.4k|				if (dec->shape == VIDOBJLAY_SHAPE_GRAYSCALE) {
  ------------------
  |  |   90|  42.4k|#define VIDOBJLAY_SHAPE_GRAYSCALE		3
  ------------------
  |  Branch (635:9): [True: 6.77k, False: 35.6k]
  ------------------
  636|  6.77k|					BitstreamSkip(bs, 1);	/* no_gray_quant_update */
  637|  6.77k|					BitstreamSkip(bs, 1);	/* composition_method */
  638|  6.77k|					BitstreamSkip(bs, 1);	/* linear_composition */
  639|  6.77k|				}
  640|       |
  641|  42.4k|				dec->quant_type = BitstreamGetBit(bs);	/* quant_type */
  642|  42.4k|				DPRINTF(XVID_DEBUG_HEADER, "quant_type %i\n", dec->quant_type);
  ------------------
  |  |  198|  42.4k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  643|       |
  644|  42.4k|				if (dec->quant_type) {
  ------------------
  |  Branch (644:9): [True: 11.9k, False: 30.4k]
  ------------------
  645|  11.9k|					if (BitstreamGetBit(bs))	/* load_intra_quant_mat */
  ------------------
  |  Branch (645:10): [True: 3.17k, False: 8.76k]
  ------------------
  646|  3.17k|					{
  647|  3.17k|						uint8_t matrix[64];
  648|       |
  649|  3.17k|						DPRINTF(XVID_DEBUG_HEADER, "load_intra_quant_mat\n");
  ------------------
  |  |  198|  3.17k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  650|       |
  651|  3.17k|						bs_get_matrix(bs, matrix);
  652|  3.17k|						set_intra_matrix(dec->mpeg_quant_matrices, matrix);
  653|  3.17k|					} else
  654|  8.76k|						set_intra_matrix(dec->mpeg_quant_matrices, get_default_intra_matrix());
  655|       |
  656|  11.9k|					if (BitstreamGetBit(bs))	/* load_inter_quant_mat */
  ------------------
  |  Branch (656:10): [True: 1.34k, False: 10.6k]
  ------------------
  657|  1.34k|					{
  658|  1.34k|						uint8_t matrix[64];
  659|       |
  660|  1.34k|						DPRINTF(XVID_DEBUG_HEADER, "load_inter_quant_mat\n");
  ------------------
  |  |  198|  1.34k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  661|       |
  662|  1.34k|						bs_get_matrix(bs, matrix);
  663|  1.34k|						set_inter_matrix(dec->mpeg_quant_matrices, matrix);
  664|  1.34k|					} else
  665|  10.6k|						set_inter_matrix(dec->mpeg_quant_matrices, get_default_inter_matrix());
  666|       |
  667|  11.9k|					if (dec->shape == VIDOBJLAY_SHAPE_GRAYSCALE) {
  ------------------
  |  |   90|  11.9k|#define VIDOBJLAY_SHAPE_GRAYSCALE		3
  ------------------
  |  Branch (667:10): [True: 2.38k, False: 9.56k]
  ------------------
  668|  2.38k|						DPRINTF(XVID_DEBUG_ERROR, "greyscale matrix not supported\n");
  ------------------
  |  |  196|  2.38k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  669|  2.38k|						return -1;
  670|  2.38k|					}
  671|       |
  672|  11.9k|				}
  673|       |
  674|       |
  675|  40.0k|				if (vol_ver_id != 1) {
  ------------------
  |  Branch (675:9): [True: 14.0k, False: 25.9k]
  ------------------
  676|  14.0k|					dec->quarterpel = BitstreamGetBit(bs);	/* quarter_sample */
  677|  14.0k|					DPRINTF(XVID_DEBUG_HEADER,"quarterpel %i\n", dec->quarterpel);
  ------------------
  |  |  198|  14.0k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  678|  14.0k|				}
  679|  25.9k|				else
  680|  25.9k|					dec->quarterpel = 0;
  681|       |
  682|       |
  683|  40.0k|				dec->complexity_estimation_disable = BitstreamGetBit(bs);	/* complexity estimation disable */
  684|  40.0k|				if (!dec->complexity_estimation_disable)
  ------------------
  |  Branch (684:9): [True: 26.6k, False: 13.3k]
  ------------------
  685|  26.6k|				{
  686|  26.6k|					read_vol_complexity_estimation_header(bs, dec);
  687|  26.6k|				}
  688|       |
  689|  40.0k|				BitstreamSkip(bs, 1);	/* resync_marker_disable */
  690|       |
  691|  40.0k|				if (BitstreamGetBit(bs))	/* data_partitioned */
  ------------------
  |  Branch (691:9): [True: 12.2k, False: 27.7k]
  ------------------
  692|  12.2k|				{
  693|  12.2k|					DPRINTF(XVID_DEBUG_ERROR, "data_partitioned not supported\n");
  ------------------
  |  |  196|  12.2k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  694|  12.2k|					BitstreamSkip(bs, 1);	/* reversible_vlc */
  695|  12.2k|				}
  696|       |
  697|  40.0k|				if (vol_ver_id != 1) {
  ------------------
  |  Branch (697:9): [True: 14.0k, False: 25.9k]
  ------------------
  698|  14.0k|					dec->newpred_enable = BitstreamGetBit(bs);
  699|  14.0k|					if (dec->newpred_enable)	/* newpred_enable */
  ------------------
  |  Branch (699:10): [True: 4.03k, False: 9.99k]
  ------------------
  700|  4.03k|					{
  701|  4.03k|						DPRINTF(XVID_DEBUG_HEADER, "+ newpred_enable\n");
  ------------------
  |  |  198|  4.03k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  702|  4.03k|						BitstreamSkip(bs, 2);	/* requested_upstream_message_type */
  703|  4.03k|						BitstreamSkip(bs, 1);	/* newpred_segment_type */
  704|  4.03k|					}
  705|  14.0k|					dec->reduced_resolution_enable = BitstreamGetBit(bs);	/* reduced_resolution_vop_enable */
  706|  14.0k|					DPRINTF(XVID_DEBUG_HEADER, "reduced_resolution_enable %i\n", dec->reduced_resolution_enable);
  ------------------
  |  |  198|  14.0k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  707|  14.0k|				}
  708|  25.9k|				else
  709|  25.9k|				{
  710|  25.9k|					dec->newpred_enable = 0;
  711|  25.9k|					dec->reduced_resolution_enable = 0;
  712|  25.9k|				}
  713|       |
  714|  40.0k|				dec->scalability = BitstreamGetBit(bs);	/* scalability */
  715|  40.0k|				if (dec->scalability)
  ------------------
  |  Branch (715:9): [True: 8.51k, False: 31.5k]
  ------------------
  716|  8.51k|				{
  717|  8.51k|					DPRINTF(XVID_DEBUG_ERROR, "scalability not supported\n");
  ------------------
  |  |  196|  8.51k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  718|  8.51k|					BitstreamSkip(bs, 1);	/* hierarchy_type */
  719|  8.51k|					BitstreamSkip(bs, 4);	/* ref_layer_id */
  720|  8.51k|					BitstreamSkip(bs, 1);	/* ref_layer_sampling_direc */
  721|  8.51k|					BitstreamSkip(bs, 5);	/* hor_sampling_factor_n */
  722|  8.51k|					BitstreamSkip(bs, 5);	/* hor_sampling_factor_m */
  723|  8.51k|					BitstreamSkip(bs, 5);	/* vert_sampling_factor_n */
  724|  8.51k|					BitstreamSkip(bs, 5);	/* vert_sampling_factor_m */
  725|  8.51k|					BitstreamSkip(bs, 1);	/* enhancement_type */
  726|  8.51k|					if(dec->shape == VIDOBJLAY_SHAPE_BINARY /* && hierarchy_type==0 */) {
  ------------------
  |  |   88|  8.51k|#define VIDOBJLAY_SHAPE_BINARY			1
  ------------------
  |  Branch (726:9): [True: 1.61k, False: 6.90k]
  ------------------
  727|  1.61k|						BitstreamSkip(bs, 1);	/* use_ref_shape */
  728|  1.61k|						BitstreamSkip(bs, 1);	/* use_ref_texture */
  729|  1.61k|						BitstreamSkip(bs, 5);	/* shape_hor_sampling_factor_n */
  730|  1.61k|						BitstreamSkip(bs, 5);	/* shape_hor_sampling_factor_m */
  731|  1.61k|						BitstreamSkip(bs, 5);	/* shape_vert_sampling_factor_n */
  732|  1.61k|						BitstreamSkip(bs, 5);	/* shape_vert_sampling_factor_m */
  733|  1.61k|					}
  734|  8.51k|					return -1;
  735|  8.51k|				}
  736|  40.0k|			} else				/* dec->shape == BINARY_ONLY */
  737|  3.88k|			{
  738|  3.88k|				if (vol_ver_id != 1) {
  ------------------
  |  Branch (738:9): [True: 2.04k, False: 1.83k]
  ------------------
  739|  2.04k|					dec->scalability = BitstreamGetBit(bs); /* scalability */
  740|  2.04k|					if (dec->scalability)
  ------------------
  |  Branch (740:10): [True: 304, False: 1.74k]
  ------------------
  741|    304|					{
  742|    304|						DPRINTF(XVID_DEBUG_ERROR, "scalability not supported\n");
  ------------------
  |  |  196|    304|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  743|    304|						BitstreamSkip(bs, 4);	/* ref_layer_id */
  744|    304|						BitstreamSkip(bs, 5);	/* hor_sampling_factor_n */
  745|    304|						BitstreamSkip(bs, 5);	/* hor_sampling_factor_m */
  746|    304|						BitstreamSkip(bs, 5);	/* vert_sampling_factor_n */
  747|    304|						BitstreamSkip(bs, 5);	/* vert_sampling_factor_m */
  748|    304|						return -1;
  749|    304|					}
  750|  2.04k|				}
  751|  3.57k|				BitstreamSkip(bs, 1);	/* resync_marker_disable */
  752|       |
  753|  3.57k|			}
  754|       |
  755|  35.0k|			if (((width > 0) && (height > 0)) && (dec->width != width || dec->height != height))
  ------------------
  |  Branch (755:9): [True: 16.5k, False: 18.4k]
  |  Branch (755:24): [True: 14.7k, False: 1.82k]
  |  Branch (755:42): [True: 12.6k, False: 2.09k]
  |  Branch (755:65): [True: 441, False: 1.65k]
  ------------------
  756|  13.1k|			{
  757|  13.1k|				if (dec->fixed_dimensions)
  ------------------
  |  Branch (757:9): [True: 0, False: 13.1k]
  ------------------
  758|      0|				{
  759|      0|					DPRINTF(XVID_DEBUG_ERROR, "decoder width/height does not match bitstream\n");
  ------------------
  |  |  196|      0|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  760|      0|					return -1;
  761|      0|				}
  762|  13.1k|				dec->width = width;
  763|  13.1k|				dec->height = height;
  764|  13.1k|				return -3;
  765|  13.1k|			}
  766|       |
  767|  21.9k|			return -2;	/* VOL */
  768|       |
  769|  24.2M|		} else if (start_code == GRPOFVOP_START_CODE) {
  ------------------
  |  |   45|  24.2M|#define GRPOFVOP_START_CODE		0x000001b3
  ------------------
  |  Branch (769:14): [True: 12.5k, False: 24.2M]
  ------------------
  770|       |
  771|  12.5k|			DPRINTF(XVID_DEBUG_STARTCODE, "<group_of_vop>\n");
  ------------------
  |  |  197|  12.5k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  772|       |
  773|  12.5k|			BitstreamSkip(bs, 32);
  774|  12.5k|			{
  775|  12.5k|				int hours, minutes, seconds;
  776|       |
  777|  12.5k|				hours = BitstreamGetBits(bs, 5);
  778|  12.5k|				minutes = BitstreamGetBits(bs, 6);
  779|  12.5k|				READ_MARKER();
  ------------------
  |  |   99|  12.5k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  780|  12.5k|				seconds = BitstreamGetBits(bs, 6);
  781|       |
  782|  12.5k|				DPRINTF(XVID_DEBUG_HEADER, "time %ih%im%is\n", hours,minutes,seconds);
  ------------------
  |  |  198|  12.5k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  783|  12.5k|			}
  784|  12.5k|			BitstreamSkip(bs, 1);	/* closed_gov */
  785|  12.5k|			BitstreamSkip(bs, 1);	/* broken_link */
  786|       |
  787|  24.2M|		} else if (start_code == VOP_START_CODE) {
  ------------------
  |  |   48|  24.2M|#define VOP_START_CODE			0x000001b6
  ------------------
  |  Branch (787:14): [True: 2.05M, False: 22.1M]
  ------------------
  788|       |
  789|  2.05M|			DPRINTF(XVID_DEBUG_STARTCODE, "<vop>\n");
  ------------------
  |  |  197|  2.05M|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  790|       |
  791|  2.05M|			BitstreamSkip(bs, 32);	/* vop_start_code */
  792|       |
  793|  2.05M|			coding_type = BitstreamGetBits(bs, 2);	/* vop_coding_type */
  794|  2.05M|			DPRINTF(XVID_DEBUG_HEADER, "coding_type %i\n", coding_type);
  ------------------
  |  |  198|  2.05M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  795|       |
  796|       |			/*********************** for decode B-frame time ***********************/
  797|  13.7M|			while (BitstreamGetBit(bs) != 0)	/* time_base */
  ------------------
  |  Branch (797:11): [True: 11.6M, False: 2.05M]
  ------------------
  798|  11.6M|				time_incr++;
  799|       |
  800|  2.05M|			READ_MARKER();
  ------------------
  |  |   99|  2.05M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  801|       |
  802|  2.05M|			if (dec->time_inc_bits) {
  ------------------
  |  Branch (802:8): [True: 2.05M, False: 4.33k]
  ------------------
  803|  2.05M|				time_increment = (BitstreamGetBits(bs, dec->time_inc_bits));	/* vop_time_increment */
  804|  2.05M|			}
  805|       |
  806|  2.05M|			DPRINTF(XVID_DEBUG_HEADER, "time_base %i\n", time_incr);
  ------------------
  |  |  198|  2.05M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  807|  2.05M|			DPRINTF(XVID_DEBUG_HEADER, "time_increment %i\n", time_increment);
  ------------------
  |  |  198|  2.05M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  808|       |
  809|  2.05M|			DPRINTF(XVID_DEBUG_TIMECODE, "%c %i:%i\n",
  ------------------
  |  |  199|  2.05M|#define XVID_DEBUG_TIMECODE  (1<< 3)
  ------------------
  810|  2.05M|				coding_type == I_VOP ? 'I' : coding_type == P_VOP ? 'P' : coding_type == B_VOP ? 'B' : 'S',
  ------------------
  |  |  104|  2.05M|#define I_VOP	0
  ------------------
              				coding_type == I_VOP ? 'I' : coding_type == P_VOP ? 'P' : coding_type == B_VOP ? 'B' : 'S',
  ------------------
  |  |  105|  2.00M|#define P_VOP	1
  ------------------
              				coding_type == I_VOP ? 'I' : coding_type == P_VOP ? 'P' : coding_type == B_VOP ? 'B' : 'S',
  ------------------
  |  |  106|  1.94M|#define B_VOP	2
  ------------------
  |  Branch (810:5): [True: 52.0k, False: 2.00M]
  |  Branch (810:34): [True: 61.7k, False: 1.94M]
  |  Branch (810:63): [True: 217k, False: 1.72M]
  ------------------
  811|  2.05M|				time_incr, time_increment);
  812|       |
  813|  2.05M|			if (coding_type != B_VOP) {
  ------------------
  |  |  106|  2.05M|#define B_VOP	2
  ------------------
  |  Branch (813:8): [True: 1.84M, False: 217k]
  ------------------
  814|  1.84M|				dec->last_time_base = dec->time_base;
  815|  1.84M|				dec->time_base += time_incr;
  816|  1.84M|				dec->time = dec->time_base*dec->time_inc_resolution + time_increment;
  817|  1.84M|				dec->time_pp = (int32_t)(dec->time - dec->last_non_b_time);
  818|  1.84M|                dec->last_non_b_time = dec->time;
  819|  1.84M|			} else {
  820|   217k|                dec->time = (dec->last_time_base + time_incr)*dec->time_inc_resolution + time_increment;
  821|   217k|				dec->time_bp = dec->time_pp - (int32_t)(dec->last_non_b_time - dec->time);
  822|   217k|			}
  823|  2.05M|            if (dec->time_pp <= 0) dec->time_pp = 1;
  ------------------
  |  Branch (823:17): [True: 768k, False: 1.29M]
  ------------------
  824|  2.05M|			DPRINTF(XVID_DEBUG_HEADER,"time_pp=%i\n", dec->time_pp);
  ------------------
  |  |  198|  2.05M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  825|  2.05M|			DPRINTF(XVID_DEBUG_HEADER,"time_bp=%i\n", dec->time_bp);
  ------------------
  |  |  198|  2.05M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  826|       |
  827|  2.05M|			READ_MARKER();
  ------------------
  |  |   99|  2.05M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  828|       |
  829|  2.05M|			if (!BitstreamGetBit(bs))	/* vop_coded */
  ------------------
  |  Branch (829:8): [True: 225k, False: 1.83M]
  ------------------
  830|   225k|			{
  831|   225k|				DPRINTF(XVID_DEBUG_HEADER, "vop_coded==false\n");
  ------------------
  |  |  198|   225k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  832|   225k|				return N_VOP;
  ------------------
  |  |  108|   225k|#define N_VOP	4
  ------------------
  833|   225k|			}
  834|       |
  835|  1.83M|			if (dec->newpred_enable)
  ------------------
  |  Branch (835:8): [True: 642k, False: 1.19M]
  ------------------
  836|   642k|			{
  837|   642k|				int vop_id;
  838|   642k|				int vop_id_for_prediction;
  839|       |
  840|   642k|				vop_id = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|   642k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 480k, False: 162k]
  |  |  ------------------
  ------------------
  841|   642k|				DPRINTF(XVID_DEBUG_HEADER, "vop_id %i\n", vop_id);
  ------------------
  |  |  198|   642k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  842|   642k|				if (BitstreamGetBit(bs))	/* vop_id_for_prediction_indication */
  ------------------
  |  Branch (842:9): [True: 60.8k, False: 581k]
  ------------------
  843|  60.8k|				{
  844|  60.8k|					vop_id_for_prediction = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|  60.8k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 56.6k, False: 4.17k]
  |  |  ------------------
  ------------------
  845|  60.8k|					DPRINTF(XVID_DEBUG_HEADER, "vop_id_for_prediction %i\n", vop_id_for_prediction);
  ------------------
  |  |  198|  60.8k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  846|  60.8k|				}
  847|   642k|				READ_MARKER();
  ------------------
  |  |   99|   642k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  848|   642k|			}
  849|       |
  850|       |
  851|       |
  852|       |			/* fix a little bug by MinChen <chenm002@163.com> */
  853|  1.83M|			if ((dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY) &&
  ------------------
  |  |   89|  1.83M|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (853:8): [True: 1.74M, False: 93.0k]
  ------------------
  854|  1.83M|				( (coding_type == P_VOP) || (coding_type == S_VOP && dec->sprite_enable == SPRITE_GMC) ) ) {
  ------------------
  |  |  105|  1.74M|#define P_VOP	1
  ------------------
              				( (coding_type == P_VOP) || (coding_type == S_VOP && dec->sprite_enable == SPRITE_GMC) ) ) {
  ------------------
  |  |  107|  3.39M|#define S_VOP	3
  ------------------
              				( (coding_type == P_VOP) || (coding_type == S_VOP && dec->sprite_enable == SPRITE_GMC) ) ) {
  ------------------
  |  |   95|  1.52M|#define SPRITE_GMC		2
  ------------------
  |  Branch (854:7): [True: 41.1k, False: 1.69M]
  |  Branch (854:34): [True: 1.52M, False: 172k]
  |  Branch (854:58): [True: 359k, False: 1.16M]
  ------------------
  855|   400k|				*rounding = BitstreamGetBit(bs);	/* rounding_type */
  856|   400k|				DPRINTF(XVID_DEBUG_HEADER, "rounding %i\n", *rounding);
  ------------------
  |  |  198|   400k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  857|   400k|			}
  858|       |
  859|  1.83M|			if (dec->reduced_resolution_enable &&
  ------------------
  |  Branch (859:8): [True: 719k, False: 1.11M]
  ------------------
  860|  1.83M|				dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR &&
  ------------------
  |  |   87|  2.55M|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (860:5): [True: 136k, False: 582k]
  ------------------
  861|  1.83M|				(coding_type == P_VOP || coding_type == I_VOP)) {
  ------------------
  |  |  105|   273k|#define P_VOP	1
  ------------------
              				(coding_type == P_VOP || coding_type == I_VOP)) {
  ------------------
  |  |  104|   134k|#define I_VOP	0
  ------------------
  |  Branch (861:6): [True: 1.90k, False: 134k]
  |  Branch (861:30): [True: 2.44k, False: 132k]
  ------------------
  862|       |
  863|  4.35k|				if (BitstreamGetBit(bs)) {
  ------------------
  |  Branch (863:9): [True: 1.27k, False: 3.08k]
  ------------------
  864|  1.27k|					DPRINTF(XVID_DEBUG_ERROR, "RRV not supported (anymore)\n");
  ------------------
  |  |  196|  1.27k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  865|  1.27k|				}
  866|  4.35k|			}
  867|       |
  868|  1.83M|			if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR) {
  ------------------
  |  |   87|  1.83M|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (868:8): [True: 948k, False: 885k]
  ------------------
  869|   948k|				if(!(dec->sprite_enable == SPRITE_STATIC && coding_type == I_VOP)) {
  ------------------
  |  |   94|  1.89M|#define SPRITE_STATIC	1
  ------------------
              				if(!(dec->sprite_enable == SPRITE_STATIC && coding_type == I_VOP)) {
  ------------------
  |  |  104|   320k|#define I_VOP	0
  ------------------
  |  Branch (869:10): [True: 320k, False: 628k]
  |  Branch (869:49): [True: 3.60k, False: 316k]
  ------------------
  870|       |
  871|   945k|					uint32_t width, height;
  872|   945k|					uint32_t horiz_mc_ref, vert_mc_ref;
  873|       |
  874|   945k|					width = BitstreamGetBits(bs, 13);
  875|   945k|					READ_MARKER();
  ------------------
  |  |   99|   945k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  876|   945k|					height = BitstreamGetBits(bs, 13);
  877|   945k|					READ_MARKER();
  ------------------
  |  |   99|   945k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  878|   945k|					horiz_mc_ref = BitstreamGetBits(bs, 13);
  879|   945k|					READ_MARKER();
  ------------------
  |  |   99|   945k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  880|   945k|					vert_mc_ref = BitstreamGetBits(bs, 13);
  881|   945k|					READ_MARKER();
  ------------------
  |  |   99|   945k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  882|       |
  883|   945k|					DPRINTF(XVID_DEBUG_HEADER, "width %i\n", width);
  ------------------
  |  |  198|   945k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  884|   945k|					DPRINTF(XVID_DEBUG_HEADER, "height %i\n", height);
  ------------------
  |  |  198|   945k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  885|   945k|					DPRINTF(XVID_DEBUG_HEADER, "horiz_mc_ref %i\n", horiz_mc_ref);
  ------------------
  |  |  198|   945k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  886|   945k|					DPRINTF(XVID_DEBUG_HEADER, "vert_mc_ref %i\n", vert_mc_ref);
  ------------------
  |  |  198|   945k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  887|   945k|				}
  888|       |
  889|   948k|				BitstreamSkip(bs, 1);	/* change_conv_ratio_disable */
  890|   948k|				if (BitstreamGetBit(bs))	/* vop_constant_alpha */
  ------------------
  |  Branch (890:9): [True: 608k, False: 340k]
  ------------------
  891|   608k|				{
  892|   608k|					BitstreamSkip(bs, 8);	/* vop_constant_alpha_value */
  893|   608k|				}
  894|   948k|			}
  895|       |
  896|  1.83M|			if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY) {
  ------------------
  |  |   89|  1.83M|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (896:8): [True: 1.74M, False: 93.0k]
  ------------------
  897|       |
  898|  1.74M|				if (!dec->complexity_estimation_disable)
  ------------------
  |  Branch (898:9): [True: 1.55M, False: 186k]
  ------------------
  899|  1.55M|				{
  900|  1.55M|					read_vop_complexity_estimation_header(bs, dec, coding_type);
  901|  1.55M|				}
  902|       |
  903|       |				/* intra_dc_vlc_threshold */
  904|  1.74M|				*intra_dc_threshold =
  905|  1.74M|					intra_dc_threshold_table[BitstreamGetBits(bs, 3)];
  906|       |
  907|  1.74M|				dec->top_field_first = 0;
  908|  1.74M|				dec->alternate_vertical_scan = 0;
  909|       |
  910|  1.74M|				if (dec->interlacing) {
  ------------------
  |  Branch (910:9): [True: 897k, False: 843k]
  ------------------
  911|   897k|					dec->top_field_first = BitstreamGetBit(bs);
  912|   897k|					DPRINTF(XVID_DEBUG_HEADER, "interlace top_field_first %i\n", dec->top_field_first);
  ------------------
  |  |  198|   897k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  913|   897k|					dec->alternate_vertical_scan = BitstreamGetBit(bs);
  914|   897k|					DPRINTF(XVID_DEBUG_HEADER, "interlace alternate_vertical_scan %i\n", dec->alternate_vertical_scan);
  ------------------
  |  |  198|   897k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  915|       |
  916|   897k|				}
  917|  1.74M|			}
  918|       |
  919|  1.83M|			if ((dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable== SPRITE_GMC) && coding_type == S_VOP) {
  ------------------
  |  |   94|  3.66M|#define SPRITE_STATIC	1
  ------------------
              			if ((dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable== SPRITE_GMC) && coding_type == S_VOP) {
  ------------------
  |  |   95|  1.06M|#define SPRITE_GMC		2
  ------------------
              			if ((dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable== SPRITE_GMC) && coding_type == S_VOP) {
  ------------------
  |  |  107|  1.15M|#define S_VOP	3
  ------------------
  |  Branch (919:9): [True: 773k, False: 1.06M]
  |  Branch (919:48): [True: 383k, False: 677k]
  |  Branch (919:84): [True: 1.04M, False: 114k]
  ------------------
  920|       |
  921|  1.04M|				int i;
  922|       |
  923|  3.02M|				for (i = 0 ; i < MIN(4, dec->sprite_warping_points); i++)
  ------------------
  |  |  255|  3.02M|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 2.42M, False: 597k]
  |  |  ------------------
  ------------------
  |  Branch (923:18): [True: 1.98M, False: 1.04M]
  ------------------
  924|  1.98M|				{
  925|  1.98M|					int length;
  926|  1.98M|					int x = 0, y = 0;
  927|       |
  928|       |					/* sprite code borowed from ffmpeg; thx Michael Niedermayer <michaelni@gmx.at> */
  929|  1.98M|					length = bs_get_spritetrajectory(bs);
  930|  1.98M|					if(length){
  ------------------
  |  Branch (930:9): [True: 1.31M, False: 672k]
  ------------------
  931|  1.31M|						x= BitstreamGetBits(bs, length);
  932|  1.31M|						if ((x >> (length - 1)) == 0) /* if MSB not set it is negative*/
  ------------------
  |  Branch (932:11): [True: 991k, False: 318k]
  ------------------
  933|   991k|							x = - (x ^ ((1 << length) - 1));
  934|  1.31M|					}
  935|  1.98M|					READ_MARKER();
  ------------------
  |  |   99|  1.98M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  936|       |
  937|  1.98M|					length = bs_get_spritetrajectory(bs);
  938|  1.98M|					if(length){
  ------------------
  |  Branch (938:9): [True: 1.29M, False: 692k]
  ------------------
  939|  1.29M|						y = BitstreamGetBits(bs, length);
  940|  1.29M|						if ((y >> (length - 1)) == 0) /* if MSB not set it is negative*/
  ------------------
  |  Branch (940:11): [True: 996k, False: 294k]
  ------------------
  941|   996k|							y = - (y ^ ((1 << length) - 1));
  942|  1.29M|					}
  943|  1.98M|					READ_MARKER();
  ------------------
  |  |   99|  1.98M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  944|       |
  945|  1.98M|					gmc_warp->duv[i].x = x;
  946|  1.98M|					gmc_warp->duv[i].y = y;
  947|       |
  948|  1.98M|					DPRINTF(XVID_DEBUG_HEADER,"sprite_warping_point[%i] xy=(%i,%i)\n", i, x, y);
  ------------------
  |  |  198|  1.98M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  949|  1.98M|				}
  950|       |
  951|  1.04M|				if (dec->sprite_brightness_change)
  ------------------
  |  Branch (951:9): [True: 209k, False: 833k]
  ------------------
  952|   209k|				{
  953|       |					/* XXX: brightness_change_factor() */
  954|   209k|				}
  955|  1.04M|				if (dec->sprite_enable == SPRITE_STATIC)
  ------------------
  |  |   94|  1.04M|#define SPRITE_STATIC	1
  ------------------
  |  Branch (955:9): [True: 679k, False: 362k]
  ------------------
  956|   679k|				{
  957|       |					/* XXX: todo */
  958|   679k|				}
  959|       |
  960|  1.04M|			}
  961|       |
  962|  1.83M|			if ((*quant = BitstreamGetBits(bs, dec->quant_bits)) < 1)	/* vop_quant */
  ------------------
  |  Branch (962:8): [True: 780k, False: 1.05M]
  ------------------
  963|   780k|				*quant = 1;
  964|  1.83M|			DPRINTF(XVID_DEBUG_HEADER, "quant %i\n", *quant);
  ------------------
  |  |  198|  1.83M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  965|       |
  966|  1.83M|			if (coding_type != I_VOP) {
  ------------------
  |  |  104|  1.83M|#define I_VOP	0
  ------------------
  |  Branch (966:8): [True: 1.80M, False: 26.5k]
  ------------------
  967|  1.80M|				*fcode_forward = BitstreamGetBits(bs, 3);	/* fcode_forward */
  968|  1.80M|				DPRINTF(XVID_DEBUG_HEADER, "fcode_forward %i\n", *fcode_forward);
  ------------------
  |  |  198|  1.80M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  969|  1.80M|			}
  970|       |
  971|  1.83M|			if (coding_type == B_VOP) {
  ------------------
  |  |  106|  1.83M|#define B_VOP	2
  ------------------
  |  Branch (971:8): [True: 148k, False: 1.68M]
  ------------------
  972|   148k|				*fcode_backward = BitstreamGetBits(bs, 3);	/* fcode_backward */
  973|   148k|				DPRINTF(XVID_DEBUG_HEADER, "fcode_backward %i\n", *fcode_backward);
  ------------------
  |  |  198|   148k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  974|   148k|			}
  975|  1.83M|			if (!dec->scalability) {
  ------------------
  |  Branch (975:8): [True: 689k, False: 1.14M]
  ------------------
  976|   689k|				if ((dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR) &&
  ------------------
  |  |   87|   689k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (976:9): [True: 115k, False: 573k]
  ------------------
  977|   689k|					(coding_type != I_VOP)) {
  ------------------
  |  |  104|   115k|#define I_VOP	0
  ------------------
  |  Branch (977:6): [True: 113k, False: 1.80k]
  ------------------
  978|   113k|					BitstreamSkip(bs, 1);	/* vop_shape_coding_type */
  979|   113k|				}
  980|   689k|			}
  981|  1.83M|			return coding_type;
  982|       |
  983|  22.1M|		} else if (start_code == USERDATA_START_CODE) {
  ------------------
  |  |   44|  22.1M|#define USERDATA_START_CODE		0x000001b2
  ------------------
  |  Branch (983:14): [True: 8.03k, False: 22.1M]
  ------------------
  984|  8.03k|			char tmp[256];
  985|  8.03k|			int i, version = 0, build = 0;
  986|  8.03k|			char packed;
  987|       |
  988|  8.03k|			BitstreamSkip(bs, 32);	/* user_data_start_code */
  989|       |
  990|  8.03k|			memset(tmp, 0, 256);
  991|  8.03k|			tmp[0] = BitstreamShowBits(bs, 8);
  992|       |
  993|  72.3k|			for(i = 1; i < 255; i++){
  ------------------
  |  Branch (993:15): [True: 72.3k, False: 67]
  ------------------
  994|  72.3k|				tmp[i] = (BitstreamShowBits(bs, 16) & 0xFF);
  995|       |
  996|  72.3k|				if(tmp[i] == 0)
  ------------------
  |  Branch (996:8): [True: 7.96k, False: 64.3k]
  ------------------
  997|  7.96k|					break;
  998|       |
  999|  64.3k|				BitstreamSkip(bs, 8);
 1000|  64.3k|			}
 1001|       |
 1002|  8.03k|			DPRINTF(XVID_DEBUG_STARTCODE, "<user_data>: %s\n", tmp);
  ------------------
  |  |  197|  8.03k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
 1003|       |
 1004|       |			/* read xvid bitstream version */
 1005|  8.03k|			if(strncmp(tmp, "XviD", 4) == 0) {
  ------------------
  |  Branch (1005:7): [True: 3.23k, False: 4.80k]
  ------------------
 1006|  3.23k|				if (tmp[strlen(tmp)-1] == 'C') {				
  ------------------
  |  Branch (1006:9): [True: 325, False: 2.90k]
  ------------------
 1007|    325|					sscanf(tmp, "XviD%dC", &dec->bs_version);
 1008|    325|					dec->cartoon_mode = 1;
 1009|    325|				}
 1010|  2.90k|				else
 1011|  2.90k|					sscanf(tmp, "XviD%d", &dec->bs_version);
 1012|       |
 1013|  3.23k|				DPRINTF(XVID_DEBUG_HEADER, "xvid bitstream version=%i\n", dec->bs_version);
  ------------------
  |  |  198|  3.23k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
 1014|  3.23k|			}
 1015|       |
 1016|       |		    /* divx detection */
 1017|  8.03k|			i = sscanf(tmp, "DivX%dBuild%d%c", &version, &build, &packed);
 1018|  8.03k|			if (i < 2)
  ------------------
  |  Branch (1018:8): [True: 8.03k, False: 0]
  ------------------
 1019|  8.03k|				i = sscanf(tmp, "DivX%db%d%c", &version, &build, &packed);
 1020|       |
 1021|  8.03k|			if (i >= 2)
  ------------------
  |  Branch (1021:8): [True: 2.65k, False: 5.37k]
  ------------------
 1022|  2.65k|			{
 1023|  2.65k|				dec->packed_mode = (i == 3 && packed == 'p');
  ------------------
  |  Branch (1023:25): [True: 644, False: 2.01k]
  |  Branch (1023:35): [True: 35, False: 609]
  ------------------
 1024|  2.65k|				DPRINTF(XVID_DEBUG_HEADER, "divx version=%i, build=%i packed=%i\n",
  ------------------
  |  |  198|  2.65k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
 1025|  2.65k|						version, build, dec->packed_mode);
 1026|  2.65k|			}
 1027|       |
 1028|  8.03k|			if ((dec->bs_version == 0) && (build > 0) &&
  ------------------
  |  Branch (1028:8): [True: 5.06k, False: 2.96k]
  |  Branch (1028:34): [True: 2.05k, False: 3.01k]
  ------------------
 1029|  8.03k|				(build != 1393)) { /* non-xvid stream with xvid fourcc */
  ------------------
  |  Branch (1029:5): [True: 1.97k, False: 86]
  ------------------
 1030|  1.97k|				dec->bs_version = 0xffff;
 1031|  1.97k|			}
 1032|       |
 1033|  8.03k|		} else					/* start_code == ? */
 1034|  22.1M|		{
 1035|  22.1M|			if (BitstreamShowBits(bs, 24) == 0x000001) {
  ------------------
  |  Branch (1035:8): [True: 383k, False: 21.7M]
  ------------------
 1036|   383k|				DPRINTF(XVID_DEBUG_STARTCODE, "<unknown: %x>\n", BitstreamShowBits(bs, 32));
  ------------------
  |  |  197|   383k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
 1037|   383k|			}
 1038|  22.1M|			BitstreamSkip(bs, 8);
 1039|  22.1M|		}
 1040|  24.3M|	}
 1041|       |
 1042|       |#if 0
 1043|       |	DPRINTF("*** WARNING: no vop_start_code found");
 1044|       |#endif
 1045|  3.13k|	return -1;					/* ignore it */
 1046|  2.11M|}
bitstream.c:log2bin:
   39|  91.0k|{
   40|  91.0k|  int n = 0;
   41|  91.0k|  if (value & 0xffff0000) {
  ------------------
  |  Branch (41:7): [True: 378, False: 90.7k]
  ------------------
   42|    378|    value >>= 16;
   43|    378|    n += 16;
   44|    378|  }
   45|  91.0k|  if (value & 0xff00) {
  ------------------
  |  Branch (45:7): [True: 61.1k, False: 29.9k]
  ------------------
   46|  61.1k|    value >>= 8;
   47|  61.1k|    n += 8;
   48|  61.1k|  }
   49|  91.0k|  if (value & 0xf0) {
  ------------------
  |  Branch (49:7): [True: 50.1k, False: 40.9k]
  ------------------
   50|  50.1k|    value >>= 4;
   51|  50.1k|    n += 4;
   52|  50.1k|  }
   53|  91.0k| return n + log2_tab_16[value];
   54|  91.0k|}
bitstream.c:bs_get_matrix:
   71|  4.51k|{
   72|  4.51k|	int i = 0;
   73|  4.51k|	int last, value = 0;
   74|       |
   75|  45.3k|	do {
   76|  45.3k|		last = value;
   77|  45.3k|		value = BitstreamGetBits(bs, 8);
   78|  45.3k|		matrix[scan_tables[0][i++]] = value;
   79|  45.3k|	}
   80|  45.3k|	while (value != 0 && i < 64);
  ------------------
  |  Branch (80:9): [True: 41.0k, False: 4.21k]
  |  Branch (80:23): [True: 40.7k, False: 304]
  ------------------
   81|       |
   82|  4.51k|	if (value != 0) return;
  ------------------
  |  Branch (82:6): [True: 304, False: 4.21k]
  ------------------
   83|       |
   84|  4.21k|	i--;
   85|   252k|	while (i < 64) {
  ------------------
  |  Branch (85:9): [True: 248k, False: 4.21k]
  ------------------
   86|   248k|		matrix[scan_tables[0][i++]] = last;
   87|   248k|	}
   88|  4.21k|}
bitstream.c:read_vol_complexity_estimation_header:
  225|  26.6k|{
  226|  26.6k|	ESTIMATION * e = &dec->estimation;
  227|       |
  228|  26.6k|	e->method = BitstreamGetBits(bs, 2);	/* estimation_method */
  229|  26.6k|	DPRINTF(XVID_DEBUG_HEADER,"+ complexity_estimation_header; method=%i\n", e->method);
  ------------------
  |  |  198|  26.6k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  230|       |
  231|  26.6k|	if (e->method == 0 || e->method == 1)
  ------------------
  |  Branch (231:6): [True: 18.0k, False: 8.66k]
  |  Branch (231:24): [True: 5.18k, False: 3.48k]
  ------------------
  232|  23.1k|	{
  233|  23.1k|		if (!BitstreamGetBit(bs))		/* shape_complexity_estimation_disable */
  ------------------
  |  Branch (233:7): [True: 20.1k, False: 3.00k]
  ------------------
  234|  20.1k|		{
  235|  20.1k|			e->opaque = BitstreamGetBit(bs);		/* opaque */
  236|  20.1k|			e->transparent = BitstreamGetBit(bs);		/* transparent */
  237|  20.1k|			e->intra_cae = BitstreamGetBit(bs);		/* intra_cae */
  238|  20.1k|			e->inter_cae = BitstreamGetBit(bs);		/* inter_cae */
  239|  20.1k|			e->no_update = BitstreamGetBit(bs);		/* no_update */
  240|  20.1k|			e->upsampling = BitstreamGetBit(bs);		/* upsampling */
  241|  20.1k|		}
  242|       |
  243|  23.1k|		if (!BitstreamGetBit(bs))	/* texture_complexity_estimation_set_1_disable */
  ------------------
  |  Branch (243:7): [True: 15.6k, False: 7.50k]
  ------------------
  244|  15.6k|		{
  245|  15.6k|			e->intra_blocks = BitstreamGetBit(bs);		/* intra_blocks */
  246|  15.6k|			e->inter_blocks = BitstreamGetBit(bs);		/* inter_blocks */
  247|  15.6k|			e->inter4v_blocks = BitstreamGetBit(bs);		/* inter4v_blocks */
  248|  15.6k|			e->not_coded_blocks = BitstreamGetBit(bs);		/* not_coded_blocks */
  249|  15.6k|		}
  250|  23.1k|	}
  251|       |
  252|  26.6k|	READ_MARKER();
  ------------------
  |  |   99|  26.6k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  253|       |
  254|  26.6k|	if (!BitstreamGetBit(bs))		/* texture_complexity_estimation_set_2_disable */
  ------------------
  |  Branch (254:6): [True: 15.9k, False: 10.7k]
  ------------------
  255|  15.9k|	{
  256|  15.9k|		e->dct_coefs = BitstreamGetBit(bs);		/* dct_coefs */
  257|  15.9k|		e->dct_lines = BitstreamGetBit(bs);		/* dct_lines */
  258|  15.9k|		e->vlc_symbols = BitstreamGetBit(bs);		/* vlc_symbols */
  259|  15.9k|		e->vlc_bits = BitstreamGetBit(bs);		/* vlc_bits */
  260|  15.9k|	}
  261|       |
  262|  26.6k|	if (!BitstreamGetBit(bs))		/* motion_compensation_complexity_disable */
  ------------------
  |  Branch (262:6): [True: 18.4k, False: 8.22k]
  ------------------
  263|  18.4k|	{
  264|  18.4k|		e->apm = BitstreamGetBit(bs);		/* apm */
  265|  18.4k|		e->npm = BitstreamGetBit(bs);		/* npm */
  266|  18.4k|		e->interpolate_mc_q = BitstreamGetBit(bs);		/* interpolate_mc_q */
  267|  18.4k|		e->forw_back_mc_q = BitstreamGetBit(bs);		/* forw_back_mc_q */
  268|  18.4k|		e->halfpel2 = BitstreamGetBit(bs);		/* halfpel2 */
  269|  18.4k|		e->halfpel4 = BitstreamGetBit(bs);		/* halfpel4 */
  270|  18.4k|	}
  271|       |
  272|  26.6k|	READ_MARKER();
  ------------------
  |  |   99|  26.6k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  273|       |
  274|  26.6k|	if (e->method == 1)
  ------------------
  |  Branch (274:6): [True: 5.18k, False: 21.4k]
  ------------------
  275|  5.18k|	{
  276|  5.18k|		if (!BitstreamGetBit(bs))	/* version2_complexity_estimation_disable */
  ------------------
  |  Branch (276:7): [True: 4.06k, False: 1.11k]
  ------------------
  277|  4.06k|		{
  278|  4.06k|			e->sadct = BitstreamGetBit(bs);		/* sadct */
  279|  4.06k|			e->quarterpel = BitstreamGetBit(bs);		/* quarterpel */
  280|  4.06k|		}
  281|  5.18k|	}
  282|  26.6k|}
bitstream.c:read_vop_complexity_estimation_header:
  288|  1.55M|{
  289|  1.55M|	ESTIMATION * e = &dec->estimation;
  290|       |
  291|  1.55M|	if (e->method == 0 || e->method == 1)
  ------------------
  |  Branch (291:6): [True: 1.05M, False: 500k]
  |  Branch (291:24): [True: 400k, False: 99.9k]
  ------------------
  292|  1.45M|	{
  293|  1.45M|		if (coding_type == I_VOP) {
  ------------------
  |  |  104|  1.45M|#define I_VOP	0
  ------------------
  |  Branch (293:7): [True: 17.7k, False: 1.43M]
  ------------------
  294|  17.7k|			if (e->opaque)		BitstreamSkip(bs, 8);	/* dcecs_opaque */
  ------------------
  |  Branch (294:8): [True: 4.09k, False: 13.6k]
  ------------------
  295|  17.7k|			if (e->transparent) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (295:8): [True: 7.61k, False: 10.1k]
  ------------------
  296|  17.7k|			if (e->intra_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (296:8): [True: 3.25k, False: 14.4k]
  ------------------
  297|  17.7k|			if (e->inter_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (297:8): [True: 7.07k, False: 10.6k]
  ------------------
  298|  17.7k|			if (e->no_update)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (298:8): [True: 4.22k, False: 13.4k]
  ------------------
  299|  17.7k|			if (e->upsampling)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (299:8): [True: 4.09k, False: 13.6k]
  ------------------
  300|  17.7k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (300:8): [True: 2.96k, False: 14.7k]
  ------------------
  301|  17.7k|			if (e->not_coded_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (301:8): [True: 5.05k, False: 12.6k]
  ------------------
  302|  17.7k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (302:8): [True: 3.38k, False: 14.3k]
  ------------------
  303|  17.7k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (303:8): [True: 4.00k, False: 13.7k]
  ------------------
  304|  17.7k|			if (e->vlc_symbols) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (304:8): [True: 6.61k, False: 11.1k]
  ------------------
  305|  17.7k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (305:8): [True: 3.12k, False: 14.5k]
  ------------------
  306|  17.7k|			if (e->sadct)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (306:8): [True: 5.29k, False: 12.4k]
  ------------------
  307|  17.7k|		}
  308|       |
  309|  1.45M|		if (coding_type == P_VOP) {
  ------------------
  |  |  105|  1.45M|#define P_VOP	1
  ------------------
  |  Branch (309:7): [True: 36.3k, False: 1.41M]
  ------------------
  310|  36.3k|			if (e->opaque) BitstreamSkip(bs, 8);		/* */
  ------------------
  |  Branch (310:8): [True: 9.60k, False: 26.7k]
  ------------------
  311|  36.3k|			if (e->transparent) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (311:8): [True: 19.1k, False: 17.1k]
  ------------------
  312|  36.3k|			if (e->intra_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (312:8): [True: 21.9k, False: 14.3k]
  ------------------
  313|  36.3k|			if (e->inter_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (313:8): [True: 17.0k, False: 19.2k]
  ------------------
  314|  36.3k|			if (e->no_update)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (314:8): [True: 16.5k, False: 19.7k]
  ------------------
  315|  36.3k|			if (e->upsampling) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (315:8): [True: 7.65k, False: 28.6k]
  ------------------
  316|  36.3k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (316:8): [True: 8.86k, False: 27.4k]
  ------------------
  317|  36.3k|			if (e->not_coded_blocks)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (317:8): [True: 6.74k, False: 29.6k]
  ------------------
  318|  36.3k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (318:8): [True: 15.1k, False: 21.2k]
  ------------------
  319|  36.3k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (319:8): [True: 19.6k, False: 16.7k]
  ------------------
  320|  36.3k|			if (e->vlc_symbols) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (320:8): [True: 10.2k, False: 26.1k]
  ------------------
  321|  36.3k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (321:8): [True: 11.6k, False: 24.6k]
  ------------------
  322|  36.3k|			if (e->inter_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (322:8): [True: 5.09k, False: 31.2k]
  ------------------
  323|  36.3k|			if (e->inter4v_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (323:8): [True: 9.83k, False: 26.5k]
  ------------------
  324|  36.3k|			if (e->apm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (324:8): [True: 6.05k, False: 30.2k]
  ------------------
  325|  36.3k|			if (e->npm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (325:8): [True: 7.11k, False: 29.2k]
  ------------------
  326|  36.3k|			if (e->forw_back_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (326:8): [True: 8.89k, False: 27.4k]
  ------------------
  327|  36.3k|			if (e->halfpel2)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (327:8): [True: 5.37k, False: 30.9k]
  ------------------
  328|  36.3k|			if (e->halfpel4)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (328:8): [True: 7.71k, False: 28.6k]
  ------------------
  329|  36.3k|			if (e->sadct)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (329:8): [True: 16.2k, False: 20.0k]
  ------------------
  330|  36.3k|			if (e->quarterpel)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (330:8): [True: 17.2k, False: 19.0k]
  ------------------
  331|  36.3k|		}
  332|  1.45M|		if (coding_type == B_VOP) {
  ------------------
  |  |  106|  1.45M|#define B_VOP	2
  ------------------
  |  Branch (332:7): [True: 130k, False: 1.32M]
  ------------------
  333|   130k|			if (e->opaque)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (333:8): [True: 66.9k, False: 63.7k]
  ------------------
  334|   130k|			if (e->transparent)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (334:8): [True: 67.2k, False: 63.4k]
  ------------------
  335|   130k|			if (e->intra_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (335:8): [True: 15.7k, False: 115k]
  ------------------
  336|   130k|			if (e->inter_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (336:8): [True: 70.9k, False: 59.8k]
  ------------------
  337|   130k|			if (e->no_update)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (337:8): [True: 23.9k, False: 106k]
  ------------------
  338|   130k|			if (e->upsampling)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (338:8): [True: 15.0k, False: 115k]
  ------------------
  339|   130k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (339:8): [True: 27.3k, False: 103k]
  ------------------
  340|   130k|			if (e->not_coded_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (340:8): [True: 9.77k, False: 120k]
  ------------------
  341|   130k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (341:8): [True: 16.0k, False: 114k]
  ------------------
  342|   130k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (342:8): [True: 15.3k, False: 115k]
  ------------------
  343|   130k|			if (e->vlc_symbols)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (343:8): [True: 52.4k, False: 78.2k]
  ------------------
  344|   130k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (344:8): [True: 11.6k, False: 119k]
  ------------------
  345|   130k|			if (e->inter_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (345:8): [True: 13.3k, False: 117k]
  ------------------
  346|   130k|			if (e->inter4v_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (346:8): [True: 17.4k, False: 113k]
  ------------------
  347|   130k|			if (e->apm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (347:8): [True: 81.1k, False: 49.5k]
  ------------------
  348|   130k|			if (e->npm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (348:8): [True: 84.7k, False: 45.9k]
  ------------------
  349|   130k|			if (e->forw_back_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (349:8): [True: 21.7k, False: 108k]
  ------------------
  350|   130k|			if (e->halfpel2)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (350:8): [True: 26.5k, False: 104k]
  ------------------
  351|   130k|			if (e->halfpel4)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (351:8): [True: 42.5k, False: 88.1k]
  ------------------
  352|   130k|			if (e->interpolate_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (352:8): [True: 78.9k, False: 51.8k]
  ------------------
  353|   130k|			if (e->sadct)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (353:8): [True: 59.6k, False: 71.0k]
  ------------------
  354|   130k|			if (e->quarterpel)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (354:8): [True: 53.1k, False: 77.6k]
  ------------------
  355|   130k|		}
  356|       |
  357|  1.45M|		if (coding_type == S_VOP && dec->sprite_enable == SPRITE_STATIC) {
  ------------------
  |  |  107|  2.90M|#define S_VOP	3
  ------------------
              		if (coding_type == S_VOP && dec->sprite_enable == SPRITE_STATIC) {
  ------------------
  |  |   94|  1.26M|#define SPRITE_STATIC	1
  ------------------
  |  Branch (357:7): [True: 1.26M, False: 184k]
  |  Branch (357:31): [True: 527k, False: 742k]
  ------------------
  358|   527k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (358:8): [True: 82.6k, False: 444k]
  ------------------
  359|   527k|			if (e->not_coded_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (359:8): [True: 270k, False: 256k]
  ------------------
  360|   527k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (360:8): [True: 50.6k, False: 476k]
  ------------------
  361|   527k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (361:8): [True: 27.2k, False: 499k]
  ------------------
  362|   527k|			if (e->vlc_symbols)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (362:8): [True: 195k, False: 331k]
  ------------------
  363|   527k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (363:8): [True: 39.8k, False: 487k]
  ------------------
  364|   527k|			if (e->inter_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (364:8): [True: 57.5k, False: 469k]
  ------------------
  365|   527k|			if (e->inter4v_blocks)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (365:8): [True: 294k, False: 232k]
  ------------------
  366|   527k|			if (e->apm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (366:8): [True: 195k, False: 331k]
  ------------------
  367|   527k|			if (e->npm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (367:8): [True: 461k, False: 65.1k]
  ------------------
  368|   527k|			if (e->forw_back_mc_q)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (368:8): [True: 308k, False: 218k]
  ------------------
  369|   527k|			if (e->halfpel2)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (369:8): [True: 328k, False: 198k]
  ------------------
  370|   527k|			if (e->halfpel4)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (370:8): [True: 295k, False: 231k]
  ------------------
  371|   527k|			if (e->interpolate_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (371:8): [True: 449k, False: 77.3k]
  ------------------
  372|   527k|		}
  373|  1.45M|	}
  374|  1.55M|}

mbcoding.c:BitstreamShowBits:
  254|   620M|{
  255|   620M|	int nbit = (bits + bs->pos) - 32;
  256|       |
  257|   620M|	if (nbit > 0) {
  ------------------
  |  Branch (257:6): [True: 252M, False: 367M]
  ------------------
  258|   252M|		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
  259|   252M|																 bufb >> (32 -
  260|   252M|																		  nbit));
  261|   367M|	} else {
  262|   367M|		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
  263|   367M|	}
  264|   620M|}
mbcoding.c:BitstreamSkip:
  272|   251M|{
  273|   251M|	bs->pos += bits;
  274|       |
  275|   251M|	if (bs->pos >= 32) {
  ------------------
  |  Branch (275:6): [True: 58.5M, False: 192M]
  ------------------
  276|  58.5M|		uint32_t tmp;
  277|       |
  278|  58.5M|		bs->bufa = bs->bufb;
  279|  58.5M|#if defined(XVID_SAFE_BS_TAIL)
  280|  58.5M|		if (bs->tail<(bs->start+((bs->length+3)>>2)))
  ------------------
  |  Branch (280:7): [True: 6.53M, False: 52.0M]
  ------------------
  281|  6.53M|#endif
  282|  6.53M|		{
  283|  6.53M|			tmp = *((uint32_t *) bs->tail + 2);
  284|  6.53M|#ifndef ARCH_IS_BIG_ENDIAN
  285|  6.53M|			BSWAP(tmp);
  ------------------
  |  |  365|  6.53M|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  6.53M|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  286|  6.53M|#endif
  287|  6.53M|			bs->bufb = tmp;
  288|  6.53M|			bs->tail++;
  289|  6.53M|		}
  290|  52.0M|#if defined(XVID_SAFE_BS_TAIL)
  291|  52.0M|		else {
  292|  52.0M|			bs->bufb = 0;
  293|  52.0M|		}
  294|  58.5M|#endif
  295|  58.5M|		bs->pos -= 32;
  296|  58.5M|	}
  297|   251M|}
mbcoding.c:BitstreamNumBitsToByteAlign:
  303|  51.3M|{
  304|  51.3M|	uint32_t n = (32 - bs->pos) % 8;
  305|  51.3M|	return n == 0 ? 8 : n;
  ------------------
  |  Branch (305:9): [True: 5.87M, False: 45.5M]
  ------------------
  306|  51.3M|}
mbcoding.c:BitstreamShowBitsFromByteAlign:
  312|  5.24M|{
  313|  5.24M|	int bspos = bs->pos + BitstreamNumBitsToByteAlign(bs);
  314|  5.24M|	int nbit = (bits + bspos) - 32;
  315|       |
  316|  5.24M|	if (bspos >= 32) {
  ------------------
  |  Branch (316:6): [True: 975k, False: 4.26M]
  ------------------
  317|   975k|		return bs->bufb >> (32 - nbit);
  318|  4.26M|	} else	if (nbit > 0) {
  ------------------
  |  Branch (318:13): [True: 1.82M, False: 2.43M]
  ------------------
  319|  1.82M|		return ((bs->bufa & (0xffffffff >> bspos)) << nbit) | (bs->
  320|  1.82M|																 bufb >> (32 -
  321|  1.82M|																		  nbit));
  322|  2.43M|	} else {
  323|  2.43M|		return (bs->bufa & (0xffffffff >> bspos)) >> (32 - bspos - bits);
  324|  2.43M|	}
  325|       |
  326|  5.24M|}
mbcoding.c:BitstreamGetBit:
  418|  1.61M|{
  419|  1.61M|	return BitstreamGetBits(bs, 1);
  420|  1.61M|}
mbcoding.c:BitstreamGetBits:
  406|  83.1M|{
  407|  83.1M|	uint32_t ret = BitstreamShowBits(bs, n);
  408|       |
  409|  83.1M|	BitstreamSkip(bs, n);
  410|  83.1M|	return ret;
  411|  83.1M|}
decoder.c:BitstreamInit:
  184|  72.0k|{
  185|  72.0k|	uint32_t tmp;
  186|  72.0k|	size_t bitpos;
  187|  72.0k|	ptr_t adjbitstream = (ptr_t)bitstream;
  ------------------
  |  |  132|  72.0k|#    define ptr_t uint64_t
  ------------------
  188|       |
  189|       |	/*
  190|       |	 * Start the stream on a uint32_t boundary, by rounding down to the
  191|       |	 * previous uint32_t and skipping the intervening bytes.
  192|       |	 */
  193|  72.0k|	bitpos = ((sizeof(uint32_t)-1) & (size_t)bitstream);
  194|  72.0k|	adjbitstream = adjbitstream - bitpos;
  195|  72.0k|	bs->start = bs->tail = (uint32_t *) adjbitstream;
  196|       |
  197|  72.0k|	tmp = *bs->start;
  198|  72.0k|#ifndef ARCH_IS_BIG_ENDIAN
  199|  72.0k|	BSWAP(tmp);
  ------------------
  |  |  365|  72.0k|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  72.0k|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  200|  72.0k|#endif
  201|  72.0k|	bs->bufa = tmp;
  202|       |
  203|  72.0k|	tmp = *(bs->start + 1);
  204|  72.0k|#ifndef ARCH_IS_BIG_ENDIAN
  205|  72.0k|	BSWAP(tmp);
  ------------------
  |  |  365|  72.0k|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  72.0k|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  206|  72.0k|#endif
  207|  72.0k|	bs->bufb = tmp;
  208|       |
  209|  72.0k|	bs->pos = bs->initpos = (uint32_t) bitpos*8;
  210|       |	/* preserve the intervening bytes */
  211|  72.0k|	if (bs->initpos > 0)
  ------------------
  |  Branch (211:6): [True: 45.2k, False: 26.8k]
  ------------------
  212|  45.2k|		bs->buf = bs->bufa & (0xffffffff << (32 - bs->initpos));
  213|  26.8k|	else
  214|  26.8k|		bs->buf = 0;
  215|  72.0k|	bs->length = length;
  216|  72.0k|}
decoder.c:BitstreamShowBits:
  254|   189M|{
  255|   189M|	int nbit = (bits + bs->pos) - 32;
  256|       |
  257|   189M|	if (nbit > 0) {
  ------------------
  |  Branch (257:6): [True: 43.2M, False: 146M]
  ------------------
  258|  43.2M|		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
  259|  43.2M|																 bufb >> (32 -
  260|  43.2M|																		  nbit));
  261|   146M|	} else {
  262|   146M|		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
  263|   146M|	}
  264|   189M|}
decoder.c:BitstreamPos:
  347|  72.0k|{
  348|  72.0k|	return((uint32_t)(8*((ptr_t)bs->tail - (ptr_t)bs->start) + bs->pos - bs->initpos));
  349|  72.0k|}
decoder.c:BitstreamSkip:
  272|   107M|{
  273|   107M|	bs->pos += bits;
  274|       |
  275|   107M|	if (bs->pos >= 32) {
  ------------------
  |  Branch (275:6): [True: 33.1M, False: 74.0M]
  ------------------
  276|  33.1M|		uint32_t tmp;
  277|       |
  278|  33.1M|		bs->bufa = bs->bufb;
  279|  33.1M|#if defined(XVID_SAFE_BS_TAIL)
  280|  33.1M|		if (bs->tail<(bs->start+((bs->length+3)>>2)))
  ------------------
  |  Branch (280:7): [True: 3.68M, False: 29.5M]
  ------------------
  281|  3.68M|#endif
  282|  3.68M|		{
  283|  3.68M|			tmp = *((uint32_t *) bs->tail + 2);
  284|  3.68M|#ifndef ARCH_IS_BIG_ENDIAN
  285|  3.68M|			BSWAP(tmp);
  ------------------
  |  |  365|  3.68M|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  3.68M|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  286|  3.68M|#endif
  287|  3.68M|			bs->bufb = tmp;
  288|  3.68M|			bs->tail++;
  289|  3.68M|		}
  290|  29.5M|#if defined(XVID_SAFE_BS_TAIL)
  291|  29.5M|		else {
  292|  29.5M|			bs->bufb = 0;
  293|  29.5M|		}
  294|  33.1M|#endif
  295|  33.1M|		bs->pos -= 32;
  296|  33.1M|	}
  297|   107M|}
decoder.c:BitstreamGetBit:
  418|   102M|{
  419|   102M|	return BitstreamGetBits(bs, 1);
  420|   102M|}
decoder.c:BitstreamGetBits:
  406|   107M|{
  407|   107M|	uint32_t ret = BitstreamShowBits(bs, n);
  408|       |
  409|   107M|	BitstreamSkip(bs, n);
  410|   107M|	return ret;
  411|   107M|}
bitstream.c:BitstreamSkip:
  272|  83.5M|{
  273|  83.5M|	bs->pos += bits;
  274|       |
  275|  83.5M|	if (bs->pos >= 32) {
  ------------------
  |  Branch (275:6): [True: 38.3M, False: 45.2M]
  ------------------
  276|  38.3M|		uint32_t tmp;
  277|       |
  278|  38.3M|		bs->bufa = bs->bufb;
  279|  38.3M|#if defined(XVID_SAFE_BS_TAIL)
  280|  38.3M|		if (bs->tail<(bs->start+((bs->length+3)>>2)))
  ------------------
  |  Branch (280:7): [True: 36.6M, False: 1.63M]
  ------------------
  281|  36.6M|#endif
  282|  36.6M|		{
  283|  36.6M|			tmp = *((uint32_t *) bs->tail + 2);
  284|  36.6M|#ifndef ARCH_IS_BIG_ENDIAN
  285|  36.6M|			BSWAP(tmp);
  ------------------
  |  |  365|  36.6M|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  36.6M|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  286|  36.6M|#endif
  287|  36.6M|			bs->bufb = tmp;
  288|  36.6M|			bs->tail++;
  289|  36.6M|		}
  290|  1.63M|#if defined(XVID_SAFE_BS_TAIL)
  291|  1.63M|		else {
  292|  1.63M|			bs->bufb = 0;
  293|  1.63M|		}
  294|  38.3M|#endif
  295|  38.3M|		bs->pos -= 32;
  296|  38.3M|	}
  297|  83.5M|}
bitstream.c:BitstreamNumBitsToByteAlign:
  303|  31.5k|{
  304|  31.5k|	uint32_t n = (32 - bs->pos) % 8;
  305|  31.5k|	return n == 0 ? 8 : n;
  ------------------
  |  Branch (305:9): [True: 234, False: 31.3k]
  ------------------
  306|  31.5k|}
bitstream.c:BitstreamGetBit:
  418|  20.6M|{
  419|  20.6M|	return BitstreamGetBits(bs, 1);
  420|  20.6M|}
bitstream.c:BitstreamGetBits:
  406|  37.9M|{
  407|  37.9M|	uint32_t ret = BitstreamShowBits(bs, n);
  408|       |
  409|  37.9M|	BitstreamSkip(bs, n);
  410|  37.9M|	return ret;
  411|  37.9M|}
bitstream.c:BitstreamPos:
  347|  24.3M|{
  348|  24.3M|	return((uint32_t)(8*((ptr_t)bs->tail - (ptr_t)bs->start) + bs->pos - bs->initpos));
  349|  24.3M|}
bitstream.c:BitstreamByteAlign:
  334|  24.3M|{
  335|  24.3M|	uint32_t remainder = bs->pos % 8;
  336|       |
  337|  24.3M|	if (remainder) {
  ------------------
  |  Branch (337:6): [True: 1.88M, False: 22.4M]
  ------------------
  338|  1.88M|		BitstreamSkip(bs, 8 - remainder);
  339|  1.88M|	}
  340|  24.3M|}
bitstream.c:BitstreamShowBits:
  254|  84.9M|{
  255|  84.9M|	int nbit = (bits + bs->pos) - 32;
  256|       |
  257|  84.9M|	if (nbit > 0) {
  ------------------
  |  Branch (257:6): [True: 15.5M, False: 69.3M]
  ------------------
  258|  15.5M|		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
  259|  15.5M|																 bufb >> (32 -
  260|  15.5M|																		  nbit));
  261|  69.3M|	} else {
  262|  69.3M|		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
  263|  69.3M|	}
  264|  84.9M|}

bs_get_spritetrajectory:
   71|  3.96M|{
   72|  3.96M|	int i;
   73|  28.2M|	for (i = 0; i < 12; i++)
  ------------------
  |  Branch (73:14): [True: 26.7M, False: 1.51M]
  ------------------
   74|  26.7M|	{
   75|  26.7M|		if (BitstreamShowBits(bs, sprite_trajectory_len[i].len) == sprite_trajectory_len[i].code)
  ------------------
  |  Branch (75:7): [True: 2.44M, False: 24.3M]
  ------------------
   76|  2.44M|		{
   77|  2.44M|			BitstreamSkip(bs, sprite_trajectory_len[i].len);
   78|  2.44M|			return i;
   79|  2.44M|		}
   80|  26.7M|	}
   81|  1.51M|	return -1;
   82|  3.96M|}
init_vlc_tables:
   86|      2|{
   87|      2|	uint32_t i, j, k, intra, last, run,  run_esc, level, level_esc, escape, escape_len, offset;
   88|      2|	int32_t l;
   89|       |
   90|      6|	for (intra = 0; intra < 2; intra++)
  ------------------
  |  Branch (90:18): [True: 4, False: 2]
  ------------------
   91|  16.3k|		for (i = 0; i < 4096; i++)
  ------------------
  |  Branch (91:15): [True: 16.3k, False: 4]
  ------------------
   92|  16.3k|			DCT3D[intra][i].event.level = 0;
   93|       |
   94|      6|	for (intra = 0; intra < 2; intra++) {
  ------------------
  |  Branch (94:18): [True: 4, False: 2]
  ------------------
   95|     12|		for (last = 0; last < 2; last++) {
  ------------------
  |  Branch (95:18): [True: 8, False: 4]
  ------------------
   96|    516|			for (run = 0; run < 63 + last; run++) {
  ------------------
  |  Branch (96:18): [True: 508, False: 8]
  ------------------
   97|  24.8k|				for (level = 0; level < (uint32_t)(32 << intra); level++) {
  ------------------
  |  Branch (97:21): [True: 24.3k, False: 508]
  ------------------
   98|  24.3k|					offset = !intra * LEVELOFFSET;
  ------------------
  |  |   46|  24.3k|#define LEVELOFFSET 32
  ------------------
   99|  24.3k|					coeff_VLC[intra][last][level + offset][run].len = 128;
  100|  24.3k|				}
  101|    508|			}
  102|      8|		}
  103|      4|	}
  104|       |
  105|      6|	for (intra = 0; intra < 2; intra++) {
  ------------------
  |  Branch (105:18): [True: 4, False: 2]
  ------------------
  106|    412|		for (i = 0; i < 102; i++) {
  ------------------
  |  Branch (106:15): [True: 408, False: 4]
  ------------------
  107|    408|			offset = !intra * LEVELOFFSET;
  ------------------
  |  |   46|    408|#define LEVELOFFSET 32
  ------------------
  108|       |
  109|  16.6k|			for (j = 0; j < (uint32_t)(1 << (12 - coeff_tab[intra][i].vlc.len)); j++) {
  ------------------
  |  Branch (109:16): [True: 16.2k, False: 408]
  ------------------
  110|  16.2k|				DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].len	 = coeff_tab[intra][i].vlc.len;
  111|  16.2k|				DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].event = coeff_tab[intra][i].event;
  112|  16.2k|			}
  113|       |
  114|    408|			coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].code
  115|    408|				= coeff_tab[intra][i].vlc.code << 1;
  116|    408|			coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].len
  117|    408|				= coeff_tab[intra][i].vlc.len + 1;
  118|       |
  119|    408|			if (!intra) {
  ------------------
  |  Branch (119:8): [True: 204, False: 204]
  ------------------
  120|    204|				coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].code
  121|    204|					= (coeff_tab[intra][i].vlc.code << 1) | 1;
  122|    204|				coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].len
  123|    204|					= coeff_tab[intra][i].vlc.len + 1;
  124|    204|			}
  125|    408|		}
  126|      4|	}
  127|       |
  128|      6|	for (intra = 0; intra < 2; intra++) {
  ------------------
  |  Branch (128:18): [True: 4, False: 2]
  ------------------
  129|     12|		for (last = 0; last < 2; last++) {
  ------------------
  |  Branch (129:18): [True: 8, False: 4]
  ------------------
  130|    516|			for (run = 0; run < 63 + last; run++) {
  ------------------
  |  Branch (130:18): [True: 508, False: 8]
  ------------------
  131|  24.3k|				for (level = 1; level < (uint32_t)(32 << intra); level++) {
  ------------------
  |  Branch (131:21): [True: 23.8k, False: 508]
  ------------------
  132|       |
  133|  23.8k|					if (level <= max_level[intra][last][run] && run <= max_run[intra][last][level])
  ------------------
  |  Branch (133:10): [True: 408, False: 23.4k]
  |  Branch (133:50): [True: 408, False: 0]
  ------------------
  134|    408|					    continue;
  135|       |
  136|  23.4k|					offset = !intra * LEVELOFFSET;
  ------------------
  |  |   46|  23.4k|#define LEVELOFFSET 32
  ------------------
  137|  23.4k|                    level_esc = level - max_level[intra][last][run];
  138|  23.4k|					run_esc = run - 1 - max_run[intra][last][level];
  139|       |
  140|  23.4k|					if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc]) {
  ------------------
  |  Branch (140:10): [True: 408, False: 23.0k]
  |  Branch (140:54): [True: 408, False: 0]
  ------------------
  141|    408|						escape     = ESCAPE1;
  ------------------
  |  |   33|    408|#define ESCAPE1 6
  ------------------
  142|    408|						escape_len = 7 + 1;
  143|    408|						run_esc    = run;
  144|  23.0k|					} else {
  145|  23.0k|						if (run_esc <= max_run[intra][last][level] && level <= max_level[intra][last][run_esc]) {
  ------------------
  |  Branch (145:11): [True: 496, False: 22.5k]
  |  Branch (145:53): [True: 222, False: 274]
  ------------------
  146|    222|							escape     = ESCAPE2;
  ------------------
  |  |   34|    222|#define ESCAPE2 14
  ------------------
  147|    222|							escape_len = 7 + 2;
  148|    222|							level_esc  = level;
  149|  22.8k|						} else {
  150|  22.8k|							if (!intra) {
  ------------------
  |  Branch (150:12): [True: 7.36k, False: 15.4k]
  ------------------
  151|  7.36k|								coeff_VLC[intra][last][level + offset][run].code
  152|  7.36k|									= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((level & 0xfff) << 1) | 1;
  ------------------
  |  |   35|  7.36k|#define ESCAPE3 15
  ------------------
  153|  7.36k|								coeff_VLC[intra][last][level + offset][run].len = 30;
  154|  7.36k|									coeff_VLC[intra][last][offset - level][run].code
  155|  7.36k|									= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-(int32_t)level & 0xfff) << 1) | 1;
  ------------------
  |  |   35|  7.36k|#define ESCAPE3 15
  ------------------
  156|  7.36k|								coeff_VLC[intra][last][offset - level][run].len = 30;
  157|  7.36k|							}
  158|  22.8k|							continue;
  159|  22.8k|						}
  160|  23.0k|					}
  161|       |
  162|    630|					coeff_VLC[intra][last][level + offset][run].code
  163|    630|						= (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len)
  164|    630|						|  coeff_VLC[intra][last][level_esc + offset][run_esc].code;
  165|    630|					coeff_VLC[intra][last][level + offset][run].len
  166|    630|						= coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len;
  167|       |
  168|    630|					if (!intra) {
  ------------------
  |  Branch (168:10): [True: 310, False: 320]
  ------------------
  169|    310|						coeff_VLC[intra][last][offset - level][run].code
  170|    310|							= (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len)
  171|    310|							|  coeff_VLC[intra][last][level_esc + offset][run_esc].code | 1;
  172|    310|						coeff_VLC[intra][last][offset - level][run].len
  173|    310|							= coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len;
  174|    310|					}
  175|    630|				}
  176|       |
  177|    508|				if (!intra) {
  ------------------
  |  Branch (177:9): [True: 254, False: 254]
  ------------------
  178|    254|					coeff_VLC[intra][last][0][run].code
  179|    254|						= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-32 & 0xfff) << 1) | 1;
  ------------------
  |  |   35|    254|#define ESCAPE3 15
  ------------------
  180|    254|					coeff_VLC[intra][last][0][run].len = 30;
  181|    254|				}
  182|    508|			}
  183|      8|		}
  184|      4|	}
  185|       |
  186|       |	/* init sprite_trajectory tables
  187|       |	 * even if GMC is not specified (it might be used later...) */
  188|       |
  189|      2|	sprite_trajectory_code[0+16384].code = 0;
  190|      2|	sprite_trajectory_code[0+16384].len = 0;
  191|     30|	for (k=0;k<14;k++) {
  ------------------
  |  Branch (191:11): [True: 28, False: 2]
  ------------------
  192|     28|		int limit = (1<<k);
  193|       |
  194|  32.7k|		for (l=-(2*limit-1); l <= -limit; l++) {
  ------------------
  |  Branch (194:24): [True: 32.7k, False: 28]
  ------------------
  195|  32.7k|			sprite_trajectory_code[l+16384].code = (2*limit-1)+l;
  196|  32.7k|			sprite_trajectory_code[l+16384].len = k+1;
  197|  32.7k|		}
  198|       |
  199|  32.7k|		for (l=limit; l<= 2*limit-1; l++) {
  ------------------
  |  Branch (199:17): [True: 32.7k, False: 28]
  ------------------
  200|  32.7k|			sprite_trajectory_code[l+16384].code = l;
  201|  32.7k|			sprite_trajectory_code[l+16384].len = k+1;
  202|  32.7k|		}
  203|     28|	}
  204|      2|}
check_resync_marker:
  814|  46.1M|{
  815|  46.1M|	uint32_t nbits;
  816|  46.1M|	uint32_t code;
  817|  46.1M|	uint32_t nbitsresyncmarker = NUMBITS_VP_RESYNC_MARKER + addbits;
  ------------------
  |  |  111|  46.1M|#define NUMBITS_VP_RESYNC_MARKER  17
  ------------------
  818|       |
  819|  46.1M|	nbits = BitstreamNumBitsToByteAlign(bs);
  820|  46.1M|	code = BitstreamShowBits(bs, nbits);
  821|       |
  822|  46.1M|	if (code == (((uint32_t)1 << (nbits - 1)) - 1))
  ------------------
  |  Branch (822:6): [True: 5.24M, False: 40.9M]
  ------------------
  823|  5.24M|	{
  824|  5.24M|		return BitstreamShowBitsFromByteAlign(bs, nbitsresyncmarker) == RESYNC_MARKER;
  ------------------
  |  |  112|  5.24M|#define RESYNC_MARKER 1
  ------------------
  825|  5.24M|	}
  826|       |
  827|  40.9M|	return 0;
  828|  46.1M|}
get_mcbpc_intra:
  834|  23.9M|{
  835|       |
  836|  23.9M|	uint32_t index;
  837|       |
  838|  23.9M|	index = BitstreamShowBits(bs, 9);
  839|  23.9M|	index >>= 3;
  840|       |
  841|  23.9M|	BitstreamSkip(bs, mcbpc_intra_table[index].len);
  842|       |
  843|  23.9M|	return mcbpc_intra_table[index].code;
  844|       |
  845|  23.9M|}
get_mcbpc_inter:
  849|  16.5M|{
  850|       |
  851|  16.5M|	uint32_t index;
  852|       |
  853|  16.5M|	index = MIN(BitstreamShowBits(bs, 9), 256);
  ------------------
  |  |  255|  16.5M|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 16.3M, False: 177k]
  |  |  ------------------
  ------------------
  854|       |
  855|  16.5M|	BitstreamSkip(bs, mcbpc_inter_table[index].len);
  856|       |
  857|  16.5M|	return mcbpc_inter_table[index].code;
  858|       |
  859|  16.5M|}
get_cbpy:
  864|  40.5M|{
  865|       |
  866|  40.5M|	int cbpy;
  867|  40.5M|	uint32_t index = BitstreamShowBits(bs, 6);
  868|       |
  869|  40.5M|	BitstreamSkip(bs, cbpy_table[index].len);
  870|  40.5M|	cbpy = cbpy_table[index].code;
  871|       |
  872|  40.5M|	if (!intra)
  ------------------
  |  Branch (872:6): [True: 16.4M, False: 24.0M]
  ------------------
  873|  16.4M|		cbpy = 15 - cbpy;
  874|       |
  875|  40.5M|	return cbpy;
  876|       |
  877|  40.5M|}
get_mv:
  912|  1.61M|{
  913|       |
  914|  1.61M|	int data;
  915|  1.61M|	int res;
  916|  1.61M|	int mv;
  917|  1.61M|	int scale_fac = 1 << (fcode - 1);
  918|       |
  919|  1.61M|	data = get_mv_data(bs);
  920|       |
  921|  1.61M|	if (scale_fac == 1 || data == 0)
  ------------------
  |  Branch (921:6): [True: 371k, False: 1.24M]
  |  Branch (921:24): [True: 891k, False: 350k]
  ------------------
  922|  1.26M|		return data;
  923|       |
  924|   350k|	res = BitstreamGetBits(bs, fcode - 1);
  925|   350k|	mv = ((abs(data) - 1) * scale_fac) + res + 1;
  926|       |
  927|   350k|	return data < 0 ? -mv : mv;
  ------------------
  |  Branch (927:9): [True: 163k, False: 186k]
  ------------------
  928|       |
  929|  1.61M|}
get_dc_dif:
  934|  40.7M|{
  935|       |
  936|  40.7M|	int code = BitstreamGetBits(bs, dc_size);
  937|  40.7M|	int msb = code >> (dc_size - 1);
  938|       |
  939|  40.7M|	if (msb == 0)
  ------------------
  |  Branch (939:6): [True: 40.4M, False: 254k]
  ------------------
  940|  40.4M|		return (-1 * (code ^ ((1 << dc_size) - 1)));
  941|       |
  942|   254k|	return code;
  943|       |
  944|  40.7M|}
get_dc_size_lum:
  948|  81.0M|{
  949|       |
  950|  81.0M|	int code, i;
  951|       |
  952|  81.0M|	code = BitstreamShowBits(bs, 11);
  953|       |
  954|   728M|	for (i = 11; i > 3; i--) {
  ------------------
  |  Branch (954:15): [True: 647M, False: 80.9M]
  ------------------
  955|   647M|		if (code == 1) {
  ------------------
  |  Branch (955:7): [True: 88.7k, False: 647M]
  ------------------
  956|  88.7k|			BitstreamSkip(bs, i);
  957|  88.7k|			return i + 1;
  958|  88.7k|		}
  959|   647M|		code >>= 1;
  960|   647M|	}
  961|       |
  962|  80.9M|	BitstreamSkip(bs, dc_lum_tab[code].len);
  963|  80.9M|	return dc_lum_tab[code].code;
  964|       |
  965|  81.0M|}
get_dc_size_chrom:
  970|  40.5M|{
  971|       |
  972|  40.5M|	uint32_t code, i;
  973|       |
  974|  40.5M|	code = BitstreamShowBits(bs, 12);
  975|       |
  976|   445M|	for (i = 12; i > 2; i--) {
  ------------------
  |  Branch (976:15): [True: 404M, False: 40.4M]
  ------------------
  977|   404M|		if (code == 1) {
  ------------------
  |  Branch (977:7): [True: 54.9k, False: 404M]
  ------------------
  978|  54.9k|			BitstreamSkip(bs, i);
  979|  54.9k|			return i;
  980|  54.9k|		}
  981|   404M|		code >>= 1;
  982|   404M|	}
  983|       |
  984|  40.4M|	return 3 - BitstreamGetBits(bs, 2);
  985|       |
  986|  40.5M|}
get_intra_block:
 1087|   241M|{
 1088|       |
 1089|   241M|	const uint16_t *scan = scan_tables[direction];
 1090|   241M|	int level, run, last = 0;
 1091|       |
 1092|   243M|	do {
 1093|   243M|		level = get_coeff(bs, &run, &last, 1, 0);
 1094|   243M|		coeff += run;
 1095|   243M|		if (coeff & ~63) {
  ------------------
  |  Branch (1095:7): [True: 240M, False: 2.23M]
  ------------------
 1096|   240M|			DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index");
  ------------------
  |  |  196|   240M|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1097|   240M|			break;
 1098|   240M|		}
 1099|       |
 1100|  2.23M|		block[scan[coeff]] = level;
 1101|       |
 1102|  2.23M|		DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i\n", scan[coeff], level);
  ------------------
  |  |  201|  2.23M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
 1103|       |#if 0
 1104|       |		DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i %08x\n", scan[coeff], level, BitstreamShowBits(bs, 32));
 1105|       |#endif
 1106|       |
 1107|  2.23M|		if (level < -2047 || level > 2047) {
  ------------------
  |  Branch (1107:7): [True: 20, False: 2.23M]
  |  Branch (1107:24): [True: 0, False: 2.23M]
  ------------------
 1108|     20|			DPRINTF(XVID_DEBUG_ERROR,"warning: intra_overflow %i\n", level);
  ------------------
  |  |  196|     20|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1109|     20|		}
 1110|  2.23M|		coeff++;
 1111|  2.23M|	} while (!last);
  ------------------
  |  Branch (1111:11): [True: 1.95M, False: 277k]
  ------------------
 1112|       |
 1113|   241M|}
get_inter_block_h263:
 1122|   253k|{
 1123|       |
 1124|   253k|	const uint16_t *scan = scan_tables[direction];
 1125|   253k|	const uint16_t quant_m_2 = quant << 1;
 1126|   253k|	const uint16_t quant_add = (quant & 1 ? quant : quant - 1);
  ------------------
  |  Branch (1126:30): [True: 186k, False: 67.4k]
  ------------------
 1127|   253k|	int p;
 1128|   253k|	int level;
 1129|   253k|	int run;
 1130|   253k|	int last = 0;
 1131|       |
 1132|   253k|	p = 0;
 1133|   421k|	do {
 1134|   421k|		level = get_coeff(bs, &run, &last, 0, 0);
 1135|   421k|		p += run;
 1136|   421k|		if (p & ~63) {
  ------------------
  |  Branch (1136:7): [True: 222k, False: 198k]
  ------------------
 1137|   222k|			DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index");
  ------------------
  |  |  196|   222k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1138|   222k|			break;
 1139|   222k|		}
 1140|       |
 1141|   198k|		if (level < 0) {
  ------------------
  |  Branch (1141:7): [True: 122k, False: 75.5k]
  ------------------
 1142|   122k|			level = level*quant_m_2 - quant_add;
 1143|   122k|			block[scan[p]] = (level >= -2048 ? level : -2048);
  ------------------
  |  Branch (1143:22): [True: 121k, False: 1.36k]
  ------------------
 1144|   122k|		} else {
 1145|  75.5k|			level = level * quant_m_2 + quant_add;
 1146|  75.5k|			block[scan[p]] = (level <= 2047 ? level : 2047);
  ------------------
  |  Branch (1146:22): [True: 74.5k, False: 1.03k]
  ------------------
 1147|  75.5k|		}		
 1148|   198k|		p++;
 1149|   198k|	} while (!last);
  ------------------
  |  Branch (1149:11): [True: 167k, False: 31.0k]
  ------------------
 1150|   253k|}
get_inter_block_mpeg:
 1159|   377k|{
 1160|   377k|	const uint16_t *scan = scan_tables[direction];
 1161|   377k|	uint32_t sum = 0;
 1162|   377k|	int p;
 1163|   377k|	int level;
 1164|   377k|	int run;
 1165|   377k|	int last = 0;
 1166|       |
 1167|   377k|	p = 0;
 1168|   707k|	do {
 1169|   707k|		level = get_coeff(bs, &run, &last, 0, 0);
 1170|   707k|		p += run;
 1171|   707k|		if (p & ~63) {
  ------------------
  |  Branch (1171:7): [True: 330k, False: 376k]
  ------------------
 1172|   330k|			DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index");
  ------------------
  |  |  196|   330k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1173|   330k|			break;
 1174|   330k|		}
 1175|       |
 1176|   376k|		if (level < 0) {
  ------------------
  |  Branch (1176:7): [True: 233k, False: 143k]
  ------------------
 1177|   233k|			level = ((2 * -level + 1) * matrix[scan[p]] * quant) >> 4;
 1178|   233k|			block[scan[p]] = (level <= 2048 ? -level : -2048);
  ------------------
  |  Branch (1178:22): [True: 228k, False: 4.48k]
  ------------------
 1179|   233k|		} else {
 1180|   143k|			level = ((2 *  level + 1) * matrix[scan[p]] * quant) >> 4;
 1181|   143k|			block[scan[p]] = (level <= 2047 ? level : 2047);
  ------------------
  |  Branch (1181:22): [True: 141k, False: 2.51k]
  ------------------
 1182|   143k|		}
 1183|       |
 1184|   376k|		sum ^= block[scan[p]];
 1185|       |		
 1186|   376k|		p++;
 1187|   376k|	} while (!last);
  ------------------
  |  Branch (1187:11): [True: 329k, False: 46.9k]
  ------------------
 1188|       |
 1189|       |	/*	mismatch control */
 1190|   377k|	if ((sum & 1) == 0) {
  ------------------
  |  Branch (1190:6): [True: 329k, False: 48.3k]
  ------------------
 1191|   329k|		block[63] ^= 1;
 1192|   329k|	}
 1193|   377k|}
mbcoding.c:get_mv_data:
  881|  1.61M|{
  882|       |
  883|  1.61M|	uint32_t index;
  884|       |
  885|  1.61M|	if (BitstreamGetBit(bs))
  ------------------
  |  Branch (885:6): [True: 701k, False: 911k]
  ------------------
  886|   701k|		return 0;
  887|       |
  888|   911k|	index = BitstreamShowBits(bs, 12);
  889|       |
  890|   911k|	if (index >= 512) {
  ------------------
  |  Branch (890:6): [True: 380k, False: 531k]
  ------------------
  891|   380k|		index = (index >> 8) - 2;
  892|   380k|		BitstreamSkip(bs, TMNMVtab0[index].len);
  893|   380k|		return TMNMVtab0[index].code;
  894|   380k|	}
  895|       |
  896|   531k|	if (index >= 128) {
  ------------------
  |  Branch (896:6): [True: 54.2k, False: 476k]
  ------------------
  897|  54.2k|		index = (index >> 2) - 32;
  898|  54.2k|		BitstreamSkip(bs, TMNMVtab1[index].len);
  899|  54.2k|		return TMNMVtab1[index].code;
  900|  54.2k|	}
  901|       |
  902|   476k|	index -= 4;
  903|       |
  904|   476k|	BitstreamSkip(bs, TMNMVtab2[index&0x7f].len);
  905|   476k|	return TMNMVtab2[index&0x7f].code;
  906|       |
  907|   531k|}
mbcoding.c:get_coeff:
  996|   244M|{
  997|       |
  998|   244M|	uint32_t mode;
  999|   244M|	int32_t level;
 1000|   244M|	REVERSE_EVENT *reverse_event;
 1001|       |
 1002|   244M|	uint32_t cache = BitstreamShowBits(bs, 32);
 1003|       |	
 1004|   244M|	if (short_video_header)		/* inter-VLCs will be used for both intra and inter blocks */
  ------------------
  |  Branch (1004:6): [True: 0, False: 244M]
  ------------------
 1005|      0|		intra = 0;
 1006|       |
 1007|   244M|	if (GET_BITS(cache, 7) != ESCAPE) {
  ------------------
  |  |  988|   244M|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
              	if (GET_BITS(cache, 7) != ESCAPE) {
  ------------------
  |  |   32|   244M|#define ESCAPE  3
  ------------------
  |  Branch (1007:6): [True: 244M, False: 18.6k]
  ------------------
 1008|   244M|		reverse_event = &DCT3D[intra][GET_BITS(cache, 12)];
  ------------------
  |  |  988|   244M|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1009|       |
 1010|   244M|		if ((level = reverse_event->event.level) == 0)
  ------------------
  |  Branch (1010:7): [True: 241M, False: 2.79M]
  ------------------
 1011|   241M|			goto error;
 1012|       |
 1013|  2.79M|		*last = reverse_event->event.last;
 1014|  2.79M|		*run  = reverse_event->event.run;
 1015|       |
 1016|       |		/* Don't forget to update the bitstream position */
 1017|  2.79M|		BitstreamSkip(bs, reverse_event->len+1);
 1018|       |
 1019|  2.79M|		return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level;
  ------------------
  |  |  988|  2.79M|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
  |  Branch (1019:10): [True: 1.92M, False: 869k]
  ------------------
 1020|   244M|	}
 1021|       |
 1022|       |	/* flush 7bits of cache */
 1023|  18.6k|	cache <<= 7;
 1024|       |
 1025|  18.6k|	if (short_video_header) {
  ------------------
  |  Branch (1025:6): [True: 0, False: 18.6k]
  ------------------
 1026|       |		/* escape mode 4 - H.263 type, only used if short_video_header = 1  */
 1027|      0|		*last =  GET_BITS(cache, 1);
  ------------------
  |  |  988|      0|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1028|      0|		*run  = (GET_BITS(cache, 7) &0x3f);
  ------------------
  |  |  988|      0|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1029|      0|		level = (GET_BITS(cache, 15)&0xff);
  ------------------
  |  |  988|      0|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1030|       |
 1031|      0|		if (level == 0 || level == 128)
  ------------------
  |  Branch (1031:7): [True: 0, False: 0]
  |  Branch (1031:21): [True: 0, False: 0]
  ------------------
 1032|      0|			DPRINTF(XVID_DEBUG_ERROR, "Illegal LEVEL for ESCAPE mode 4: %d\n", level);
  ------------------
  |  |  196|      0|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1033|       |
 1034|       |		/* We've "eaten" 22 bits */
 1035|      0|		BitstreamSkip(bs, 22);
 1036|       |
 1037|      0|		return (level << 24) >> 24;
 1038|      0|	}
 1039|       |
 1040|  18.6k|	if ((mode = GET_BITS(cache, 2)) < 3) {
  ------------------
  |  |  988|  18.6k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
  |  Branch (1040:6): [True: 16.4k, False: 2.18k]
  ------------------
 1041|  16.4k|		const int skip[3] = {1, 1, 2};
 1042|  16.4k|		cache <<= skip[mode];
 1043|       |
 1044|  16.4k|		reverse_event = &DCT3D[intra][GET_BITS(cache, 12)];
  ------------------
  |  |  988|  16.4k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1045|       |
 1046|  16.4k|		if ((level = reverse_event->event.level) == 0)
  ------------------
  |  Branch (1046:7): [True: 5.89k, False: 10.5k]
  ------------------
 1047|  5.89k|			goto error;
 1048|       |
 1049|  10.5k|		*last = reverse_event->event.last;
 1050|  10.5k|		*run  = reverse_event->event.run;
 1051|       |
 1052|  10.5k|		if (mode < 2) {
  ------------------
  |  Branch (1052:7): [True: 8.29k, False: 2.27k]
  ------------------
 1053|       |			/* first escape mode, level is offset */
 1054|  8.29k|			level += max_level[intra][*last][*run];
 1055|  8.29k|		} else {
 1056|       |			/* second escape mode, run is offset */
 1057|  2.27k|			*run += max_run[intra][*last][level] + 1;
 1058|  2.27k|		}
 1059|       |		
 1060|       |		/* Update bitstream position */
 1061|  10.5k|		BitstreamSkip(bs, 7 + skip[mode] + reverse_event->len + 1);
 1062|       |
 1063|  10.5k|		return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level;
  ------------------
  |  |  988|  10.5k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
  |  Branch (1063:10): [True: 3.95k, False: 6.60k]
  ------------------
 1064|  16.4k|	}
 1065|       |
 1066|       |	/* third escape mode - fixed length codes */
 1067|  2.18k|	cache <<= 2;
 1068|  2.18k|	*last =  GET_BITS(cache, 1);
  ------------------
  |  |  988|  2.18k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1069|  2.18k|	*run  = (GET_BITS(cache, 7)&0x3f);
  ------------------
  |  |  988|  2.18k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1070|  2.18k|	level = (GET_BITS(cache, 20)&0xfff);
  ------------------
  |  |  988|  2.18k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1071|       |	
 1072|       |	/* Update bitstream position */
 1073|  2.18k|	BitstreamSkip(bs, 30);
 1074|       |
 1075|  2.18k|	return (level << 20) >> 20;
 1076|       |
 1077|   241M|  error:
 1078|   241M|	*run = 64;
 1079|   241M|	return 0;
 1080|  18.6k|}

idct_int32:
  311|   242M|{
  312|   242M|  int i, Rows = 0x07;
  313|       |
  314|   242M|  Idct_Row(In + 0*8, Tab04, Rnd0);
  ------------------
  |  |   49|   242M|#define Rnd0 65536 // 1<<(COL_SHIFT+ROW_SHIFT-1);
  ------------------
  315|   242M|  Idct_Row(In + 1*8, Tab17, Rnd1);
  ------------------
  |  |   50|   242M|#define Rnd1 3597  // FIX (1.75683487303);
  ------------------
  316|   242M|  Idct_Row(In + 2*8, Tab26, Rnd2);
  ------------------
  |  |   51|   242M|#define Rnd2 2260  // FIX (1.10355339059);
  ------------------
  317|   242M|  if (Idct_Row(In + 3*8, Tab35, Rnd3)) Rows |= 0x08;
  ------------------
  |  |   52|   242M|#define Rnd3 1203  // FIX (0.587788325588);
  ------------------
  |  Branch (317:7): [True: 175k, False: 241M]
  ------------------
  318|   242M|  if (Idct_Row(In + 4*8, Tab04, Rnd4)) Rows |= 0x10;
  ------------------
  |  |   53|   242M|#define Rnd4 0
  ------------------
  |  Branch (318:7): [True: 82.5k, False: 241M]
  ------------------
  319|   242M|  if (Idct_Row(In + 5*8, Tab35, Rnd5)) Rows |= 0x20;
  ------------------
  |  |   54|   242M|#define Rnd5 120   // FIX (0.058658283817);
  ------------------
  |  Branch (319:7): [True: 46.6k, False: 241M]
  ------------------
  320|   242M|  if (Idct_Row(In + 6*8, Tab26, Rnd6)) Rows |= 0x40;
  ------------------
  |  |   55|   242M|#define Rnd6 512   // FIX (0.25);
  ------------------
  |  Branch (320:7): [True: 41.3k, False: 241M]
  ------------------
  321|   242M|  if (Idct_Row(In + 7*8, Tab17, Rnd7)) Rows |= 0x80;
  ------------------
  |  |   56|   242M|#define Rnd7 512   // FIX (0.25);
  ------------------
  |  Branch (321:7): [True: 349k, False: 241M]
  ------------------
  322|       |
  323|   242M|  if (Rows&0xf0) {
  ------------------
  |  Branch (323:7): [True: 421k, False: 241M]
  ------------------
  324|  3.79M|    for(i=0; i<8; i++)
  ------------------
  |  Branch (324:14): [True: 3.37M, False: 421k]
  ------------------
  325|  3.37M|      Idct_Col_8(In + i);
  326|   421k|  }
  327|   241M|  else if (Rows&0x08) {
  ------------------
  |  Branch (327:12): [True: 109k, False: 241M]
  ------------------
  328|   982k|    for(i=0; i<8; i++)
  ------------------
  |  Branch (328:14): [True: 873k, False: 109k]
  ------------------
  329|   873k|      Idct_Col_4(In + i);
  330|   109k|  }
  331|   241M|  else {
  332|  2.17G|    for(i=0; i<8; i++)
  ------------------
  |  Branch (332:14): [True: 1.93G, False: 241M]
  ------------------
  333|  1.93G|      Idct_Col_3(In + i);
  334|   241M|  }
  335|   242M|}
idct.c:Idct_Row:
   65|  1.93G|{
   66|  1.93G|  const int C1 = Tab[0];
   67|  1.93G|  const int C2 = Tab[1];
   68|  1.93G|  const int C3 = Tab[2];
   69|  1.93G|  const int C4 = Tab[3];
   70|  1.93G|  const int C5 = Tab[4];
   71|  1.93G|  const int C6 = Tab[5];
   72|  1.93G|  const int C7 = Tab[6];
   73|       |
   74|  1.93G|  const int Right = In[5]|In[6]|In[7];
   75|  1.93G|  const int Left  = In[1]|In[2]|In[3];
   76|  1.93G|  if (!(Right | In[4]))
  ------------------
  |  Branch (76:7): [True: 1.93G, False: 488k]
  ------------------
   77|  1.93G|  {
   78|  1.93G|    const int K = C4*In[0] + Rnd;
   79|  1.93G|    if (Left)
  ------------------
  |  Branch (79:9): [True: 784k, False: 1.93G]
  ------------------
   80|   784k|    {
   81|   784k|      const int a0 = K + C2*In[2];
   82|   784k|      const int a1 = K + C6*In[2];
   83|   784k|      const int a2 = K - C6*In[2];
   84|   784k|      const int a3 = K - C2*In[2];
   85|       |
   86|   784k|      const int b0 = C1*In[1] + C3*In[3];
   87|   784k|      const int b1 = C3*In[1] - C7*In[3];
   88|   784k|      const int b2 = C5*In[1] - C1*In[3];
   89|   784k|      const int b3 = C7*In[1] - C5*In[3];
   90|       |
   91|   784k|      In[0] = (a0 + b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   92|   784k|      In[1] = (a1 + b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   93|   784k|      In[2] = (a2 + b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   94|   784k|      In[3] = (a3 + b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   95|   784k|      In[4] = (a3 - b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   96|   784k|      In[5] = (a2 - b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   97|   784k|      In[6] = (a1 - b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   98|   784k|      In[7] = (a0 - b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   784k|#define ROW_SHIFT 11
  ------------------
   99|   784k|    }
  100|  1.93G|    else
  101|  1.93G|    {
  102|  1.93G|      const int a0 = K >> ROW_SHIFT;
  ------------------
  |  |   45|  1.93G|#define ROW_SHIFT 11
  ------------------
  103|  1.93G|      if (a0) {
  ------------------
  |  Branch (103:11): [True: 725M, False: 1.20G]
  ------------------
  104|   725M|        In[0] = In[1] = In[2] = In[3] =
  105|   725M|        In[4] = In[5] = In[6] = In[7] = a0;
  106|   725M|      }
  107|  1.20G|      else return 0;
  108|  1.93G|    }
  109|  1.93G|  }
  110|   488k|  else if (!(Left|Right))
  ------------------
  |  Branch (110:12): [True: 13.6k, False: 475k]
  ------------------
  111|  13.6k|  {
  112|  13.6k|    const int a0 = (Rnd + C4*(In[0]+In[4])) >> ROW_SHIFT;
  ------------------
  |  |   45|  13.6k|#define ROW_SHIFT 11
  ------------------
  113|  13.6k|    const int a1 = (Rnd + C4*(In[0]-In[4])) >> ROW_SHIFT;
  ------------------
  |  |   45|  13.6k|#define ROW_SHIFT 11
  ------------------
  114|       |
  115|  13.6k|    In[0] = a0;
  116|  13.6k|    In[3] = a0;
  117|  13.6k|    In[4] = a0;
  118|  13.6k|    In[7] = a0;
  119|  13.6k|    In[1] = a1;
  120|  13.6k|    In[2] = a1;
  121|  13.6k|    In[5] = a1;
  122|  13.6k|    In[6] = a1;
  123|  13.6k|  }
  124|   475k|  else
  125|   475k|  {
  126|   475k|    const int K = C4*In[0] + Rnd;
  127|   475k|    const int a0 = K + C2*In[2] + C4*In[4] + C6*In[6];
  128|   475k|    const int a1 = K + C6*In[2] - C4*In[4] - C2*In[6];
  129|   475k|    const int a2 = K - C6*In[2] - C4*In[4] + C2*In[6];
  130|   475k|    const int a3 = K - C2*In[2] + C4*In[4] - C6*In[6];
  131|       |
  132|   475k|    const int b0 = C1*In[1] + C3*In[3] + C5*In[5] + C7*In[7];
  133|   475k|    const int b1 = C3*In[1] - C7*In[3] - C1*In[5] - C5*In[7];
  134|   475k|    const int b2 = C5*In[1] - C1*In[3] + C7*In[5] + C3*In[7];
  135|   475k|    const int b3 = C7*In[1] - C5*In[3] + C3*In[5] - C1*In[7];
  136|       |
  137|   475k|    In[0] = (a0 + b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  138|   475k|    In[1] = (a1 + b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  139|   475k|    In[2] = (a2 + b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  140|   475k|    In[3] = (a3 + b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  141|   475k|    In[4] = (a3 - b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  142|   475k|    In[5] = (a2 - b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  143|   475k|    In[6] = (a1 - b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  144|   475k|    In[7] = (a0 - b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   475k|#define ROW_SHIFT 11
  ------------------
  145|   475k|  }
  146|   726M|  return 1;
  147|  1.93G|}
idct.c:Idct_Col_8:
  168|  3.37M|{
  169|  3.37M|  int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill;
  170|       |
  171|       |    // odd
  172|       |
  173|  3.37M|  mm4 = (int)In[7*8];
  174|  3.37M|  mm5 = (int)In[5*8];
  175|  3.37M|  mm6 = (int)In[3*8];
  176|  3.37M|  mm7 = (int)In[1*8];
  177|       |
  178|  3.37M|  mm0 = MULT(Tan1, mm4, 16) + mm7;
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  179|  3.37M|  mm1 = MULT(Tan1, mm7, 16) - mm4;
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  180|  3.37M|  mm2 = MULT(Tan3, mm5, 16) + mm6;
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  181|  3.37M|  mm3 = MULT(Tan3, mm6, 16) - mm5;
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  182|       |
  183|  3.37M|  mm7 = mm0 + mm2;
  184|  3.37M|  mm4 = mm1 - mm3;
  185|  3.37M|  mm0 = mm0 - mm2;
  186|  3.37M|  mm1 = mm1 + mm3;
  187|  3.37M|  mm6 = mm0 + mm1;
  188|  3.37M|  mm5 = mm0 - mm1;
  189|  3.37M|  mm5 = 2*MULT(Sqrt2, mm5, 16);  // 2*sqrt2
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  190|  3.37M|  mm6 = 2*MULT(Sqrt2, mm6, 16);  // Watch out: precision loss but done to match
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  191|       |                                 // the pmulhw used in mmx/sse versions
  192|       |  
  193|       |    // even
  194|       |
  195|  3.37M|  mm1 = (int)In[2*8];
  196|  3.37M|  mm2 = (int)In[6*8];
  197|  3.37M|  mm3 = MULT(Tan2,mm2, 16) + mm1;
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  198|  3.37M|  mm2 = MULT(Tan2,mm1, 16) - mm2;
  ------------------
  |  |  154|  3.37M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  199|       |
  200|  3.37M|  LOAD_BUTF(mm0, mm1, 0*8, 4*8, Spill, In);
  ------------------
  |  |  164|  3.37M|  (m1) = (S)[(a)] + (S)[(b)];           \
  |  |  165|  3.37M|  (m2) = (S)[(a)] - (S)[(b)]
  ------------------
  201|       |
  202|  3.37M|  BUTF(mm0, mm3, Spill);
  ------------------
  |  |  159|  3.37M|  (tmp) = (a)+(b);      \
  |  |  160|  3.37M|  (b)   = (a)-(b);      \
  |  |  161|  3.37M|  (a)   = (tmp)
  ------------------
  203|  3.37M|  BUTF(mm0, mm7, Spill);
  ------------------
  |  |  159|  3.37M|  (tmp) = (a)+(b);      \
  |  |  160|  3.37M|  (b)   = (a)-(b);      \
  |  |  161|  3.37M|  (a)   = (tmp)
  ------------------
  204|  3.37M|  In[8*0] = (int16_t) (mm0 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  205|  3.37M|  In[8*7] = (int16_t) (mm7 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  206|  3.37M|  BUTF(mm3, mm4, mm0);
  ------------------
  |  |  159|  3.37M|  (tmp) = (a)+(b);      \
  |  |  160|  3.37M|  (b)   = (a)-(b);      \
  |  |  161|  3.37M|  (a)   = (tmp)
  ------------------
  207|  3.37M|  In[8*3] = (int16_t) (mm3 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  208|  3.37M|  In[8*4] = (int16_t) (mm4 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  209|       |
  210|  3.37M|  BUTF(mm1, mm2, mm0);
  ------------------
  |  |  159|  3.37M|  (tmp) = (a)+(b);      \
  |  |  160|  3.37M|  (b)   = (a)-(b);      \
  |  |  161|  3.37M|  (a)   = (tmp)
  ------------------
  211|  3.37M|  BUTF(mm1, mm6, mm0);
  ------------------
  |  |  159|  3.37M|  (tmp) = (a)+(b);      \
  |  |  160|  3.37M|  (b)   = (a)-(b);      \
  |  |  161|  3.37M|  (a)   = (tmp)
  ------------------
  212|  3.37M|  In[8*1] = (int16_t) (mm1 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  213|  3.37M|  In[8*6] = (int16_t) (mm6 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  214|  3.37M|  BUTF(mm2, mm5, mm0);
  ------------------
  |  |  159|  3.37M|  (tmp) = (a)+(b);      \
  |  |  160|  3.37M|  (b)   = (a)-(b);      \
  |  |  161|  3.37M|  (a)   = (tmp)
  ------------------
  215|  3.37M|  In[8*2] = (int16_t) (mm2 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  216|  3.37M|  In[8*5] = (int16_t) (mm5 >> COL_SHIFT);
  ------------------
  |  |   46|  3.37M|#define COL_SHIFT 6
  ------------------
  217|  3.37M|}
idct.c:Idct_Col_4:
  220|   873k|{
  221|   873k|  int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill;
  222|       |
  223|       |    // odd
  224|       |
  225|   873k|  mm0 = (int)In[1*8];
  226|   873k|  mm2 = (int)In[3*8];
  227|       |
  228|   873k|  mm1 = MULT(Tan1, mm0, 16);
  ------------------
  |  |  154|   873k|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  229|   873k|  mm3 = MULT(Tan3, mm2, 16);
  ------------------
  |  |  154|   873k|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  230|       |
  231|   873k|  mm7 = mm0 + mm2;
  232|   873k|  mm4 = mm1 - mm3;
  233|   873k|  mm0 = mm0 - mm2;
  234|   873k|  mm1 = mm1 + mm3;
  235|   873k|  mm6 = mm0 + mm1;
  236|   873k|  mm5 = mm0 - mm1;
  237|   873k|  mm6 = 2*MULT(Sqrt2, mm6, 16);  // 2*sqrt2
  ------------------
  |  |  154|   873k|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  238|   873k|  mm5 = 2*MULT(Sqrt2, mm5, 16);
  ------------------
  |  |  154|   873k|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  239|       |
  240|       |    // even
  241|       |
  242|   873k|  mm0 = mm1 = (int)In[0*8];
  243|   873k|  mm3 = (int)In[2*8];
  244|   873k|  mm2 = MULT(Tan2,mm3, 16);
  ------------------
  |  |  154|   873k|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  245|       |
  246|   873k|  BUTF(mm0, mm3, Spill);
  ------------------
  |  |  159|   873k|  (tmp) = (a)+(b);      \
  |  |  160|   873k|  (b)   = (a)-(b);      \
  |  |  161|   873k|  (a)   = (tmp)
  ------------------
  247|   873k|  BUTF(mm0, mm7, Spill);
  ------------------
  |  |  159|   873k|  (tmp) = (a)+(b);      \
  |  |  160|   873k|  (b)   = (a)-(b);      \
  |  |  161|   873k|  (a)   = (tmp)
  ------------------
  248|   873k|  In[8*0] = (int16_t) (mm0 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  249|   873k|  In[8*7] = (int16_t) (mm7 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  250|   873k|  BUTF(mm3, mm4, mm0);
  ------------------
  |  |  159|   873k|  (tmp) = (a)+(b);      \
  |  |  160|   873k|  (b)   = (a)-(b);      \
  |  |  161|   873k|  (a)   = (tmp)
  ------------------
  251|   873k|  In[8*3] = (int16_t) (mm3 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  252|   873k|  In[8*4] = (int16_t) (mm4 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  253|       |
  254|   873k|  BUTF(mm1, mm2, mm0);
  ------------------
  |  |  159|   873k|  (tmp) = (a)+(b);      \
  |  |  160|   873k|  (b)   = (a)-(b);      \
  |  |  161|   873k|  (a)   = (tmp)
  ------------------
  255|   873k|  BUTF(mm1, mm6, mm0);
  ------------------
  |  |  159|   873k|  (tmp) = (a)+(b);      \
  |  |  160|   873k|  (b)   = (a)-(b);      \
  |  |  161|   873k|  (a)   = (tmp)
  ------------------
  256|   873k|  In[8*1] = (int16_t) (mm1 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  257|   873k|  In[8*6] = (int16_t) (mm6 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  258|   873k|  BUTF(mm2, mm5, mm0);
  ------------------
  |  |  159|   873k|  (tmp) = (a)+(b);      \
  |  |  160|   873k|  (b)   = (a)-(b);      \
  |  |  161|   873k|  (a)   = (tmp)
  ------------------
  259|   873k|  In[8*2] = (int16_t) (mm2 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  260|   873k|  In[8*5] = (int16_t) (mm5 >> COL_SHIFT);
  ------------------
  |  |   46|   873k|#define COL_SHIFT 6
  ------------------
  261|   873k|}
idct.c:Idct_Col_3:
  264|  1.93G|{
  265|  1.93G|  int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill;
  266|       |
  267|       |    // odd
  268|       |
  269|  1.93G|  mm7 = (int)In[1*8];
  270|  1.93G|  mm4 = MULT(Tan1, mm7, 16);
  ------------------
  |  |  154|  1.93G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  271|       |
  272|  1.93G|  mm6 = mm7 + mm4;
  273|  1.93G|  mm5 = mm7 - mm4;
  274|  1.93G|  mm6 = 2*MULT(Sqrt2, mm6, 16);  // 2*sqrt2
  ------------------
  |  |  154|  1.93G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  275|  1.93G|  mm5 = 2*MULT(Sqrt2, mm5, 16);
  ------------------
  |  |  154|  1.93G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  276|       |
  277|       |    // even
  278|       |
  279|  1.93G|  mm0 = mm1 = (int)In[0*8];
  280|  1.93G|  mm3 = (int)In[2*8];
  281|  1.93G|  mm2 = MULT(Tan2,mm3, 16);
  ------------------
  |  |  154|  1.93G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  282|       |
  283|  1.93G|  BUTF(mm0, mm3, Spill);
  ------------------
  |  |  159|  1.93G|  (tmp) = (a)+(b);      \
  |  |  160|  1.93G|  (b)   = (a)-(b);      \
  |  |  161|  1.93G|  (a)   = (tmp)
  ------------------
  284|  1.93G|  BUTF(mm0, mm7, Spill);
  ------------------
  |  |  159|  1.93G|  (tmp) = (a)+(b);      \
  |  |  160|  1.93G|  (b)   = (a)-(b);      \
  |  |  161|  1.93G|  (a)   = (tmp)
  ------------------
  285|  1.93G|  In[8*0] = (int16_t) (mm0 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  286|  1.93G|  In[8*7] = (int16_t) (mm7 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  287|  1.93G|  BUTF(mm3, mm4, mm0);
  ------------------
  |  |  159|  1.93G|  (tmp) = (a)+(b);      \
  |  |  160|  1.93G|  (b)   = (a)-(b);      \
  |  |  161|  1.93G|  (a)   = (tmp)
  ------------------
  288|  1.93G|  In[8*3] = (int16_t) (mm3 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  289|  1.93G|  In[8*4] = (int16_t) (mm4 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  290|       |
  291|  1.93G|  BUTF(mm1, mm2, mm0);
  ------------------
  |  |  159|  1.93G|  (tmp) = (a)+(b);      \
  |  |  160|  1.93G|  (b)   = (a)-(b);      \
  |  |  161|  1.93G|  (a)   = (tmp)
  ------------------
  292|  1.93G|  BUTF(mm1, mm6, mm0);
  ------------------
  |  |  159|  1.93G|  (tmp) = (a)+(b);      \
  |  |  160|  1.93G|  (b)   = (a)-(b);      \
  |  |  161|  1.93G|  (a)   = (tmp)
  ------------------
  293|  1.93G|  In[8*1] = (int16_t) (mm1 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  294|  1.93G|  In[8*6] = (int16_t) (mm6 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  295|  1.93G|  BUTF(mm2, mm5, mm0);
  ------------------
  |  |  159|  1.93G|  (tmp) = (a)+(b);      \
  |  |  160|  1.93G|  (b)   = (a)-(b);      \
  |  |  161|  1.93G|  (a)   = (tmp)
  ------------------
  296|  1.93G|  In[8*2] = (int16_t) (mm2 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  297|  1.93G|  In[8*5] = (int16_t) (mm5 >> COL_SHIFT);
  ------------------
  |  |   46|  1.93G|#define COL_SHIFT 6
  ------------------
  298|  1.93G|}

decoder_create:
  151|  10.5k|{
  152|  10.5k|  int ret = 0;
  153|  10.5k|  DECODER *dec;
  154|       |
  155|  10.5k|  if (XVID_VERSION_MAJOR(create->version) != 1) /* v1.x.x */
  ------------------
  |  |   63|  10.5k|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
  |  Branch (155:7): [True: 0, False: 10.5k]
  ------------------
  156|      0|    return XVID_ERR_VERSION;
  ------------------
  |  |   98|      0|#define XVID_ERR_VERSION	-4		/* structure version not supported */
  ------------------
  157|       |
  158|  10.5k|  dec = xvid_malloc(sizeof(DECODER), CACHE_LINE);
  ------------------
  |  |  131|  10.5k|#    define CACHE_LINE  64
  ------------------
  159|  10.5k|  if (dec == NULL) {
  ------------------
  |  Branch (159:7): [True: 0, False: 10.5k]
  ------------------
  160|      0|    return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  161|      0|  }
  162|       |
  163|  10.5k|  memset(dec, 0, sizeof(DECODER));
  164|       |
  165|  10.5k|  dec->mpeg_quant_matrices = xvid_malloc(sizeof(uint16_t) * 64 * 8, CACHE_LINE);
  ------------------
  |  |  131|  10.5k|#    define CACHE_LINE  64
  ------------------
  166|  10.5k|  if (dec->mpeg_quant_matrices == NULL) {
  ------------------
  |  Branch (166:7): [True: 0, False: 10.5k]
  ------------------
  167|      0|    xvid_free(dec);
  168|      0|    return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  169|      0|  }
  170|       |
  171|  10.5k|  create->handle = dec;
  172|       |
  173|  10.5k|  dec->width = MAX(0, create->width);
  ------------------
  |  |   35|  10.5k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 0, False: 10.5k]
  |  |  ------------------
  ------------------
  174|  10.5k|  dec->height = MAX(0, create->height);
  ------------------
  |  |   35|  10.5k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 0, False: 10.5k]
  |  |  ------------------
  ------------------
  175|       |
  176|  10.5k|  dec->num_threads = MAX(0, create->num_threads);
  ------------------
  |  |   35|  10.5k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 0, False: 10.5k]
  |  |  ------------------
  ------------------
  177|       |
  178|  10.5k|  image_null(&dec->cur);
  179|  10.5k|  image_null(&dec->refn[0]);
  180|  10.5k|  image_null(&dec->refn[1]);
  181|  10.5k|  image_null(&dec->tmp);
  182|  10.5k|  image_null(&dec->qtmp);
  183|       |
  184|       |  /* image based GMC */
  185|  10.5k|  image_null(&dec->gmc);
  186|       |
  187|  10.5k|  dec->mbs = NULL;
  188|  10.5k|  dec->last_mbs = NULL;
  189|  10.5k|  dec->qscale = NULL;
  190|       |
  191|  10.5k|  init_timer();
  192|  10.5k|  init_postproc(&dec->postproc);
  193|  10.5k|  init_mpeg_matrix(dec->mpeg_quant_matrices);
  194|       |
  195|       |  /* For B-frame support (used to save reference frame's time */
  196|  10.5k|  dec->frames = 0;
  197|  10.5k|  dec->time = dec->time_base = dec->last_time_base = 0;
  198|  10.5k|  dec->low_delay = 0;
  199|  10.5k|  dec->packed_mode = 0;
  200|  10.5k|  dec->time_inc_resolution = 1; /* until VOL header says otherwise */
  201|  10.5k|  dec->ver_id = 1;
  202|       |
  203|  10.5k|  if (create->fourcc == ((int)('X')|((int)('V')<<8)| 
  ------------------
  |  Branch (203:7): [True: 0, False: 10.5k]
  ------------------
  204|  10.5k|                         ((int)('I')<<16)|((int)('D')<<24))) { /* XVID */
  205|      0|    dec->bs_version = 0; /* Initially assume oldest xvid version */ 
  206|      0|  }
  207|  10.5k|  else {
  208|  10.5k|	dec->bs_version = 0xffff; /* Initialize to very high value -> assume bugfree stream */
  209|  10.5k|  }
  210|       |
  211|  10.5k|  dec->fixed_dimensions = (dec->width > 0 && dec->height > 0);
  ------------------
  |  Branch (211:28): [True: 0, False: 10.5k]
  |  Branch (211:46): [True: 0, False: 0]
  ------------------
  212|       |
  213|  10.5k|  ret = decoder_resize(dec);
  214|  10.5k|  if (ret == XVID_ERR_MEMORY) create->handle = NULL;
  ------------------
  |  |   96|  10.5k|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  |  Branch (214:7): [True: 0, False: 10.5k]
  ------------------
  215|       |
  216|  10.5k|  return ret;
  217|  10.5k|}
decoder_destroy:
  222|  10.5k|{
  223|  10.5k|  xvid_free(dec->last_mbs);
  224|  10.5k|  xvid_free(dec->mbs);
  225|  10.5k|  xvid_free(dec->qscale);
  226|       |
  227|       |  /* image based GMC */
  228|  10.5k|  image_destroy(&dec->gmc, dec->edged_width, dec->edged_height);
  229|       |
  230|  10.5k|  image_destroy(&dec->refn[0], dec->edged_width, dec->edged_height);
  231|  10.5k|  image_destroy(&dec->refn[1], dec->edged_width, dec->edged_height);
  232|  10.5k|  image_destroy(&dec->tmp, dec->edged_width, dec->edged_height);
  233|  10.5k|  image_destroy(&dec->qtmp, dec->edged_width, dec->edged_height);
  234|  10.5k|  image_destroy(&dec->cur, dec->edged_width, dec->edged_height);
  235|  10.5k|  xvid_free(dec->mpeg_quant_matrices);
  236|  10.5k|  xvid_free(dec);
  237|       |
  238|  10.5k|  write_timer();
  239|  10.5k|  return 0;
  240|  10.5k|}
decoder_decode:
 1566|  72.0k|{
 1567|       |
 1568|  72.0k|  Bitstream bs;
 1569|  72.0k|  uint32_t rounding = 0;
 1570|  72.0k|  uint32_t quant = 2;
 1571|  72.0k|  uint32_t fcode_forward = 0;
 1572|  72.0k|  uint32_t fcode_backward = 0;
 1573|  72.0k|  uint32_t intra_dc_threshold = 0;
 1574|  72.0k|  WARPPOINTS gmc_warp;
 1575|  72.0k|  int coding_type = -1;
 1576|  72.0k|  int success, output, seen_something;
 1577|       |
 1578|  72.0k|  if (XVID_VERSION_MAJOR(frame->version) != 1 || (stats && XVID_VERSION_MAJOR(stats->version) != 1))  /* v1.x.x */
  ------------------
  |  |   63|  72.0k|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
                if (XVID_VERSION_MAJOR(frame->version) != 1 || (stats && XVID_VERSION_MAJOR(stats->version) != 1))  /* v1.x.x */
  ------------------
  |  |   63|  72.0k|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
  |  Branch (1578:7): [True: 0, False: 72.0k]
  |  Branch (1578:51): [True: 72.0k, False: 0]
  |  Branch (1578:60): [True: 0, False: 72.0k]
  ------------------
 1579|      0|    return XVID_ERR_VERSION;
  ------------------
  |  |   98|      0|#define XVID_ERR_VERSION	-4		/* structure version not supported */
  ------------------
 1580|       |
 1581|  72.0k|  start_global_timer();
 1582|  72.0k|  memset((void *)&gmc_warp, 0, sizeof(WARPPOINTS));
 1583|       |
 1584|  72.0k|  dec->low_delay_default = (frame->general & XVID_LOWDELAY);
  ------------------
  |  |  268|  72.0k|#define XVID_LOWDELAY      (1<<0) /* lowdelay mode  */
  ------------------
 1585|  72.0k|  if ((frame->general & XVID_DISCONTINUITY))
  ------------------
  |  |  269|  72.0k|#define XVID_DISCONTINUITY (1<<1) /* indicates break in stream */
  ------------------
  |  Branch (1585:7): [True: 0, False: 72.0k]
  ------------------
 1586|      0|    dec->frames = 0;
 1587|  72.0k|  dec->out_frm = (frame->output.csp == XVID_CSP_SLICE) ? &frame->output : NULL;
  ------------------
  |  |  124|  72.0k|#define XVID_CSP_SLICE    (1<<12) /* decoder only: 4:2:0 planar, per slice rendering */
  ------------------
  |  Branch (1587:18): [True: 0, False: 72.0k]
  ------------------
 1588|       |
 1589|  72.0k|  if(frame->length<0) {  /* decoder flush */
  ------------------
  |  Branch (1589:6): [True: 0, False: 72.0k]
  ------------------
 1590|      0|    int ret;
 1591|       |    /* if not decoding "low_delay/packed", and this isn't low_delay and
 1592|       |      we have a reference frame, then outout the reference frame */
 1593|      0|    if (!(dec->low_delay_default && dec->packed_mode) && !dec->low_delay && dec->frames>0) {
  ------------------
  |  Branch (1593:11): [True: 0, False: 0]
  |  Branch (1593:37): [True: 0, False: 0]
  |  Branch (1593:58): [True: 0, False: 0]
  |  Branch (1593:77): [True: 0, False: 0]
  ------------------
 1594|      0|      decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1595|      0|      dec->frames = 0;
 1596|      0|      ret = 0;
 1597|      0|    } else {
 1598|      0|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|      0|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1598:11): [True: 0, False: 0]
  ------------------
 1599|      0|      ret = XVID_ERR_END;
  ------------------
  |  |   99|      0|#define XVID_ERR_END		-5		/* encoder only; end of stream reached */
  ------------------
 1600|      0|    }
 1601|       |
 1602|      0|    emms();
 1603|      0|    stop_global_timer();
 1604|      0|    return ret;
 1605|      0|  }
 1606|       |
 1607|  72.0k|  BitstreamInit(&bs, frame->bitstream, frame->length);
 1608|       |
 1609|       |  /* XXX: 0x7f is only valid whilst decoding vfw xvid/divx5 avi's */
 1610|  72.0k|  if(dec->low_delay_default && frame->length == 1 && BitstreamShowBits(&bs, 8) == 0x7f)
  ------------------
  |  Branch (1610:6): [True: 0, False: 72.0k]
  |  Branch (1610:32): [True: 0, False: 0]
  |  Branch (1610:54): [True: 0, False: 0]
  ------------------
 1611|      0|  {
 1612|      0|    image_output(&dec->refn[0], dec->width, dec->height, dec->edged_width,
 1613|      0|           (uint8_t**)frame->output.plane, frame->output.stride, frame->output.csp, dec->interlacing);
 1614|      0|    if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|      0|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1614:9): [True: 0, False: 0]
  ------------------
 1615|      0|    emms();
 1616|      0|    return 1; /* one byte consumed */
 1617|      0|  }
 1618|       |
 1619|  72.0k|  success = 0;
 1620|  72.0k|  output = 0;
 1621|  72.0k|  seen_something = 0;
 1622|       |
 1623|  2.11M|repeat:
 1624|       |
 1625|  2.11M|  coding_type = BitstreamReadHeaders(&bs, dec, &rounding,
 1626|  2.11M|      &quant, &fcode_forward, &fcode_backward, &intra_dc_threshold, &gmc_warp);
 1627|       |
 1628|  2.11M|  DPRINTF(XVID_DEBUG_HEADER, "coding_type=%i,  packed=%i,  time=%"
  ------------------
  |  |  198|  2.11M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
 1629|       |#if defined(_MSC_VER)
 1630|       |    "I64"
 1631|       |#else
 1632|  2.11M|    "ll"
 1633|  2.11M|#endif
 1634|  2.11M|    "i,  time_pp=%i,  time_bp=%i\n",
 1635|  2.11M|              coding_type,  dec->packed_mode, dec->time, dec->time_pp, dec->time_bp);
 1636|       |
 1637|  2.11M|  if (coding_type == -1) { /* nothing */
  ------------------
  |  Branch (1637:7): [True: 18.8k, False: 2.09M]
  ------------------
 1638|  18.8k|    if (success) goto done;
  ------------------
  |  Branch (1638:9): [True: 0, False: 18.8k]
  ------------------
 1639|  18.8k|    if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|  18.8k|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1639:9): [True: 18.8k, False: 0]
  ------------------
 1640|  18.8k|    emms();
 1641|  18.8k|    return BitstreamPos(&bs)/8;
 1642|  18.8k|  }
 1643|       |
 1644|  2.09M|  if (coding_type == -2 || coding_type == -3) { /* vol and/or resize */
  ------------------
  |  Branch (1644:7): [True: 21.9k, False: 2.07M]
  |  Branch (1644:28): [True: 13.1k, False: 2.05M]
  ------------------
 1645|       |
 1646|  35.0k|    if (coding_type == -3)
  ------------------
  |  Branch (1646:9): [True: 13.1k, False: 21.9k]
  ------------------
 1647|  13.1k|      if (decoder_resize(dec)) return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  |  Branch (1647:11): [True: 0, False: 13.1k]
  ------------------
 1648|       |
 1649|  35.0k|    if(stats) {
  ------------------
  |  Branch (1649:8): [True: 35.0k, False: 0]
  ------------------
 1650|  35.0k|      stats->type = XVID_TYPE_VOL;
  ------------------
  |  |  165|  35.0k|#define XVID_TYPE_VOL     -1 /* decoder only: vol was decoded */
  ------------------
 1651|  35.0k|      stats->data.vol.general = 0;
 1652|  35.0k|	  stats->data.vop.general = 0;
 1653|  35.0k|	  if (dec->interlacing) {
  ------------------
  |  Branch (1653:8): [True: 10.4k, False: 24.6k]
  ------------------
 1654|  10.4k|		stats->data.vol.general |= XVID_VOL_INTERLACING;
  ------------------
  |  |  661|  10.4k|#define XVID_VOL_INTERLACING    (1<<5) /* enable interlaced encoding */
  ------------------
 1655|  10.4k|		if (dec->top_field_first) {
  ------------------
  |  Branch (1655:7): [True: 924, False: 9.49k]
  ------------------
 1656|    924|	      stats->data.vop.general |= XVID_VOP_TOPFIELDFIRST;
  ------------------
  |  |  686|    924|#define XVID_VOP_TOPFIELDFIRST        (1<< 9) /* set top-field-first flag  */
  ------------------
 1657|    924|		}
 1658|  10.4k|	  }
 1659|  35.0k|      stats->data.vol.width = dec->width;
 1660|  35.0k|      stats->data.vol.height = dec->height;
 1661|  35.0k|      stats->data.vol.par = dec->aspect_ratio;
 1662|  35.0k|      stats->data.vol.par_width = dec->par_width;
 1663|  35.0k|      stats->data.vol.par_height = dec->par_height;
 1664|  35.0k|      emms();
 1665|  35.0k|      return BitstreamPos(&bs)/8; /* number of bytes consumed */
 1666|  35.0k|    }
 1667|      0|    goto repeat;
 1668|  35.0k|  }
 1669|       |
 1670|  2.05M|  if((dec->frames == 0 && coding_type != I_VOP) || (!dec->width || !dec->height)) {
  ------------------
  |  |  104|  2.04M|#define I_VOP	0
  ------------------
  |  Branch (1670:7): [True: 2.04M, False: 10.9k]
  |  Branch (1670:27): [True: 2.02M, False: 26.5k]
  |  Branch (1670:53): [True: 19.4k, False: 18.0k]
  |  Branch (1670:68): [True: 0, False: 18.0k]
  ------------------
 1671|       |    /* 1st frame is not an i-vop */
 1672|  2.04M|    goto repeat;
 1673|  2.04M|  }
 1674|       |
 1675|  18.0k|  dec->p_bmv.x = dec->p_bmv.y = dec->p_fmv.x = dec->p_fmv.y = 0;  /* init pred vector to 0 */
 1676|       |
 1677|       |  /* packed_mode: special-N_VOP treament */
 1678|  18.0k|  if (dec->packed_mode && coding_type == N_VOP) {
  ------------------
  |  |  108|    729|#define N_VOP	4
  ------------------
  |  Branch (1678:7): [True: 729, False: 17.3k]
  |  Branch (1678:27): [True: 375, False: 354]
  ------------------
 1679|    375|    if (dec->low_delay_default && dec->frames > 0) {
  ------------------
  |  Branch (1679:9): [True: 0, False: 375]
  |  Branch (1679:35): [True: 0, False: 0]
  ------------------
 1680|      0|      decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1681|      0|      output = 1;
 1682|      0|    }
 1683|       |    /* ignore otherwise */
 1684|  17.7k|  } else if (coding_type != B_VOP) {
  ------------------
  |  |  106|  17.7k|#define B_VOP	2
  ------------------
  |  Branch (1684:14): [True: 10.6k, False: 7.10k]
  ------------------
 1685|  10.6k|    switch(coding_type) {
  ------------------
  |  Branch (1685:12): [True: 0, False: 10.6k]
  ------------------
 1686|  7.12k|    case I_VOP :
  ------------------
  |  |  104|  7.12k|#define I_VOP	0
  ------------------
  |  Branch (1686:5): [True: 7.12k, False: 3.48k]
  ------------------
 1687|  7.12k|      decoder_iframe(dec, &bs, quant, intra_dc_threshold);
 1688|  7.12k|      break;
 1689|    971|    case P_VOP :
  ------------------
  |  |  105|    971|#define P_VOP	1
  ------------------
  |  Branch (1689:5): [True: 971, False: 9.64k]
  ------------------
 1690|    971|      decoder_pframe(dec, &bs, rounding, quant,
 1691|    971|                        fcode_forward, intra_dc_threshold, NULL);
 1692|    971|      break;
 1693|  2.48k|    case S_VOP :
  ------------------
  |  |  107|  2.48k|#define S_VOP	3
  ------------------
  |  Branch (1693:5): [True: 2.48k, False: 8.12k]
  ------------------
 1694|  2.48k|      decoder_pframe(dec, &bs, rounding, quant,
 1695|  2.48k|                        fcode_forward, intra_dc_threshold, &gmc_warp);
 1696|  2.48k|      break;
 1697|     29|    case N_VOP :
  ------------------
  |  |  108|     29|#define N_VOP	4
  ------------------
  |  Branch (1697:5): [True: 29, False: 10.5k]
  ------------------
 1698|       |      /* XXX: not_coded vops are not used for forward prediction */
 1699|       |      /* we should not swap(last_mbs,mbs) */
 1700|     29|      image_copy(&dec->cur, &dec->refn[0], dec->edged_width, dec->height);
 1701|     29|      SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs); /* it will be swapped back */
  ------------------
  |  |  264|     29|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
 1702|     29|      break;
 1703|  10.6k|    }
 1704|       |
 1705|       |    /* note: for packed_mode, output is performed when the special-N_VOP is decoded */
 1706|  10.6k|    if (!(dec->low_delay_default && dec->packed_mode)) {
  ------------------
  |  Branch (1706:11): [True: 0, False: 10.6k]
  |  Branch (1706:37): [True: 0, False: 0]
  ------------------
 1707|  10.6k|      if(dec->low_delay) {
  ------------------
  |  Branch (1707:10): [True: 237, False: 10.3k]
  ------------------
 1708|    237|        decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type, quant);
 1709|    237|        output = 1;
 1710|  10.3k|      } else if (dec->frames > 0) { /* is the reference frame valid? */
  ------------------
  |  Branch (1710:18): [True: 3.50k, False: 6.86k]
  ------------------
 1711|       |        /* output the reference frame */
 1712|  3.50k|        decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1713|  3.50k|        output = 1;
 1714|  3.50k|      }
 1715|  10.6k|    }
 1716|       |    
 1717|  10.6k|    image_swap(&dec->refn[0], &dec->refn[1]);
 1718|  10.6k|    dec->is_edged[1] = dec->is_edged[0];
 1719|  10.6k|    image_swap(&dec->cur, &dec->refn[0]);
 1720|  10.6k|    dec->is_edged[0] = 0;
 1721|  10.6k|    SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs);
  ------------------
  |  |  264|  10.6k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
 1722|  10.6k|    dec->last_coding_type = coding_type;
 1723|       |
 1724|  10.6k|    dec->frames++;
 1725|  10.6k|    seen_something = 1;
 1726|       |
 1727|  10.6k|  } else {  /* B_VOP */
 1728|       |
 1729|  7.10k|    if (dec->low_delay) {
  ------------------
  |  Branch (1729:9): [True: 123, False: 6.98k]
  ------------------
 1730|    123|      DPRINTF(XVID_DEBUG_ERROR, "warning: bvop found in low_delay==1 stream\n");
  ------------------
  |  |  196|    123|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1731|    123|      dec->low_delay = 0;
 1732|    123|    }
 1733|       |
 1734|  7.10k|    if (dec->frames < 2) {
  ------------------
  |  Branch (1734:9): [True: 2.54k, False: 4.56k]
  ------------------
 1735|       |      /* attemping to decode a bvop without atleast 2 reference frames */
 1736|  2.54k|      image_printf(&dec->cur, dec->edged_width, dec->height, 16, 16,
 1737|  2.54k|            "broken b-frame, mising ref frames");
 1738|  2.54k|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|  2.54k|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1738:11): [True: 2.54k, False: 0]
  ------------------
 1739|  4.56k|    } else if (dec->time_pp <= dec->time_bp) {
  ------------------
  |  Branch (1739:16): [True: 2.14k, False: 2.41k]
  ------------------
 1740|       |      /* this occurs when dx50_bvop_compatibility==0 sequences are
 1741|       |      decoded in vfw. */
 1742|  2.14k|      image_printf(&dec->cur, dec->edged_width, dec->height, 16, 16,
 1743|  2.14k|            "broken b-frame, tpp=%i tbp=%i", dec->time_pp, dec->time_bp);
 1744|  2.14k|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|  2.14k|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1744:11): [True: 2.14k, False: 0]
  ------------------
 1745|  2.41k|    } else {
 1746|  2.41k|      decoder_bframe(dec, &bs, quant, fcode_forward, fcode_backward);
 1747|  2.41k|      decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type, quant);
 1748|  2.41k|    }
 1749|       |
 1750|  7.10k|    output = 1;
 1751|  7.10k|    dec->frames++;
 1752|  7.10k|  }
 1753|       |
 1754|       |#if 0 /* Avoids to read to much data because of 32bit reads in our BS functions */
 1755|       |   BitstreamByteAlign(&bs);
 1756|       |#endif
 1757|       |
 1758|       |  /* low_delay_default mode: repeat in packed_mode */
 1759|  18.0k|  if (dec->low_delay_default && dec->packed_mode && output == 0 && success == 0) {
  ------------------
  |  Branch (1759:7): [True: 0, False: 18.0k]
  |  Branch (1759:33): [True: 0, False: 0]
  |  Branch (1759:53): [True: 0, False: 0]
  |  Branch (1759:68): [True: 0, False: 0]
  ------------------
 1760|      0|    success = 1;
 1761|      0|    goto repeat;
 1762|      0|  }
 1763|       |
 1764|  18.0k|done :
 1765|       |
 1766|       |  /* if we reach here without outputing anything _and_
 1767|       |     the calling application has specified low_delay_default,
 1768|       |     we *must* output something.
 1769|       |     this always occurs on the first call to decode() call
 1770|       |     when bframes are present in the bitstream. it may also
 1771|       |     occur if no vops  were seen in the bitstream
 1772|       |
 1773|       |     if packed_mode is enabled, then we output the recently
 1774|       |     decoded frame (the very first ivop). otherwise we have
 1775|       |     nothing to display, and therefore output a black screen.
 1776|       |  */
 1777|  18.0k|  if (dec->low_delay_default && output == 0) {
  ------------------
  |  Branch (1777:7): [True: 0, False: 18.0k]
  |  Branch (1777:33): [True: 0, False: 0]
  ------------------
 1778|      0|    if (dec->packed_mode && seen_something) {
  ------------------
  |  Branch (1778:9): [True: 0, False: 0]
  |  Branch (1778:29): [True: 0, False: 0]
  ------------------
 1779|      0|      decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1780|      0|    } else {
 1781|      0|      image_clear(&dec->cur, dec->width, dec->height, dec->edged_width, 0, 128, 128);
 1782|      0|      decoder_output(dec, &dec->cur, NULL, frame, stats, P_VOP, quant);
  ------------------
  |  |  105|      0|#define P_VOP	1
  ------------------
 1783|      0|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|      0|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1783:11): [True: 0, False: 0]
  ------------------
 1784|      0|    }
 1785|      0|  }
 1786|       |
 1787|  18.0k|  emms();
 1788|  18.0k|  stop_global_timer();
 1789|       |
 1790|  18.0k|  return (BitstreamPos(&bs)+7)/8; /* number of bytes consumed */
 1791|  18.0k|}
decoder.c:decoder_resize:
   70|  23.6k|{
   71|       |	/* free existing */
   72|  23.6k|	image_destroy(&dec->cur, dec->edged_width, dec->edged_height);
   73|  23.6k|	image_destroy(&dec->refn[0], dec->edged_width, dec->edged_height);
   74|  23.6k|	image_destroy(&dec->refn[1], dec->edged_width, dec->edged_height);
   75|  23.6k|	image_destroy(&dec->tmp, dec->edged_width, dec->edged_height);
   76|  23.6k|	image_destroy(&dec->qtmp, dec->edged_width, dec->edged_height);
   77|       |
   78|  23.6k|	image_destroy(&dec->gmc, dec->edged_width, dec->edged_height);
   79|       |
   80|  23.6k|  image_null(&dec->cur);
   81|  23.6k|  image_null(&dec->refn[0]);
   82|  23.6k|  image_null(&dec->refn[1]);
   83|  23.6k|  image_null(&dec->tmp);
   84|  23.6k|  image_null(&dec->qtmp);
   85|  23.6k|  image_null(&dec->gmc);
   86|       |
   87|       |
   88|  23.6k|  xvid_free(dec->last_mbs);
   89|  23.6k|  xvid_free(dec->mbs);
   90|  23.6k|  xvid_free(dec->qscale);
   91|  23.6k|  dec->last_mbs = NULL;
   92|  23.6k|  dec->mbs = NULL;
   93|  23.6k|  dec->qscale = NULL;
   94|       |
   95|       |	/* realloc */
   96|  23.6k|	dec->mb_width = (dec->width + 15) / 16;
   97|  23.6k|	dec->mb_height = (dec->height + 15) / 16;
   98|       |
   99|  23.6k|	dec->edged_width = 16 * dec->mb_width + 2 * EDGE_SIZE;
  ------------------
  |  |   36|  23.6k|#define EDGE_SIZE  64
  ------------------
  100|  23.6k|	dec->edged_height = 16 * dec->mb_height + 2 * EDGE_SIZE;
  ------------------
  |  |   36|  23.6k|#define EDGE_SIZE  64
  ------------------
  101|       |
  102|  23.6k|	if (   image_create(&dec->cur, dec->edged_width, dec->edged_height) 
  ------------------
  |  Branch (102:9): [True: 0, False: 23.6k]
  ------------------
  103|  23.6k|	    || image_create(&dec->refn[0], dec->edged_width, dec->edged_height)
  ------------------
  |  Branch (103:9): [True: 0, False: 23.6k]
  ------------------
  104|  23.6k|	    || image_create(&dec->refn[1], dec->edged_width, dec->edged_height) 	/* Support B-frame to reference last 2 frame */
  ------------------
  |  Branch (104:9): [True: 0, False: 23.6k]
  ------------------
  105|  23.6k|	    || image_create(&dec->tmp, dec->edged_width, dec->edged_height)
  ------------------
  |  Branch (105:9): [True: 0, False: 23.6k]
  ------------------
  106|  23.6k|	    || image_create(&dec->qtmp, dec->edged_width, dec->edged_height)
  ------------------
  |  Branch (106:9): [True: 0, False: 23.6k]
  ------------------
  107|  23.6k|      || image_create(&dec->gmc, dec->edged_width, dec->edged_height) )
  ------------------
  |  Branch (107:10): [True: 0, False: 23.6k]
  ------------------
  108|      0|    goto memory_error;
  109|       |
  110|  23.6k|	dec->mbs =
  111|  23.6k|		xvid_malloc(sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height,
  112|  23.6k|					CACHE_LINE);
  ------------------
  |  |  131|  23.6k|#    define CACHE_LINE  64
  ------------------
  113|  23.6k|	if (dec->mbs == NULL)
  ------------------
  |  Branch (113:6): [True: 0, False: 23.6k]
  ------------------
  114|      0|	  goto memory_error;
  115|  23.6k|	memset(dec->mbs, 0, sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height);
  116|       |
  117|       |	/* For skip MB flag */
  118|  23.6k|	dec->last_mbs =
  119|  23.6k|		xvid_malloc(sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height,
  120|  23.6k|					CACHE_LINE);
  ------------------
  |  |  131|  23.6k|#    define CACHE_LINE  64
  ------------------
  121|  23.6k|	if (dec->last_mbs == NULL)
  ------------------
  |  Branch (121:6): [True: 0, False: 23.6k]
  ------------------
  122|      0|	  goto memory_error;
  123|  23.6k|	memset(dec->last_mbs, 0, sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height);
  124|       |
  125|       |	/* nothing happens if that fails */
  126|  23.6k|	dec->qscale =
  127|  23.6k|		xvid_malloc(sizeof(int) * dec->mb_width * dec->mb_height, CACHE_LINE);
  ------------------
  |  |  131|  23.6k|#    define CACHE_LINE  64
  ------------------
  128|       |	
  129|  23.6k|	if (dec->qscale)
  ------------------
  |  Branch (129:6): [True: 23.6k, False: 0]
  ------------------
  130|  23.6k|		memset(dec->qscale, 0, sizeof(int) * dec->mb_width * dec->mb_height);
  131|       |
  132|  23.6k|	return 0;
  133|       |
  134|      0|memory_error:
  135|       |        /* Most structures were deallocated / nullifieded, so it should be safe */
  136|       |        /* decoder_destroy(dec) minus the write_timer */
  137|      0|  xvid_free(dec->mbs);
  138|      0|  image_destroy(&dec->cur, dec->edged_width, dec->edged_height);
  139|      0|  image_destroy(&dec->refn[0], dec->edged_width, dec->edged_height);
  140|      0|  image_destroy(&dec->refn[1], dec->edged_width, dec->edged_height);
  141|      0|  image_destroy(&dec->tmp, dec->edged_width, dec->edged_height);
  142|      0|  image_destroy(&dec->qtmp, dec->edged_width, dec->edged_height);
  143|       |
  144|      0|  xvid_free(dec);
  145|      0|  return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  146|  23.6k|}
decoder.c:decoder_output:
 1524|  6.15k|{
 1525|  6.15k|  const int brightness = XVID_VERSION_MINOR(frame->version) >= 1 ? frame->brightness : 0;
  ------------------
  |  |   64|  6.15k|#define XVID_VERSION_MINOR(a)    ((char)(((a)>> 8) & 0xff))
  ------------------
  |  Branch (1525:26): [True: 6.15k, False: 0]
  ------------------
 1526|       |
 1527|  6.15k|  if (dec->cartoon_mode)
  ------------------
  |  Branch (1527:7): [True: 15, False: 6.14k]
  ------------------
 1528|     15|    frame->general &= ~XVID_FILMEFFECT;
  ------------------
  |  |  272|     15|#define XVID_FILMEFFECT    (1<<4) /* adds film grain */
  ------------------
 1529|       |
 1530|  6.15k|  if ((frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) || brightness!=0)
  ------------------
  |  |  270|  6.15k|#define XVID_DEBLOCKY      (1<<2) /* perform luma deblocking */
  ------------------
                if ((frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) || brightness!=0)
  ------------------
  |  |  271|  6.15k|#define XVID_DEBLOCKUV     (1<<3) /* perform chroma deblocking */
  ------------------
                if ((frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) || brightness!=0)
  ------------------
  |  |  272|  6.15k|#define XVID_FILMEFFECT    (1<<4) /* adds film grain */
  ------------------
  |  Branch (1530:8): [True: 0, False: 6.15k]
  |  Branch (1530:75): [True: 0, False: 6.15k]
  ------------------
 1531|  6.15k|    && mbs != NULL) /* post process */
  ------------------
  |  Branch (1531:8): [True: 0, False: 0]
  ------------------
 1532|      0|  {
 1533|       |    /* note: image is stored to tmp */
 1534|      0|    image_copy(&dec->tmp, img, dec->edged_width, dec->height);
 1535|      0|    image_postproc(&dec->postproc, &dec->tmp, dec->edged_width,
 1536|      0|             mbs, dec->mb_width, dec->mb_height, dec->mb_width,
 1537|      0|             frame->general, brightness, dec->frames, (coding_type == B_VOP), dec->num_threads);
  ------------------
  |  |  106|      0|#define B_VOP	2
  ------------------
 1538|      0|    img = &dec->tmp;
 1539|      0|  }
 1540|       |
 1541|  6.15k|  if ((frame->output.csp == XVID_CSP_INTERNAL) || 
  ------------------
  |  |  125|  6.15k|#define XVID_CSP_INTERNAL (1<<13) /* decoder only: 4:2:0 planar, returns ptrs to internal buffers */
  ------------------
  |  Branch (1541:7): [True: 0, False: 6.15k]
  ------------------
 1542|  6.15k|      ((frame->output.plane[0] != NULL) && (frame->output.stride[0] >= dec->width))) {
  ------------------
  |  Branch (1542:8): [True: 6.15k, False: 0]
  |  Branch (1542:44): [True: 6.15k, False: 0]
  ------------------
 1543|  6.15k|    image_output(img, dec->width, dec->height,
 1544|  6.15k|           dec->edged_width, (uint8_t**)frame->output.plane, frame->output.stride,
 1545|  6.15k|           frame->output.csp, dec->interlacing);
 1546|  6.15k|  }
 1547|       |
 1548|  6.15k|  if (stats) {
  ------------------
  |  Branch (1548:7): [True: 6.15k, False: 0]
  ------------------
 1549|  6.15k|    stats->type = coding2type(coding_type);
 1550|  6.15k|    stats->data.vop.time_base = (int)dec->time_base;
 1551|  6.15k|    stats->data.vop.time_increment = 0; /* XXX: todo */
 1552|  6.15k|    stats->data.vop.qscale_stride = dec->mb_width;
 1553|  6.15k|    stats->data.vop.qscale = dec->qscale;
 1554|  6.15k|    if (stats->data.vop.qscale != NULL && mbs != NULL) {
  ------------------
  |  Branch (1554:9): [True: 6.15k, False: 0]
  |  Branch (1554:43): [True: 6.15k, False: 0]
  ------------------
 1555|  6.15k|      unsigned int i;
 1556|  37.9M|      for (i = 0; i < dec->mb_width*dec->mb_height; i++)
  ------------------
  |  Branch (1556:19): [True: 37.9M, False: 6.15k]
  ------------------
 1557|  37.9M|        stats->data.vop.qscale[i] = mbs[i].quant;
 1558|  6.15k|    } else
 1559|      0|      stats->data.vop.qscale = NULL;
 1560|  6.15k|  }
 1561|  6.15k|}
decoder.c:decoder_iframe:
  735|  7.12k|{
  736|  7.12k|  uint32_t bound;
  737|  7.12k|  uint32_t x, y;
  738|  7.12k|  const uint32_t mb_width = dec->mb_width;
  739|  7.12k|  const uint32_t mb_height = dec->mb_height;
  740|       |
  741|  7.12k|  bound = 0;
  742|       |
  743|   171k|  for (y = 0; y < mb_height; y++) {
  ------------------
  |  Branch (743:15): [True: 164k, False: 7.12k]
  ------------------
  744|  24.1M|    for (x = 0; x < mb_width; x++) {
  ------------------
  |  Branch (744:17): [True: 23.9M, False: 164k]
  ------------------
  745|  23.9M|      MACROBLOCK *mb;
  746|  23.9M|      uint32_t mcbpc;
  747|  23.9M|      uint32_t cbpc;
  748|  23.9M|      uint32_t acpred_flag;
  749|  23.9M|      uint32_t cbpy;
  750|  23.9M|      uint32_t cbp;
  751|       |
  752|  23.9M|      while (BitstreamShowBits(bs, 9) == 1)
  ------------------
  |  Branch (752:14): [True: 1.21k, False: 23.9M]
  ------------------
  753|  1.21k|        BitstreamSkip(bs, 9);
  754|       |
  755|  23.9M|      if (check_resync_marker(bs, 0))
  ------------------
  |  Branch (755:11): [True: 12.8k, False: 23.9M]
  ------------------
  756|  12.8k|      {
  757|  12.8k|        bound = read_video_packet_header(bs, dec, 0,
  758|  12.8k|              &quant, NULL, NULL, &intra_dc_threshold);
  759|  12.8k|        x = bound % mb_width;
  760|  12.8k|        y = MIN((bound / mb_width), (mb_height-1));
  ------------------
  |  |   34|  12.8k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 9.13k, False: 3.74k]
  |  |  ------------------
  ------------------
  761|  12.8k|      }
  762|  23.9M|      mb = &dec->mbs[y * dec->mb_width + x];
  763|       |
  764|  23.9M|      DPRINTF(XVID_DEBUG_MB, "macroblock (%i,%i) %08x\n", x, y, BitstreamShowBits(bs, 32));
  ------------------
  |  |  200|  23.9M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
  765|       |
  766|  23.9M|      mcbpc = get_mcbpc_intra(bs);
  767|  23.9M|      mb->mode = mcbpc & 7;
  768|  23.9M|      cbpc = (mcbpc >> 4);
  769|       |
  770|  23.9M|      acpred_flag = BitstreamGetBit(bs);
  771|       |
  772|  23.9M|      cbpy = get_cbpy(bs, 1);
  773|  23.9M|      cbp = (cbpy << 2) | cbpc;
  774|       |
  775|  23.9M|      if (mb->mode == MODE_INTRA_Q) {
  ------------------
  |  |   38|  23.9M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (775:11): [True: 12.3k, False: 23.9M]
  ------------------
  776|  12.3k|        quant += dquant_table[BitstreamGetBits(bs, 2)];
  777|  12.3k|        if (quant > 31) {
  ------------------
  |  Branch (777:13): [True: 2.62k, False: 9.69k]
  ------------------
  778|  2.62k|          quant = 31;
  779|  9.69k|        } else if (quant < 1) {
  ------------------
  |  Branch (779:20): [True: 2.82k, False: 6.87k]
  ------------------
  780|  2.82k|          quant = 1;
  781|  2.82k|        }
  782|  12.3k|      }
  783|  23.9M|      mb->quant = quant;
  784|  23.9M|      mb->mvs[0].x = mb->mvs[0].y =
  785|  23.9M|      mb->mvs[1].x = mb->mvs[1].y =
  786|  23.9M|      mb->mvs[2].x = mb->mvs[2].y =
  787|  23.9M|      mb->mvs[3].x = mb->mvs[3].y =0;
  788|       |
  789|  23.9M|      if (dec->interlacing) {
  ------------------
  |  Branch (789:11): [True: 17.8M, False: 6.16M]
  ------------------
  790|  17.8M|        mb->field_dct = BitstreamGetBit(bs);
  791|  17.8M|        DPRINTF(XVID_DEBUG_MB,"deci: field_dct: %i\n", mb->field_dct);
  ------------------
  |  |  200|  17.8M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
  792|  17.8M|      }
  793|       |
  794|  23.9M|      decoder_mbintra(dec, mb, x, y, acpred_flag, cbp, bs, quant,
  795|  23.9M|              intra_dc_threshold, bound);
  796|       |
  797|  23.9M|    }
  798|   164k|    if(dec->out_frm)
  ------------------
  |  Branch (798:8): [True: 0, False: 164k]
  ------------------
  799|      0|      output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,0,y,mb_width);
  800|   164k|  }
  801|       |
  802|  7.12k|}
decoder.c:decoder_mbintra:
  258|  40.2M|{
  259|       |
  260|  40.2M|  DECLARE_ALIGNED_MATRIX(block, 6, 64, int16_t, CACHE_LINE);
  ------------------
  |  |  287|  40.2M|	type name##_storage[(sizex)*(sizey)+(alignment)-1]; \
  |  |  288|  40.2M|type * name = (type *) (((ptr_t) name##_storage+(alignment - 1)) & ~((ptr_t)(alignment)-1))
  ------------------
  261|  40.2M|  DECLARE_ALIGNED_MATRIX(data, 6, 64, int16_t, CACHE_LINE);
  ------------------
  |  |  287|  40.2M|	type name##_storage[(sizex)*(sizey)+(alignment)-1]; \
  |  |  288|  40.2M|type * name = (type *) (((ptr_t) name##_storage+(alignment - 1)) & ~((ptr_t)(alignment)-1))
  ------------------
  262|       |
  263|  40.2M|  uint32_t stride = dec->edged_width;
  264|  40.2M|  uint32_t stride2 = stride / 2;
  265|  40.2M|  uint32_t next_block = stride * 8;
  266|  40.2M|  uint32_t i;
  267|  40.2M|  uint32_t iQuant = MAX(1, pMB->quant);
  ------------------
  |  |   35|  40.2M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 998k, False: 39.2M]
  |  |  ------------------
  ------------------
  268|  40.2M|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
  269|       |
  270|  40.2M|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  271|  40.2M|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  272|  40.2M|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  273|       |
  274|  40.2M|  memset(block, 0, 6 * 64 * sizeof(int16_t)); /* clear */
  275|       |
  276|   281M|  for (i = 0; i < 6; i++) {
  ------------------
  |  Branch (276:15): [True: 241M, False: 40.2M]
  ------------------
  277|   241M|    uint32_t iDcScaler = get_dc_scaler(iQuant, i < 4);
  278|   241M|    int16_t predictors[8];
  279|   241M|    int start_coeff;
  280|       |
  281|   241M|    start_timer();
  282|   241M|    predict_acdc(dec->mbs, x_pos, y_pos, dec->mb_width, i, &block[i * 64],
  283|   241M|           iQuant, iDcScaler, predictors, bound);
  284|   241M|    if (!acpred_flag) {
  ------------------
  |  Branch (284:9): [True: 240M, False: 438k]
  ------------------
  285|   240M|      pMB->acpred_directions[i] = 0;
  286|   240M|    }
  287|   241M|    stop_prediction_timer();
  288|       |
  289|   241M|    if (quant < intra_dc_threshold) {
  ------------------
  |  Branch (289:9): [True: 121M, False: 119M]
  ------------------
  290|   121M|      int dc_size;
  291|   121M|      int dc_dif;
  292|       |
  293|   121M|      dc_size = i < 4 ? get_dc_size_lum(bs) : get_dc_size_chrom(bs);
  ------------------
  |  Branch (293:17): [True: 81.0M, False: 40.5M]
  ------------------
  294|   121M|      dc_dif = dc_size ? get_dc_dif(bs, dc_size) : 0;
  ------------------
  |  Branch (294:16): [True: 40.7M, False: 80.8M]
  ------------------
  295|       |
  296|   121M|      if (dc_size > 8) {
  ------------------
  |  Branch (296:11): [True: 98.0k, False: 121M]
  ------------------
  297|  98.0k|        BitstreamSkip(bs, 1); /* marker */
  298|  98.0k|      }
  299|       |
  300|   121M|      block[i * 64 + 0] = dc_dif;
  301|   121M|      start_coeff = 1;
  302|       |
  303|   121M|      DPRINTF(XVID_DEBUG_COEFF,"block[0] %i\n", dc_dif);
  ------------------
  |  |  201|   121M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  304|   121M|    } else {
  305|   119M|      start_coeff = 0;
  306|   119M|    }
  307|       |
  308|   241M|    start_timer();
  309|   241M|    if (cbp & (1 << (5 - i))) /* coded */
  ------------------
  |  Branch (309:9): [True: 241M, False: 339k]
  ------------------
  310|   241M|    {
  311|   241M|      int direction = dec->alternate_vertical_scan ?
  ------------------
  |  Branch (311:23): [True: 76.5M, False: 164M]
  ------------------
  312|   164M|        2 : pMB->acpred_directions[i];
  313|       |
  314|   241M|      get_intra_block(bs, &block[i * 64], direction, start_coeff);
  315|   241M|    }
  316|   241M|    stop_coding_timer();
  317|       |
  318|   241M|    start_timer();
  319|   241M|    add_acdc(pMB, i, &block[i * 64], iDcScaler, predictors, dec->bs_version);
  320|   241M|    stop_prediction_timer();
  321|       |
  322|   241M|    start_timer();
  323|   241M|    if (dec->quant_type == 0) {
  ------------------
  |  Branch (323:9): [True: 140M, False: 101M]
  ------------------
  324|   140M|      dequant_h263_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
  325|   140M|    } else {
  326|   101M|      dequant_mpeg_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
  327|   101M|    }
  328|   241M|    stop_iquant_timer();
  329|       |
  330|   241M|    start_timer();
  331|   241M|    idct((short * const)&data[i * 64]);
  332|   241M|    stop_idct_timer();
  333|       |
  334|   241M|  }
  335|       |
  336|  40.2M|  if (dec->interlacing && pMB->field_dct) {
  ------------------
  |  Branch (336:7): [True: 30.0M, False: 10.1M]
  |  Branch (336:27): [True: 87.1k, False: 29.9M]
  ------------------
  337|  87.1k|    next_block = stride;
  338|  87.1k|    stride *= 2;
  339|  87.1k|  }
  340|       |
  341|  40.2M|  start_timer();
  342|  40.2M|  transfer_16to8copy(pY_Cur, &data[0 * 64], stride);
  343|  40.2M|  transfer_16to8copy(pY_Cur + 8, &data[1 * 64], stride);
  344|  40.2M|  transfer_16to8copy(pY_Cur + next_block, &data[2 * 64], stride);
  345|  40.2M|  transfer_16to8copy(pY_Cur + 8 + next_block, &data[3 * 64], stride);
  346|  40.2M|  transfer_16to8copy(pU_Cur, &data[4 * 64], stride2);
  347|  40.2M|  transfer_16to8copy(pV_Cur, &data[5 * 64], stride2);
  348|  40.2M|  stop_transfer_timer();
  349|  40.2M|}
decoder.c:decoder_pframe:
  948|  3.46k|{
  949|  3.46k|  uint32_t x, y;
  950|  3.46k|  uint32_t bound;
  951|  3.46k|  int cp_mb, st_mb;
  952|  3.46k|  const uint32_t mb_width = dec->mb_width;
  953|  3.46k|  const uint32_t mb_height = dec->mb_height;
  954|       |
  955|  3.46k|  if (!dec->is_edged[0]) {
  ------------------
  |  Branch (955:7): [True: 3.46k, False: 0]
  ------------------
  956|  3.46k|    start_timer();
  957|  3.46k|    image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height,
  958|  3.46k|            dec->width, dec->height, dec->bs_version);
  959|  3.46k|    dec->is_edged[0] = 1;
  960|  3.46k|    stop_edges_timer();
  961|  3.46k|  }
  962|       |
  963|  3.46k|  if (gmc_warp) {
  ------------------
  |  Branch (963:7): [True: 2.48k, False: 971]
  ------------------
  964|       |    /* accuracy: 0==1/2, 1=1/4, 2=1/8, 3=1/16 */
  965|  2.48k|    generate_GMCparameters( dec->sprite_warping_points,
  966|  2.48k|        dec->sprite_warping_accuracy, gmc_warp,
  967|  2.48k|        dec->width, dec->height, &dec->new_gmc_data);
  968|       |
  969|       |    /* image warping is done block-based in decoder_mbgmc(), now */
  970|  2.48k|  }
  971|       |
  972|  3.46k|  bound = 0;
  973|       |
  974|   445k|  for (y = 0; y < mb_height; y++) {
  ------------------
  |  Branch (974:15): [True: 442k, False: 3.46k]
  ------------------
  975|   442k|    cp_mb = st_mb = 0;
  976|  17.6M|    for (x = 0; x < mb_width; x++) {
  ------------------
  |  Branch (976:17): [True: 17.1M, False: 442k]
  ------------------
  977|  17.1M|      MACROBLOCK *mb;
  978|       |
  979|       |      /* skip stuffing */
  980|  17.2M|      while (BitstreamShowBits(bs, 10) == 1)
  ------------------
  |  Branch (980:14): [True: 43.8k, False: 17.1M]
  ------------------
  981|  43.8k|        BitstreamSkip(bs, 10);
  982|       |
  983|  17.1M|      if (check_resync_marker(bs, fcode - 1)) {
  ------------------
  |  Branch (983:11): [True: 14.5k, False: 17.1M]
  ------------------
  984|  14.5k|        bound = read_video_packet_header(bs, dec, fcode - 1,
  985|  14.5k|          &quant, &fcode, NULL, &intra_dc_threshold);
  986|  14.5k|        x = bound % mb_width;
  987|  14.5k|        y = MIN((bound / mb_width), (mb_height-1));
  ------------------
  |  |   34|  14.5k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 13.5k, False: 984]
  |  |  ------------------
  ------------------
  988|  14.5k|      }
  989|  17.1M|      mb = &dec->mbs[y * dec->mb_width + x];
  990|       |
  991|  17.1M|      DPRINTF(XVID_DEBUG_MB, "macroblock (%i,%i) %08x\n", x, y, BitstreamShowBits(bs, 32));
  ------------------
  |  |  200|  17.1M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
  992|       |
  993|  17.1M|      if (!(BitstreamGetBit(bs))) { /* block _is_ coded */
  ------------------
  |  Branch (993:11): [True: 16.5M, False: 656k]
  ------------------
  994|  16.5M|        uint32_t mcbpc, cbpc, cbpy, cbp;
  995|  16.5M|        uint32_t intra, acpred_flag = 0;
  996|  16.5M|        int mcsel = 0;    /* mcsel: '0'=local motion, '1'=GMC */
  997|       |
  998|  16.5M|        cp_mb++;
  999|  16.5M|        mcbpc = get_mcbpc_inter(bs);
 1000|  16.5M|        mb->mode = mcbpc & 7;
 1001|  16.5M|        cbpc = (mcbpc >> 4);
 1002|       |
 1003|  16.5M|        DPRINTF(XVID_DEBUG_MB, "mode %i\n", mb->mode);
  ------------------
  |  |  200|  16.5M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1004|  16.5M|        DPRINTF(XVID_DEBUG_MB, "cbpc %i\n", cbpc);
  ------------------
  |  |  200|  16.5M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1005|       |
 1006|  16.5M|        intra = (mb->mode == MODE_INTRA || mb->mode == MODE_INTRA_Q);
  ------------------
  |  |   37|  33.0M|#define	MODE_INTRA		3
  ------------------
                      intra = (mb->mode == MODE_INTRA || mb->mode == MODE_INTRA_Q);
  ------------------
  |  |   38|  16.5M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (1006:18): [True: 26.5k, False: 16.5M]
  |  Branch (1006:44): [True: 19.6k, False: 16.4M]
  ------------------
 1007|       |
 1008|  16.5M|        if (gmc_warp && (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q))
  ------------------
  |  |   34|  26.5M|#define MODE_INTER		0
  ------------------
                      if (gmc_warp && (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q))
  ------------------
  |  |   35|  13.1M|#define MODE_INTER_Q	1
  ------------------
  |  Branch (1008:13): [True: 13.2M, False: 3.26M]
  |  Branch (1008:26): [True: 135k, False: 13.1M]
  |  Branch (1008:52): [True: 26.7k, False: 13.1M]
  ------------------
 1009|   162k|          mcsel = BitstreamGetBit(bs);
 1010|  16.3M|        else if (intra)
  ------------------
  |  Branch (1010:18): [True: 46.1k, False: 16.3M]
  ------------------
 1011|  46.1k|          acpred_flag = BitstreamGetBit(bs);
 1012|       |
 1013|  16.5M|        cbpy = get_cbpy(bs, intra);
 1014|  16.5M|        DPRINTF(XVID_DEBUG_MB, "cbpy %i mcsel %i \n", cbpy,mcsel);
  ------------------
  |  |  200|  16.5M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1015|       |
 1016|  16.5M|        cbp = (cbpy << 2) | cbpc;
 1017|       |
 1018|  16.5M|        if (mb->mode == MODE_INTER_Q || mb->mode == MODE_INTRA_Q) {
  ------------------
  |  |   35|  33.0M|#define MODE_INTER_Q	1
  ------------------
                      if (mb->mode == MODE_INTER_Q || mb->mode == MODE_INTRA_Q) {
  ------------------
  |  |   38|  16.5M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (1018:13): [True: 35.7k, False: 16.5M]
  |  Branch (1018:41): [True: 19.6k, False: 16.4M]
  ------------------
 1019|  55.3k|          int dquant = dquant_table[BitstreamGetBits(bs, 2)];
 1020|  55.3k|          DPRINTF(XVID_DEBUG_MB, "dquant %i\n", dquant);
  ------------------
  |  |  200|  55.3k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1021|  55.3k|          quant += dquant;
 1022|  55.3k|          if (quant > 31) {
  ------------------
  |  Branch (1022:15): [True: 7.26k, False: 48.0k]
  ------------------
 1023|  7.26k|            quant = 31;
 1024|  48.0k|          } else if (quant < 1) {
  ------------------
  |  Branch (1024:22): [True: 19.3k, False: 28.7k]
  ------------------
 1025|  19.3k|            quant = 1;
 1026|  19.3k|          }
 1027|  55.3k|          DPRINTF(XVID_DEBUG_MB, "quant %i\n", quant);
  ------------------
  |  |  200|  55.3k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1028|  55.3k|        }
 1029|  16.5M|        mb->quant = quant;
 1030|       |
 1031|  16.5M|        mb->field_pred=0;
 1032|  16.5M|        if (dec->interlacing) {
  ------------------
  |  Branch (1032:13): [True: 12.3M, False: 4.14M]
  ------------------
 1033|  12.3M|          if (cbp || intra) {
  ------------------
  |  Branch (1033:15): [True: 12.3M, False: 52.9k]
  |  Branch (1033:22): [True: 2.03k, False: 50.9k]
  ------------------
 1034|  12.3M|            mb->field_dct = BitstreamGetBit(bs);
 1035|  12.3M|            DPRINTF(XVID_DEBUG_MB,"decp: field_dct: %i\n", mb->field_dct);
  ------------------
  |  |  200|  12.3M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1036|  12.3M|          }
 1037|       |
 1038|  12.3M|          if ((mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) && !mcsel) {
  ------------------
  |  |   34|  24.7M|#define MODE_INTER		0
  ------------------
                        if ((mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) && !mcsel) {
  ------------------
  |  |   35|  12.2M|#define MODE_INTER_Q	1
  ------------------
  |  Branch (1038:16): [True: 117k, False: 12.2M]
  |  Branch (1038:42): [True: 24.7k, False: 12.2M]
  |  Branch (1038:71): [True: 86.8k, False: 55.4k]
  ------------------
 1039|  86.8k|            mb->field_pred = BitstreamGetBit(bs);
 1040|  86.8k|            DPRINTF(XVID_DEBUG_MB, "decp: field_pred: %i\n", mb->field_pred);
  ------------------
  |  |  200|  86.8k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1041|       |
 1042|  86.8k|            if (mb->field_pred) {
  ------------------
  |  Branch (1042:17): [True: 29.4k, False: 57.3k]
  ------------------
 1043|  29.4k|              mb->field_for_top = BitstreamGetBit(bs);
 1044|  29.4k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_top: %i\n", mb->field_for_top);
  ------------------
  |  |  200|  29.4k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1045|  29.4k|              mb->field_for_bot = BitstreamGetBit(bs);
 1046|  29.4k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_bot: %i\n", mb->field_for_bot);
  ------------------
  |  |  200|  29.4k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1047|  29.4k|            }
 1048|  86.8k|          }
 1049|  12.3M|        }
 1050|       |
 1051|  16.5M|        if (mcsel) {
  ------------------
  |  Branch (1051:13): [True: 78.5k, False: 16.4M]
  ------------------
 1052|  78.5k|          decoder_mbgmc(dec, mb, x, y, fcode, cbp, bs, rounding);
 1053|  78.5k|          continue;
 1054|       |
 1055|  16.4M|        } else if (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) {
  ------------------
  |  |   34|  32.9M|#define MODE_INTER		0
  ------------------
                      } else if (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) {
  ------------------
  |  |   35|  16.3M|#define MODE_INTER_Q	1
  ------------------
  |  Branch (1055:20): [True: 132k, False: 16.3M]
  |  Branch (1055:46): [True: 25.7k, False: 16.3M]
  ------------------
 1056|       |
 1057|   157k|          if(dec->interlacing) {
  ------------------
  |  Branch (1057:14): [True: 86.8k, False: 70.9k]
  ------------------
 1058|       |            /* Get motion vectors interlaced, field_pred is handled there */
 1059|  86.8k|            get_motion_vector_interlaced(dec, bs, x, y, 0, mb, fcode, bound);
 1060|  86.8k|          } else {
 1061|  70.9k|            get_motion_vector(dec, bs, x, y, 0, &mb->mvs[0], fcode, bound);
 1062|  70.9k|            mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1063|  70.9k|          }
 1064|  16.3M|        } else if (mb->mode == MODE_INTER4V ) {
  ------------------
  |  |   36|  16.3M|#define MODE_INTER4V	2
  ------------------
  |  Branch (1064:20): [True: 39.1k, False: 16.2M]
  ------------------
 1065|       |          /* interlaced missing here */
 1066|  39.1k|          get_motion_vector(dec, bs, x, y, 0, &mb->mvs[0], fcode, bound);
 1067|  39.1k|          get_motion_vector(dec, bs, x, y, 1, &mb->mvs[1], fcode, bound);
 1068|  39.1k|          get_motion_vector(dec, bs, x, y, 2, &mb->mvs[2], fcode, bound);
 1069|  39.1k|          get_motion_vector(dec, bs, x, y, 3, &mb->mvs[3], fcode, bound);
 1070|  16.2M|        } else { /* MODE_INTRA, MODE_INTRA_Q */
 1071|  16.2M|          mb->mvs[0].x = mb->mvs[1].x = mb->mvs[2].x = mb->mvs[3].x = 0;
 1072|  16.2M|          mb->mvs[0].y = mb->mvs[1].y = mb->mvs[2].y = mb->mvs[3].y = 0;
 1073|  16.2M|          decoder_mbintra(dec, mb, x, y, acpred_flag, cbp, bs, quant,
 1074|  16.2M|                  intra_dc_threshold, bound);
 1075|  16.2M|          continue;
 1076|  16.2M|        }
 1077|       |
 1078|       |        /* See how to decode */
 1079|   196k|        if(!mb->field_pred)
  ------------------
  |  Branch (1079:12): [True: 167k, False: 29.4k]
  ------------------
 1080|   167k|         decoder_mbinter(dec, mb, x, y, cbp, bs, rounding, 0, 0);
 1081|  29.4k|        else 
 1082|  29.4k|         decoder_mbinter_field(dec, mb, x, y, cbp, bs, rounding, 0, 0);
 1083|       |
 1084|   656k|      } else if (gmc_warp) {  /* a not coded S(GMC)-VOP macroblock */
  ------------------
  |  Branch (1084:18): [True: 445k, False: 210k]
  ------------------
 1085|   445k|        mb->mode = MODE_NOT_CODED_GMC;
  ------------------
  |  |   40|   445k|#define MODE_NOT_CODED_GMC	17
  ------------------
 1086|   445k|        mb->quant = quant;
 1087|   445k|        decoder_mbgmc(dec, mb, x, y, fcode, 0x00, bs, rounding);
 1088|       |
 1089|   445k|        if(dec->out_frm && cp_mb > 0) {
  ------------------
  |  Branch (1089:12): [True: 0, False: 445k]
  |  Branch (1089:28): [True: 0, False: 0]
  ------------------
 1090|      0|          output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,st_mb,y,cp_mb);
 1091|      0|          cp_mb = 0;
 1092|      0|        }
 1093|   445k|        st_mb = x+1;
 1094|   445k|      } else { /* not coded P_VOP macroblock */
 1095|   210k|        mb->mode = MODE_NOT_CODED;
  ------------------
  |  |   39|   210k|#define MODE_NOT_CODED	16
  ------------------
 1096|   210k|        mb->quant = quant;
 1097|       |
 1098|   210k|        mb->mvs[0].x = mb->mvs[1].x = mb->mvs[2].x = mb->mvs[3].x = 0;
 1099|   210k|        mb->mvs[0].y = mb->mvs[1].y = mb->mvs[2].y = mb->mvs[3].y = 0;
 1100|   210k|        mb->field_pred=0; /* (!) */
 1101|       |
 1102|   210k|        decoder_mbinter(dec, mb, x, y, 0, bs, 
 1103|   210k|                                rounding, 0, 0);
 1104|       |
 1105|   210k|        if(dec->out_frm && cp_mb > 0) {
  ------------------
  |  Branch (1105:12): [True: 0, False: 210k]
  |  Branch (1105:28): [True: 0, False: 0]
  ------------------
 1106|      0|          output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,st_mb,y,cp_mb);
 1107|      0|          cp_mb = 0;
 1108|      0|        }
 1109|   210k|        st_mb = x+1;
 1110|   210k|      }
 1111|  17.1M|    }
 1112|       |
 1113|   442k|    if(dec->out_frm && cp_mb > 0)
  ------------------
  |  Branch (1113:8): [True: 0, False: 442k]
  |  Branch (1113:24): [True: 0, False: 0]
  ------------------
 1114|      0|      output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,st_mb,y,cp_mb);
 1115|   442k|  }
 1116|  3.46k|}
decoder.c:decoder_mbgmc:
  690|   524k|{
  691|   524k|  const uint32_t stride = dec->edged_width;
  692|   524k|  const uint32_t stride2 = stride / 2;
  693|       |
  694|   524k|  uint8_t *const pY_Cur=dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  695|   524k|  uint8_t *const pU_Cur=dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  696|   524k|  uint8_t *const pV_Cur=dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  697|       |
  698|   524k|  NEW_GMC_DATA * gmc_data = &dec->new_gmc_data;
  699|       |
  700|   524k|  pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = pMB->amv;
  701|       |
  702|   524k|  start_timer();
  703|       |
  704|       |/* this is where the calculations are done */
  705|       |
  706|   524k|  gmc_data->predict_16x16(gmc_data,
  707|   524k|      dec->cur.y + y_pos*16*stride + x_pos*16, dec->refn[0].y,
  708|   524k|      stride, stride, x_pos, y_pos, rounding);
  709|       |
  710|   524k|  gmc_data->predict_8x8(gmc_data,
  711|   524k|      dec->cur.u + y_pos*8*stride2 + x_pos*8, dec->refn[0].u,
  712|   524k|      dec->cur.v + y_pos*8*stride2 + x_pos*8, dec->refn[0].v,
  713|   524k|      stride2, stride2, x_pos, y_pos, rounding);
  714|       |
  715|   524k|  gmc_data->get_average_mv(gmc_data, &pMB->amv, x_pos, y_pos, dec->quarterpel);
  716|       |
  717|   524k|  pMB->amv.x = gmc_sanitize(pMB->amv.x, dec->quarterpel, fcode);
  718|   524k|  pMB->amv.y = gmc_sanitize(pMB->amv.y, dec->quarterpel, fcode);
  719|       |
  720|   524k|  pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = pMB->amv;
  721|       |
  722|   524k|  stop_transfer_timer();
  723|       |
  724|   524k|  if (cbp)
  ------------------
  |  Branch (724:7): [True: 58.3k, False: 466k]
  ------------------
  725|  58.3k|    decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
  726|       |
  727|   524k|}
decoder.c:decoder_mb_decode:
  359|   317k|{
  360|   317k|  DECLARE_ALIGNED_MATRIX(data, 1, 64, int16_t, CACHE_LINE);
  ------------------
  |  |  287|   317k|	type name##_storage[(sizex)*(sizey)+(alignment)-1]; \
  |  |  288|   317k|type * name = (type *) (((ptr_t) name##_storage+(alignment - 1)) & ~((ptr_t)(alignment)-1))
  ------------------
  361|       |
  362|   317k|  int stride = dec->edged_width;
  363|   317k|  int i;
  364|   317k|  const uint32_t iQuant = MAX(1, pMB->quant);
  ------------------
  |  |   35|   317k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 1.43k, False: 316k]
  |  |  ------------------
  ------------------
  365|   317k|  const int direction = dec->alternate_vertical_scan ? 2 : 0;
  ------------------
  |  Branch (365:25): [True: 24.8k, False: 292k]
  ------------------
  366|   317k|  typedef void (*get_inter_block_function_t)(
  367|   317k|      Bitstream * bs,
  368|   317k|      int16_t * block,
  369|   317k|      int direction,
  370|   317k|      const int quant,
  371|   317k|      const uint16_t *matrix);
  372|   317k|  typedef void (*add_residual_function_t)(
  373|   317k|      uint8_t *predicted_block,
  374|   317k|      const int16_t *residual,
  375|   317k|      int stride);
  376|       |
  377|   317k|  const get_inter_block_function_t get_inter_block = (dec->quant_type == 0)
  ------------------
  |  Branch (377:54): [True: 153k, False: 163k]
  ------------------
  378|   317k|    ? (get_inter_block_function_t)get_inter_block_h263
  379|   317k|    : (get_inter_block_function_t)get_inter_block_mpeg;
  380|       |
  381|   317k|  uint8_t *dst[6];
  382|   317k|  int strides[6];
  383|       |
  384|       |
  385|   317k|  if (dec->interlacing && pMB->field_dct) {
  ------------------
  |  Branch (385:7): [True: 202k, False: 115k]
  |  Branch (385:27): [True: 79.9k, False: 122k]
  ------------------
  386|  79.9k|    dst[0] = pY_Cur;
  387|  79.9k|    dst[1] = pY_Cur + 8;
  388|  79.9k|    dst[2] = pY_Cur + stride;
  389|  79.9k|    dst[3] = dst[2] + 8;
  390|  79.9k|    dst[4] = pU_Cur;
  391|  79.9k|    dst[5] = pV_Cur;
  392|  79.9k|    strides[0] = strides[1] = strides[2] = strides[3] = stride*2;
  393|  79.9k|    strides[4] = stride/2;
  394|  79.9k|    strides[5] = stride/2;
  395|   237k|  } else {
  396|   237k|    dst[0] = pY_Cur;
  397|   237k|    dst[1] = pY_Cur + 8;
  398|   237k|    dst[2] = pY_Cur + 8*stride;
  399|   237k|    dst[3] = dst[2] + 8;
  400|   237k|    dst[4] = pU_Cur;
  401|   237k|    dst[5] = pV_Cur;
  402|   237k|    strides[0] = strides[1] = strides[2] = strides[3] = stride;
  403|   237k|    strides[4] = stride/2;
  404|   237k|    strides[5] = stride/2;
  405|   237k|  }
  406|       |
  407|  2.22M|  for (i = 0; i < 6; i++) {
  ------------------
  |  Branch (407:15): [True: 1.90M, False: 317k]
  ------------------
  408|       |    /* Process only coded blocks */
  409|  1.90M|    if (cbp & (1 << (5 - i))) {
  ------------------
  |  Branch (409:9): [True: 631k, False: 1.27M]
  ------------------
  410|       |
  411|       |      /* Clear the block */
  412|   631k|      memset(&data[0], 0, 64*sizeof(int16_t));
  413|       |
  414|       |      /* Decode coeffs and dequantize on the fly */
  415|   631k|      start_timer();
  416|   631k|      get_inter_block(bs, &data[0], direction, iQuant, get_inter_matrix(dec->mpeg_quant_matrices));
  417|   631k|      stop_coding_timer();
  418|       |
  419|       |      /* iDCT */
  420|   631k|      start_timer();
  421|   631k|      idct((short * const)&data[0]);
  422|   631k|      stop_idct_timer();
  423|       |
  424|       |      /* Add this residual to the predicted block */
  425|   631k|      start_timer();
  426|   631k|      transfer_16to8add(dst[i], &data[0], strides[i]);
  427|   631k|      stop_transfer_timer();
  428|   631k|    }
  429|  1.90M|  }
  430|   317k|}
decoder.c:get_motion_vector_interlaced:
  858|  86.8k|{
  859|  86.8k|  const int scale_fac = 1 << (fcode - 1);
  860|  86.8k|  const int high = (32 * scale_fac) - 1;
  861|  86.8k|  const int low = ((-32) * scale_fac);
  862|  86.8k|  const int range = (64 * scale_fac);
  863|       |  
  864|       |  /* Get interlaced prediction */
  865|  86.8k|  const VECTOR pmv=get_pmv2_interlaced(dec->mbs,dec->mb_width,bound,x,y,k);
  866|  86.8k|  VECTOR mv,mvf1,mvf2;
  867|       |
  868|  86.8k|  if(!pMB->field_pred)
  ------------------
  |  Branch (868:6): [True: 57.3k, False: 29.4k]
  ------------------
  869|  57.3k|  {
  870|  57.3k|    mv.x = get_mv(bs,fcode);
  871|  57.3k|    mv.y = get_mv(bs,fcode);
  872|       |    
  873|  57.3k|    mv.x += pmv.x;
  874|  57.3k|    mv.y += pmv.y;
  875|       |
  876|  57.3k|    if(mv.x<low) {
  ------------------
  |  Branch (876:8): [True: 11.1k, False: 46.2k]
  ------------------
  877|  11.1k|      mv.x += range;
  878|  46.2k|    } else if (mv.x>high) {
  ------------------
  |  Branch (878:16): [True: 36.2k, False: 9.95k]
  ------------------
  879|  36.2k|      mv.x-=range;
  880|  36.2k|    }
  881|       |
  882|  57.3k|    if (mv.y < low) {
  ------------------
  |  Branch (882:9): [True: 12.6k, False: 44.7k]
  ------------------
  883|  12.6k|      mv.y += range;
  884|  44.7k|    } else if (mv.y > high) {
  ------------------
  |  Branch (884:16): [True: 34.7k, False: 9.96k]
  ------------------
  885|  34.7k|      mv.y -= range;
  886|  34.7k|    }
  887|       |    
  888|  57.3k|    pMB->mvs[0]=pMB->mvs[1]=pMB->mvs[2]=pMB->mvs[3]=mv;
  889|  57.3k|  }
  890|  29.4k|  else
  891|  29.4k|  {
  892|  29.4k|    mvf1.x = get_mv(bs, fcode);
  893|  29.4k|    mvf1.y = get_mv(bs, fcode);
  894|       |
  895|  29.4k|    mvf1.x += pmv.x;
  896|  29.4k|    mvf1.y = 2*(mvf1.y+pmv.y/2); /* It's multiple of 2 */
  897|       |
  898|  29.4k|    if (mvf1.x < low) {
  ------------------
  |  Branch (898:9): [True: 4.95k, False: 24.5k]
  ------------------
  899|  4.95k|      mvf1.x += range;
  900|  24.5k|    } else if (mvf1.x > high) {
  ------------------
  |  Branch (900:16): [True: 19.5k, False: 4.94k]
  ------------------
  901|  19.5k|      mvf1.x -= range;
  902|  19.5k|    }
  903|       |
  904|  29.4k|    if (mvf1.y < low) {
  ------------------
  |  Branch (904:9): [True: 7.22k, False: 22.2k]
  ------------------
  905|  7.22k|      mvf1.y += range;
  906|  22.2k|    } else if (mvf1.y > high) {
  ------------------
  |  Branch (906:16): [True: 17.0k, False: 5.23k]
  ------------------
  907|  17.0k|      mvf1.y -= range;
  908|  17.0k|    }
  909|       |
  910|  29.4k|    mvf2.x = get_mv(bs, fcode);
  911|  29.4k|    mvf2.y = get_mv(bs, fcode);
  912|       |
  913|  29.4k|    mvf2.x += pmv.x;
  914|  29.4k|    mvf2.y = 2*(mvf2.y+pmv.y/2); /* It's multiple of 2 */
  915|       |
  916|  29.4k|    if (mvf2.x < low) {
  ------------------
  |  Branch (916:9): [True: 3.89k, False: 25.5k]
  ------------------
  917|  3.89k|      mvf2.x += range;
  918|  25.5k|    } else if (mvf2.x > high) {
  ------------------
  |  Branch (918:16): [True: 20.3k, False: 5.21k]
  ------------------
  919|  20.3k|      mvf2.x -= range;
  920|  20.3k|    }
  921|       |
  922|  29.4k|    if (mvf2.y < low) {
  ------------------
  |  Branch (922:9): [True: 4.01k, False: 25.4k]
  ------------------
  923|  4.01k|      mvf2.y += range;
  924|  25.4k|    } else if (mvf2.y > high) {
  ------------------
  |  Branch (924:16): [True: 20.2k, False: 5.23k]
  ------------------
  925|  20.2k|      mvf2.y -= range;
  926|  20.2k|    }
  927|       |
  928|  29.4k|    pMB->mvs[0]=mvf1;
  929|  29.4k|    pMB->mvs[1]=mvf2;
  930|  29.4k|    pMB->mvs[2].x=pMB->mvs[3].x=0;
  931|  29.4k|    pMB->mvs[2].y=pMB->mvs[3].y=0;
  932|       |  
  933|       |    /* Calculate average for as it is field predicted */
  934|  29.4k|    pMB->mvs_avg.x=DIV2ROUND(pMB->mvs[0].x+pMB->mvs[1].x);
  ------------------
  |  |   64|  29.4k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  935|  29.4k|    pMB->mvs_avg.y=DIV2ROUND(pMB->mvs[0].y+pMB->mvs[1].y);
  ------------------
  |  |   64|  29.4k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  936|  29.4k|  }
  937|  86.8k|}
decoder.c:get_motion_vector:
  814|   227k|{
  815|       |
  816|   227k|  const int scale_fac = 1 << (fcode - 1);
  817|   227k|  const int high = (32 * scale_fac) - 1;
  818|   227k|  const int low = ((-32) * scale_fac);
  819|   227k|  const int range = (64 * scale_fac);
  820|       |
  821|   227k|  const VECTOR pmv = get_pmv2(dec->mbs, dec->mb_width, bound, x, y, k);
  822|   227k|  VECTOR mv;
  823|       |
  824|   227k|  mv.x = get_mv(bs, fcode);
  825|   227k|  mv.y = get_mv(bs, fcode);
  826|       |
  827|   227k|  DPRINTF(XVID_DEBUG_MV,"mv_diff (%i,%i) pred (%i,%i) result (%i,%i)\n", mv.x, mv.y, pmv.x, pmv.y, mv.x+pmv.x, mv.y+pmv.y);
  ------------------
  |  |  202|   227k|#define XVID_DEBUG_MV        (1<< 6)
  ------------------
  828|       |
  829|   227k|  mv.x += pmv.x;
  830|   227k|  mv.y += pmv.y;
  831|       |
  832|   227k|  if (mv.x < low) {
  ------------------
  |  Branch (832:7): [True: 41.7k, False: 185k]
  ------------------
  833|  41.7k|    mv.x += range;
  834|   185k|  } else if (mv.x > high) {
  ------------------
  |  Branch (834:14): [True: 151k, False: 34.0k]
  ------------------
  835|   151k|    mv.x -= range;
  836|   151k|  }
  837|       |
  838|   227k|  if (mv.y < low) {
  ------------------
  |  Branch (838:7): [True: 60.2k, False: 167k]
  ------------------
  839|  60.2k|    mv.y += range;
  840|   167k|  } else if (mv.y > high) {
  ------------------
  |  Branch (840:14): [True: 133k, False: 34.0k]
  ------------------
  841|   133k|    mv.y -= range;
  842|   133k|  }
  843|       |
  844|   227k|  ret_mv->x = mv.x;
  845|   227k|  ret_mv->y = mv.y;
  846|   227k|}
decoder.c:decoder_mbinter:
  483|   459k|{
  484|   459k|  uint32_t stride = dec->edged_width;
  485|   459k|  uint32_t stride2 = stride / 2;
  486|   459k|  uint32_t i;
  487|       |
  488|   459k|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
  489|       |
  490|   459k|  int uv_dx, uv_dy;
  491|   459k|  VECTOR mv[4]; /* local copy of mvs */
  492|       |
  493|   459k|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  494|   459k|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  495|   459k|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  496|  2.29M|  for (i = 0; i < 4; i++)
  ------------------
  |  Branch (496:15): [True: 1.83M, False: 459k]
  ------------------
  497|  1.83M|    mv[i] = pMB->mvs[i];
  498|       |
  499|   459k|  validate_vector(mv, x_pos, y_pos, dec);
  500|       |
  501|   459k|  start_timer();
  502|       |
  503|   459k|  if ((pMB->mode != MODE_INTER4V) || (bvop)) { /* INTER, INTER_Q, NOT_CODED, FORWARD, BACKWARD */
  ------------------
  |  |   36|   459k|#define MODE_INTER4V	2
  ------------------
  |  Branch (503:7): [True: 367k, False: 91.5k]
  |  Branch (503:38): [True: 52.3k, False: 39.1k]
  ------------------
  504|       |
  505|   419k|    uv_dx = mv[0].x;
  506|   419k|    uv_dy = mv[0].y;
  507|   419k|    if (dec->quarterpel) {
  ------------------
  |  Branch (507:9): [True: 107k, False: 312k]
  ------------------
  508|   107k|			if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|   107k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (508:8): [True: 1.77k, False: 105k]
  ------------------
  509|  1.77k|  				uv_dx = (uv_dx>>1) | (uv_dx&1);
  510|  1.77k|				uv_dy = (uv_dy>>1) | (uv_dy&1);
  511|  1.77k|			}
  512|   105k|			else {
  513|   105k|        uv_dx /= 2;
  514|   105k|        uv_dy /= 2;
  515|   105k|      }
  516|   107k|    }
  517|   419k|    uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3];
  518|   419k|    uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3];
  519|       |
  520|   419k|    if (dec->quarterpel)
  ------------------
  |  Branch (520:9): [True: 107k, False: 312k]
  ------------------
  521|   107k|      interpolate16x16_quarterpel(dec->cur.y, dec->refn[ref].y, dec->qtmp.y, dec->qtmp.y + 64,
  522|   107k|                  dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
  523|   107k|                      mv[0].x, mv[0].y, stride, rounding);
  524|   312k|    else
  525|   312k|      interpolate16x16_switch(dec->cur.y, dec->refn[ref].y, 16*x_pos, 16*y_pos,
  526|   312k|                  mv[0].x, mv[0].y, stride, rounding);
  527|       |
  528|   419k|  } else {  /* MODE_INTER4V */
  529|       |
  530|  39.1k|    if(dec->quarterpel) {
  ------------------
  |  Branch (530:8): [True: 15.1k, False: 23.9k]
  ------------------
  531|  15.1k|			if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|  15.1k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (531:8): [True: 1.35k, False: 13.8k]
  ------------------
  532|  1.35k|				int z;
  533|  1.35k|				uv_dx = 0; uv_dy = 0;
  534|  6.79k|				for (z = 0; z < 4; z++) {
  ------------------
  |  Branch (534:17): [True: 5.43k, False: 1.35k]
  ------------------
  535|  5.43k|				  uv_dx += ((mv[z].x>>1) | (mv[z].x&1));
  536|  5.43k|				  uv_dy += ((mv[z].y>>1) | (mv[z].y&1));
  537|  5.43k|				}
  538|  1.35k|			}
  539|  13.8k|			else {
  540|  13.8k|        uv_dx = (mv[0].x / 2) + (mv[1].x / 2) + (mv[2].x / 2) + (mv[3].x / 2);
  541|  13.8k|        uv_dy = (mv[0].y / 2) + (mv[1].y / 2) + (mv[2].y / 2) + (mv[3].y / 2);
  542|  13.8k|      }
  543|  23.9k|    } else {
  544|  23.9k|      uv_dx = mv[0].x + mv[1].x + mv[2].x + mv[3].x;
  545|  23.9k|      uv_dy = mv[0].y + mv[1].y + mv[2].y + mv[3].y;
  546|  23.9k|    }
  547|       |
  548|  39.1k|    uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf];
  549|  39.1k|    uv_dy = (uv_dy >> 3) + roundtab_76[uv_dy & 0xf];
  550|       |
  551|  39.1k|    if (dec->quarterpel) {
  ------------------
  |  Branch (551:9): [True: 15.1k, False: 23.9k]
  ------------------
  552|  15.1k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  553|  15.1k|                  dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
  554|  15.1k|                  mv[0].x, mv[0].y, stride, rounding);
  555|  15.1k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  556|  15.1k|                  dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos,
  557|  15.1k|                  mv[1].x, mv[1].y, stride, rounding);
  558|  15.1k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  559|  15.1k|                  dec->qtmp.y + 128, 16*x_pos, 16*y_pos + 8,
  560|  15.1k|                  mv[2].x, mv[2].y, stride, rounding);
  561|  15.1k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  562|  15.1k|                  dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos + 8,
  563|  15.1k|                  mv[3].x, mv[3].y, stride, rounding);
  564|  23.9k|    } else {
  565|  23.9k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos, 16*y_pos,
  566|  23.9k|                mv[0].x, mv[0].y, stride, rounding);
  567|  23.9k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos + 8, 16*y_pos,
  568|  23.9k|                mv[1].x, mv[1].y, stride, rounding);
  569|  23.9k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos, 16*y_pos + 8,
  570|  23.9k|                mv[2].x, mv[2].y, stride, rounding);
  571|  23.9k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos + 8, 16*y_pos + 8,
  572|  23.9k|                mv[3].x, mv[3].y, stride, rounding);
  573|  23.9k|    }
  574|  39.1k|  }
  575|       |
  576|       |  /* chroma */
  577|   459k|  interpolate8x8_switch(dec->cur.u, dec->refn[ref].u, 8 * x_pos, 8 * y_pos,
  578|   459k|              uv_dx, uv_dy, stride2, rounding);
  579|   459k|  interpolate8x8_switch(dec->cur.v, dec->refn[ref].v, 8 * x_pos, 8 * y_pos,
  580|   459k|              uv_dx, uv_dy, stride2, rounding);
  581|       |
  582|   459k|  stop_comp_timer();
  583|       |
  584|   459k|  if (cbp)
  ------------------
  |  Branch (584:7): [True: 160k, False: 298k]
  ------------------
  585|   160k|    decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
  586|   459k|}
decoder.c:validate_vector:
  434|  2.00M|{
  435|       |  /* clip a vector to valid range
  436|       |     prevents crashes if bitstream is broken
  437|       |  */
  438|  2.00M|  int shift = 5 + dec->quarterpel;
  439|  2.00M|  int xborder_high = (int)(dec->mb_width - x_pos) << shift;
  440|  2.00M|  int xborder_low = (-(int)x_pos-1) << shift;
  441|  2.00M|  int yborder_high = (int)(dec->mb_height - y_pos) << shift;
  442|  2.00M|  int yborder_low = (-(int)y_pos-1) << shift;
  443|       |
  444|  2.00M|#define CHECK_MV(mv) \
  445|  2.00M|  do { \
  446|  2.00M|  if ((mv).x > xborder_high) { \
  447|  2.00M|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  448|  2.00M|    (mv).x = xborder_high; \
  449|  2.00M|  } else if ((mv).x < xborder_low) { \
  450|  2.00M|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  451|  2.00M|    (mv).x = xborder_low; \
  452|  2.00M|  } \
  453|  2.00M|  if ((mv).y > yborder_high) { \
  454|  2.00M|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  455|  2.00M|    (mv).y = yborder_high; \
  456|  2.00M|  } else if ((mv).y < yborder_low) { \
  457|  2.00M|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  458|  2.00M|    (mv).y = yborder_low; \
  459|  2.00M|  } \
  460|  2.00M|  } while (0)
  461|       |
  462|  2.00M|  CHECK_MV(mv[0]);
  ------------------
  |  |  445|  2.00M|  do { \
  |  |  446|  2.00M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 41.6k, False: 1.95M]
  |  |  ------------------
  |  |  447|  41.6k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  41.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  41.6k|    (mv).x = xborder_high; \
  |  |  449|  1.95M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 49.0k, False: 1.91M]
  |  |  ------------------
  |  |  450|  49.0k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  49.0k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  49.0k|    (mv).x = xborder_low; \
  |  |  452|  49.0k|  } \
  |  |  453|  2.00M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 41.8k, False: 1.95M]
  |  |  ------------------
  |  |  454|  41.8k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  41.8k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  41.8k|    (mv).y = yborder_high; \
  |  |  456|  1.95M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 47.0k, False: 1.91M]
  |  |  ------------------
  |  |  457|  47.0k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  47.0k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  47.0k|    (mv).y = yborder_low; \
  |  |  459|  47.0k|  } \
  |  |  460|  2.00M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  463|  2.00M|  CHECK_MV(mv[1]);
  ------------------
  |  |  445|  2.00M|  do { \
  |  |  446|  2.00M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 39.3k, False: 1.96M]
  |  |  ------------------
  |  |  447|  39.3k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  39.3k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  39.3k|    (mv).x = xborder_high; \
  |  |  449|  1.96M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 49.7k, False: 1.91M]
  |  |  ------------------
  |  |  450|  49.7k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  49.7k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  49.7k|    (mv).x = xborder_low; \
  |  |  452|  49.7k|  } \
  |  |  453|  2.00M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 41.3k, False: 1.96M]
  |  |  ------------------
  |  |  454|  41.3k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  41.3k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  41.3k|    (mv).y = yborder_high; \
  |  |  456|  1.96M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 47.4k, False: 1.91M]
  |  |  ------------------
  |  |  457|  47.4k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  47.4k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  47.4k|    (mv).y = yborder_low; \
  |  |  459|  47.4k|  } \
  |  |  460|  2.00M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  464|  2.00M|  CHECK_MV(mv[2]);
  ------------------
  |  |  445|  2.00M|  do { \
  |  |  446|  2.00M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 37.4k, False: 1.96M]
  |  |  ------------------
  |  |  447|  37.4k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  37.4k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  37.4k|    (mv).x = xborder_high; \
  |  |  449|  1.96M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 45.2k, False: 1.91M]
  |  |  ------------------
  |  |  450|  45.2k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  45.2k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  45.2k|    (mv).x = xborder_low; \
  |  |  452|  45.2k|  } \
  |  |  453|  2.00M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 40.0k, False: 1.96M]
  |  |  ------------------
  |  |  454|  40.0k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  40.0k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  40.0k|    (mv).y = yborder_high; \
  |  |  456|  1.96M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 46.5k, False: 1.91M]
  |  |  ------------------
  |  |  457|  46.5k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  46.5k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  46.5k|    (mv).y = yborder_low; \
  |  |  459|  46.5k|  } \
  |  |  460|  2.00M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  465|  2.00M|  CHECK_MV(mv[3]);
  ------------------
  |  |  445|  2.00M|  do { \
  |  |  446|  2.00M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 37.6k, False: 1.96M]
  |  |  ------------------
  |  |  447|  37.6k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  37.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  37.6k|    (mv).x = xborder_high; \
  |  |  449|  1.96M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 45.6k, False: 1.91M]
  |  |  ------------------
  |  |  450|  45.6k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  45.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  45.6k|    (mv).x = xborder_low; \
  |  |  452|  45.6k|  } \
  |  |  453|  2.00M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 40.6k, False: 1.96M]
  |  |  ------------------
  |  |  454|  40.6k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  40.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  40.6k|    (mv).y = yborder_high; \
  |  |  456|  1.96M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 46.3k, False: 1.91M]
  |  |  ------------------
  |  |  457|  46.3k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  46.3k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  46.3k|    (mv).y = yborder_low; \
  |  |  459|  46.3k|  } \
  |  |  460|  2.00M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  466|  2.00M|}
decoder.c:decoder_mbinter_field:
  599|  29.4k|{
  600|  29.4k|  uint32_t stride = dec->edged_width;
  601|  29.4k|  uint32_t stride2 = stride / 2;
  602|       |
  603|  29.4k|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
  604|       |
  605|  29.4k|  int uvtop_dx, uvtop_dy;
  606|  29.4k|  int uvbot_dx, uvbot_dy;
  607|  29.4k|  VECTOR mv[4]; /* local copy of mvs */
  608|       |
  609|       |  /* Get pointer to memory areas */
  610|  29.4k|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  611|  29.4k|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  612|  29.4k|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  613|       |  
  614|  29.4k|  mv[0] = pMB->mvs[0];
  615|  29.4k|  mv[1] = pMB->mvs[1];
  616|  29.4k|  memset(&mv[2],0,2*sizeof(VECTOR));
  617|       |
  618|  29.4k|  validate_vector(mv, x_pos, y_pos, dec);
  619|       |
  620|  29.4k|  start_timer();
  621|       |
  622|  29.4k|  if((pMB->mode!=MODE_INTER4V) || (bvop))   /* INTER, INTER_Q, NOT_CODED, FORWARD, BACKWARD */
  ------------------
  |  |   36|  29.4k|#define MODE_INTER4V	2
  ------------------
  |  Branch (622:6): [True: 29.4k, False: 0]
  |  Branch (622:35): [True: 0, False: 0]
  ------------------
  623|  29.4k|  { 
  624|       |    /* Prepare top field vector */
  625|  29.4k|    uvtop_dx = DIV2ROUND(mv[0].x);
  ------------------
  |  |   64|  29.4k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  626|  29.4k|    uvtop_dy = DIV2ROUND(mv[0].y);
  ------------------
  |  |   64|  29.4k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  627|       |
  628|       |    /* Prepare bottom field vector */
  629|  29.4k|    uvbot_dx = DIV2ROUND(mv[1].x);
  ------------------
  |  |   64|  29.4k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  630|  29.4k|    uvbot_dy = DIV2ROUND(mv[1].y);
  ------------------
  |  |   64|  29.4k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  631|       |
  632|  29.4k|    if(dec->quarterpel)
  ------------------
  |  Branch (632:8): [True: 8.92k, False: 20.5k]
  ------------------
  633|  8.92k|    {
  634|       |      /* NOT supported */
  635|  8.92k|    }
  636|  20.5k|    else
  637|  20.5k|    {
  638|       |      /* Interpolate top field left part(we use double stride for every 2nd line) */
  639|  20.5k|      interpolate8x8_switch(dec->cur.y,dec->refn[ref].y+pMB->field_for_top*stride,
  640|  20.5k|                            16*x_pos,8*y_pos,mv[0].x, mv[0].y>>1,2*stride, rounding);
  641|       |      /* top field right part */
  642|  20.5k|      interpolate8x8_switch(dec->cur.y,dec->refn[ref].y+pMB->field_for_top*stride,
  643|  20.5k|                            16*x_pos+8,8*y_pos,mv[0].x, mv[0].y>>1,2*stride, rounding);
  644|       |
  645|       |      /* Interpolate bottom field left part(we use double stride for every 2nd line) */
  646|  20.5k|      interpolate8x8_switch(dec->cur.y+stride,dec->refn[ref].y+pMB->field_for_bot*stride,
  647|  20.5k|                            16*x_pos,8*y_pos,mv[1].x, mv[1].y>>1,2*stride, rounding);
  648|       |      /* Bottom field right part */
  649|  20.5k|      interpolate8x8_switch(dec->cur.y+stride,dec->refn[ref].y+pMB->field_for_bot*stride,
  650|  20.5k|                            16*x_pos+8,8*y_pos,mv[1].x, mv[1].y>>1,2*stride, rounding);
  651|       |
  652|       |      /* Interpolate field1 U */
  653|  20.5k|      interpolate8x4_switch(dec->cur.u,dec->refn[ref].u+pMB->field_for_top*stride2,
  654|  20.5k|                            8*x_pos,4*y_pos,uvtop_dx,DIV2ROUND(uvtop_dy),stride,rounding);
  ------------------
  |  |   64|  20.5k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  655|       |      
  656|       |      /* Interpolate field1 V */
  657|  20.5k|      interpolate8x4_switch(dec->cur.v,dec->refn[ref].v+pMB->field_for_top*stride2,
  658|  20.5k|                            8*x_pos,4*y_pos,uvtop_dx,DIV2ROUND(uvtop_dy),stride,rounding);
  ------------------
  |  |   64|  20.5k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  659|       |    
  660|       |      /* Interpolate field2 U */
  661|  20.5k|      interpolate8x4_switch(dec->cur.u+stride2,dec->refn[ref].u+pMB->field_for_bot*stride2,
  662|  20.5k|                            8*x_pos,4*y_pos,uvbot_dx,DIV2ROUND(uvbot_dy),stride,rounding);
  ------------------
  |  |   64|  20.5k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  663|       |    
  664|       |      /* Interpolate field2 V */
  665|  20.5k|      interpolate8x4_switch(dec->cur.v+stride2,dec->refn[ref].v+pMB->field_for_bot*stride2,
  666|  20.5k|                            8*x_pos,4*y_pos,uvbot_dx,DIV2ROUND(uvbot_dy),stride,rounding);
  ------------------
  |  |   64|  20.5k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  667|  20.5k|    }
  668|  29.4k|  } 
  669|      0|  else 
  670|      0|  {
  671|       |    /* We don't expect 4 motion vectors in interlaced mode */
  672|      0|  }
  673|       |
  674|  29.4k|  stop_comp_timer();
  675|       |
  676|       |  /* Must add error correction? */
  677|  29.4k|  if(cbp)
  ------------------
  |  Branch (677:6): [True: 17.9k, False: 11.5k]
  ------------------
  678|  17.9k|   decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
  679|  29.4k|}
decoder.c:decoder_bframe:
 1360|  2.41k|{
 1361|  2.41k|  uint32_t x, y;
 1362|  2.41k|  VECTOR mv;
 1363|  2.41k|  const VECTOR zeromv = {0,0};
 1364|  2.41k|  int i;
 1365|  2.41k|  int resync_len;
 1366|       |
 1367|  2.41k|  if (!dec->is_edged[0]) {
  ------------------
  |  Branch (1367:7): [True: 2.41k, False: 0]
  ------------------
 1368|  2.41k|    start_timer();
 1369|  2.41k|    image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height,
 1370|  2.41k|            dec->width, dec->height, dec->bs_version);
 1371|  2.41k|    dec->is_edged[0] = 1;
 1372|  2.41k|    stop_edges_timer();
 1373|  2.41k|  }
 1374|       |
 1375|  2.41k|  if (!dec->is_edged[1]) {
  ------------------
  |  Branch (1375:7): [True: 2.41k, False: 0]
  ------------------
 1376|  2.41k|    start_timer();
 1377|  2.41k|    image_setedges(&dec->refn[1], dec->edged_width, dec->edged_height,
 1378|  2.41k|            dec->width, dec->height, dec->bs_version);
 1379|  2.41k|    dec->is_edged[1] = 1;
 1380|  2.41k|    stop_edges_timer();
 1381|  2.41k|  }
 1382|       |
 1383|  2.41k|  resync_len = get_resync_len_b(fcode_backward, fcode_forward);
 1384|   128k|  for (y = 0; y < dec->mb_height; y++) {
  ------------------
  |  Branch (1384:15): [True: 125k, False: 2.41k]
  ------------------
 1385|       |    /* Initialize Pred Motion Vector */
 1386|   125k|    dec->p_fmv = dec->p_bmv = zeromv;
 1387|  5.11M|    for (x = 0; x < dec->mb_width; x++) {
  ------------------
  |  Branch (1387:17): [True: 4.98M, False: 125k]
  ------------------
 1388|  4.98M|      MACROBLOCK *mb = &dec->mbs[y * dec->mb_width + x];
 1389|  4.98M|      MACROBLOCK *last_mb = &dec->last_mbs[y * dec->mb_width + x];
 1390|  4.98M|      int intra_dc_threshold; /* fake variable */
 1391|       |
 1392|  4.98M|      mv =
 1393|  4.98M|      mb->b_mvs[0] = mb->b_mvs[1] = mb->b_mvs[2] = mb->b_mvs[3] =
 1394|  4.98M|      mb->mvs[0] = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = zeromv;
 1395|  4.98M|      mb->quant = quant;
 1396|       |
 1397|       |      /*
 1398|       |       * skip if the co-located P_VOP macroblock is not coded
 1399|       |       * if not codec in co-located S_VOP macroblock is _not_
 1400|       |       * automatically skipped
 1401|       |       */
 1402|       |
 1403|  4.98M|      if (last_mb->mode == MODE_NOT_CODED) {
  ------------------
  |  |   39|  4.98M|#define MODE_NOT_CODED	16
  ------------------
  |  Branch (1403:11): [True: 0, False: 4.98M]
  ------------------
 1404|      0|        mb->cbp = 0;
 1405|      0|        mb->mode = MODE_FORWARD;
  ------------------
  |  |   47|      0|#define MODE_FORWARD		3
  ------------------
 1406|      0|        decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 1, 1);
 1407|      0|        continue;
 1408|      0|      }
 1409|       |
 1410|  4.98M|      if (check_resync_marker(bs, resync_len)) {
  ------------------
  |  Branch (1410:11): [True: 4.14k, False: 4.98M]
  ------------------
 1411|  4.14k|        int bound = read_video_packet_header(bs, dec, resync_len, &quant,
 1412|  4.14k|                           &fcode_forward, &fcode_backward, &intra_dc_threshold);
 1413|       |
 1414|  4.14k|		bound = MAX(0, bound-1); /* valid bound must always be >0 */
  ------------------
  |  |   35|  4.14k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 1.30k, False: 2.83k]
  |  |  ------------------
  ------------------
 1415|  4.14k|        x = bound % dec->mb_width;
 1416|  4.14k|        y = MIN((bound / dec->mb_width), (dec->mb_height-1));
  ------------------
  |  |   34|  4.14k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 3.48k, False: 657]
  |  |  ------------------
  ------------------
 1417|       |        /* reset predicted macroblocks */
 1418|  4.14k|        dec->p_fmv = dec->p_bmv = zeromv;
 1419|       |        /* update resync len with new fcodes */
 1420|  4.14k|        resync_len = get_resync_len_b(fcode_backward, fcode_forward);
 1421|  4.14k|		continue; /* re-init loop */
 1422|  4.14k|	  }
 1423|       |
 1424|  4.98M|      if (!BitstreamGetBit(bs)) { /* modb=='0' */
  ------------------
  |  Branch (1424:11): [True: 4.49M, False: 488k]
  ------------------
 1425|  4.49M|        const uint8_t modb2 = BitstreamGetBit(bs);
 1426|       |
 1427|  4.49M|        mb->mode = get_mbtype(bs);
 1428|       |
 1429|  4.49M|        if (!modb2)   /* modb=='00' */
  ------------------
  |  Branch (1429:13): [True: 4.30M, False: 188k]
  ------------------
 1430|  4.30M|          mb->cbp = BitstreamGetBits(bs, 6);
 1431|   188k|        else
 1432|   188k|          mb->cbp = 0;
 1433|       |
 1434|  4.49M|        if (mb->mode && mb->cbp) {
  ------------------
  |  Branch (1434:13): [True: 4.33M, False: 154k]
  |  Branch (1434:25): [True: 108k, False: 4.23M]
  ------------------
 1435|   108k|          quant += get_dbquant(bs);
 1436|   108k|          if (quant > 31)
  ------------------
  |  Branch (1436:15): [True: 7.16k, False: 101k]
  ------------------
 1437|  7.16k|            quant = 31;
 1438|   101k|          else if (quant < 1)
  ------------------
  |  Branch (1438:20): [True: 7.13k, False: 94.4k]
  ------------------
 1439|  7.13k|            quant = 1;
 1440|   108k|        }
 1441|  4.49M|        mb->quant = quant;
 1442|       |
 1443|  4.49M|        if (dec->interlacing) {
  ------------------
  |  Branch (1443:13): [True: 3.99M, False: 499k]
  ------------------
 1444|  3.99M|          if (mb->cbp) {
  ------------------
  |  Branch (1444:15): [True: 117k, False: 3.87M]
  ------------------
 1445|   117k|            mb->field_dct = BitstreamGetBit(bs);
 1446|   117k|            DPRINTF(XVID_DEBUG_MB,"decp: field_dct: %i\n", mb->field_dct);
  ------------------
  |  |  200|   117k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1447|   117k|          }
 1448|       |
 1449|  3.99M|          if (mb->mode) {
  ------------------
  |  Branch (1449:15): [True: 3.87M, False: 115k]
  ------------------
 1450|  3.87M|            mb->field_pred = BitstreamGetBit(bs);
 1451|  3.87M|            DPRINTF(XVID_DEBUG_MB, "decp: field_pred: %i\n", mb->field_pred);
  ------------------
  |  |  200|  3.87M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1452|       |
 1453|  3.87M|            if (mb->field_pred) {
  ------------------
  |  Branch (1453:17): [True: 83.6k, False: 3.79M]
  ------------------
 1454|  83.6k|              mb->field_for_top = BitstreamGetBit(bs);
 1455|  83.6k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_top: %i\n", mb->field_for_top);
  ------------------
  |  |  200|  83.6k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1456|  83.6k|              mb->field_for_bot = BitstreamGetBit(bs);
 1457|  83.6k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_bot: %i\n", mb->field_for_bot);
  ------------------
  |  |  200|  83.6k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1458|  83.6k|            }
 1459|  3.87M|          }
 1460|  3.99M|        }
 1461|       |
 1462|  4.49M|      } else {
 1463|   488k|        mb->mode = MODE_DIRECT_NONE_MV;
  ------------------
  |  |   48|   488k|#define MODE_DIRECT_NONE_MV	4
  ------------------
 1464|   488k|        mb->cbp = 0;
 1465|   488k|      }
 1466|       |
 1467|  4.98M|      switch (mb->mode) {
 1468|   154k|      case MODE_DIRECT:
  ------------------
  |  |   44|   154k|#define MODE_DIRECT			0
  ------------------
  |  Branch (1468:7): [True: 154k, False: 4.82M]
  ------------------
 1469|   154k|        get_b_motion_vector(bs, &mv, 1, zeromv, dec, x, y);
 1470|       |
 1471|   643k|      case MODE_DIRECT_NONE_MV:
  ------------------
  |  |   48|   643k|#define MODE_DIRECT_NONE_MV	4
  ------------------
  |  Branch (1471:7): [True: 488k, False: 4.49M]
  ------------------
 1472|  3.21M|        for (i = 0; i < 4; i++) {
  ------------------
  |  Branch (1472:21): [True: 2.57M, False: 643k]
  ------------------
 1473|  2.57M|          mb->mvs[i].x = last_mb->mvs[i].x*dec->time_bp/dec->time_pp + mv.x;
 1474|  2.57M|          mb->mvs[i].y = last_mb->mvs[i].y*dec->time_bp/dec->time_pp + mv.y;
 1475|       |
 1476|  2.57M|          mb->b_mvs[i].x = (mv.x)
  ------------------
  |  Branch (1476:28): [True: 259k, False: 2.31M]
  ------------------
 1477|  2.57M|            ?  mb->mvs[i].x - last_mb->mvs[i].x
 1478|  2.57M|            : last_mb->mvs[i].x*(dec->time_bp - dec->time_pp)/dec->time_pp;
 1479|  2.57M|          mb->b_mvs[i].y = (mv.y)
  ------------------
  |  Branch (1479:28): [True: 196k, False: 2.37M]
  ------------------
 1480|  2.57M|            ? mb->mvs[i].y - last_mb->mvs[i].y
 1481|  2.57M|            : last_mb->mvs[i].y*(dec->time_bp - dec->time_pp)/dec->time_pp;
 1482|  2.57M|        }
 1483|       |
 1484|   643k|        decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0],
 1485|   643k|                        mb, x, y, bs, 1);
 1486|   643k|        break;
 1487|       |
 1488|   113k|      case MODE_INTERPOLATE:
  ------------------
  |  |   45|   113k|#define MODE_INTERPOLATE	1
  ------------------
  |  Branch (1488:7): [True: 113k, False: 4.86M]
  ------------------
 1489|   113k|        get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv, dec, x, y);
 1490|   113k|        dec->p_fmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1491|       |
 1492|   113k|        get_b_motion_vector(bs, &mb->b_mvs[0], fcode_backward, dec->p_bmv, dec, x, y);
 1493|   113k|        dec->p_bmv = mb->b_mvs[1] = mb->b_mvs[2] = mb->b_mvs[3] = mb->b_mvs[0];
 1494|       |
 1495|   113k|        decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0],
 1496|   113k|                      mb, x, y, bs, 0);
 1497|   113k|        break;
 1498|       |
 1499|  52.3k|      case MODE_BACKWARD:
  ------------------
  |  |   46|  52.3k|#define MODE_BACKWARD		2
  ------------------
  |  Branch (1499:7): [True: 52.3k, False: 4.93M]
  ------------------
 1500|  52.3k|        get_b_motion_vector(bs, &mb->mvs[0], fcode_backward, dec->p_bmv, dec, x, y);
 1501|  52.3k|        dec->p_bmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1502|       |
 1503|  52.3k|        decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 0, 1);
 1504|  52.3k|        break;
 1505|       |
 1506|  28.7k|      case MODE_FORWARD:
  ------------------
  |  |   47|  28.7k|#define MODE_FORWARD		3
  ------------------
  |  Branch (1506:7): [True: 28.7k, False: 4.95M]
  ------------------
 1507|  28.7k|        get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv, dec, x, y);
 1508|  28.7k|        dec->p_fmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1509|       |
 1510|  28.7k|        decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 1, 1);
 1511|  28.7k|        break;
 1512|       |
 1513|  4.14M|      default:
  ------------------
  |  Branch (1513:7): [True: 4.14M, False: 837k]
  ------------------
 1514|  4.14M|        DPRINTF(XVID_DEBUG_ERROR,"Not supported B-frame mb_type = %i\n", mb->mode);
  ------------------
  |  |  196|  4.14M|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1515|  4.98M|      }
 1516|  4.98M|    } /* End of for */
 1517|   125k|  }
 1518|  2.41k|}
decoder.c:get_resync_len_b:
 1348|  6.56k|                                     const int fcode_forward) {
 1349|  6.56k|  int resync_len = ((fcode_forward>fcode_backward) ? fcode_forward : fcode_backward) - 1;
  ------------------
  |  Branch (1349:21): [True: 1.53k, False: 5.02k]
  ------------------
 1350|  6.56k|  if (resync_len < 1) resync_len = 1;
  ------------------
  |  Branch (1350:7): [True: 2.98k, False: 3.57k]
  ------------------
 1351|  6.56k|  return resync_len;
 1352|  6.56k|}
decoder.c:get_mbtype:
 1337|  4.49M|{
 1338|  4.49M|  int32_t mb_type;
 1339|       |
 1340|  21.3M|  for (mb_type = 0; mb_type <= 3; mb_type++)
  ------------------
  |  Branch (1340:21): [True: 17.2M, False: 4.14M]
  ------------------
 1341|  17.2M|    if (BitstreamGetBit(bs))
  ------------------
  |  Branch (1341:9): [True: 349k, False: 16.8M]
  ------------------
 1342|   349k|      return (mb_type);
 1343|       |
 1344|  4.14M|  return -1;
 1345|  4.49M|}
decoder.c:get_dbquant:
 1318|   108k|{
 1319|   108k|  if (!BitstreamGetBit(bs))   /*  '0' */
  ------------------
  |  Branch (1319:7): [True: 69.3k, False: 39.3k]
  ------------------
 1320|  69.3k|    return (0);
 1321|  39.3k|  else if (!BitstreamGetBit(bs))  /* '10' */
  ------------------
  |  Branch (1321:12): [True: 20.0k, False: 19.3k]
  ------------------
 1322|  20.0k|    return (-2);
 1323|  19.3k|  else              /* '11' */
 1324|  19.3k|    return (2);
 1325|   108k|}
decoder.c:get_b_motion_vector:
 1127|   462k|{
 1128|   462k|  const int scale_fac = 1 << (fcode - 1);
 1129|   462k|  const int high = (32 * scale_fac) - 1;
 1130|   462k|  const int low = ((-32) * scale_fac);
 1131|   462k|  const int range = (64 * scale_fac);
 1132|       |
 1133|   462k|  int mv_x = get_mv(bs, fcode);
 1134|   462k|  int mv_y = get_mv(bs, fcode);
 1135|       |
 1136|   462k|  mv_x += pmv.x;
 1137|   462k|  mv_y += pmv.y;
 1138|       |
 1139|   462k|  if (mv_x < low)
  ------------------
  |  Branch (1139:7): [True: 73.6k, False: 389k]
  ------------------
 1140|  73.6k|    mv_x += range;
 1141|   389k|  else if (mv_x > high)
  ------------------
  |  Branch (1141:12): [True: 121k, False: 268k]
  ------------------
 1142|   121k|    mv_x -= range;
 1143|       |
 1144|   462k|  if (mv_y < low)
  ------------------
  |  Branch (1144:7): [True: 69.4k, False: 393k]
  ------------------
 1145|  69.4k|    mv_y += range;
 1146|   393k|  else if (mv_y > high)
  ------------------
  |  Branch (1146:12): [True: 125k, False: 268k]
  ------------------
 1147|   125k|    mv_y -= range;
 1148|       |
 1149|   462k|  mv->x = mv_x;
 1150|   462k|  mv->y = mv_y;
 1151|   462k|}
decoder.c:decoder_bf_interpolate_mbinter:
 1163|   756k|{
 1164|   756k|  uint32_t stride = dec->edged_width;
 1165|   756k|  uint32_t stride2 = stride / 2;
 1166|   756k|  int uv_dx, uv_dy;
 1167|   756k|  int b_uv_dx, b_uv_dy;
 1168|   756k|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
 1169|   756k|  const uint32_t cbp = pMB->cbp;
 1170|       |
 1171|   756k|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
 1172|   756k|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
 1173|   756k|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
 1174|       |
 1175|   756k|  validate_vector(pMB->mvs, x_pos, y_pos, dec);
 1176|   756k|  validate_vector(pMB->b_mvs, x_pos, y_pos, dec);
 1177|       |
 1178|   756k|  if (!direct) {
  ------------------
  |  Branch (1178:7): [True: 113k, False: 643k]
  ------------------
 1179|   113k|    uv_dx = pMB->mvs[0].x;
 1180|   113k|    uv_dy = pMB->mvs[0].y;
 1181|   113k|    b_uv_dx = pMB->b_mvs[0].x;
 1182|   113k|    b_uv_dy = pMB->b_mvs[0].y;
 1183|       |
 1184|   113k|    if (dec->quarterpel) {
  ------------------
  |  Branch (1184:9): [True: 103k, False: 9.49k]
  ------------------
 1185|   103k|			if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|   103k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (1185:8): [True: 391, False: 103k]
  ------------------
 1186|    391|				uv_dx = (uv_dx>>1) | (uv_dx&1);
 1187|    391|				uv_dy = (uv_dy>>1) | (uv_dy&1);
 1188|    391|				b_uv_dx = (b_uv_dx>>1) | (b_uv_dx&1);
 1189|    391|				b_uv_dy = (b_uv_dy>>1) | (b_uv_dy&1);
 1190|    391|			}
 1191|   103k|			else {
 1192|   103k|        uv_dx /= 2;
 1193|   103k|        uv_dy /= 2;
 1194|   103k|        b_uv_dx /= 2;
 1195|   103k|        b_uv_dy /= 2;
 1196|   103k|      }
 1197|   103k|    }
 1198|       |
 1199|   113k|    uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3];
 1200|   113k|    uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3];
 1201|   113k|    b_uv_dx = (b_uv_dx >> 1) + roundtab_79[b_uv_dx & 0x3];
 1202|   113k|    b_uv_dy = (b_uv_dy >> 1) + roundtab_79[b_uv_dy & 0x3];
 1203|       |
 1204|   643k|  } else {
 1205|   643k|	  if (dec->quarterpel) { /* for qpel the /2 shall be done before summation. We've done it right in the encoder in the past. */
  ------------------
  |  Branch (1205:8): [True: 461k, False: 181k]
  ------------------
 1206|       |							 /* TODO: figure out if we ever did it wrong on the encoder side. If yes, add some workaround */
 1207|   461k|		if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|   461k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (1207:7): [True: 1.23k, False: 460k]
  ------------------
 1208|  1.23k|			int z;
 1209|  1.23k|			uv_dx = 0; uv_dy = 0;
 1210|  1.23k|			b_uv_dx = 0; b_uv_dy = 0;
 1211|  6.17k|			for (z = 0; z < 4; z++) {
  ------------------
  |  Branch (1211:16): [True: 4.93k, False: 1.23k]
  ------------------
 1212|  4.93k|			  uv_dx += ((pMB->mvs[z].x>>1) | (pMB->mvs[z].x&1));
 1213|  4.93k|			  uv_dy += ((pMB->mvs[z].y>>1) | (pMB->mvs[z].y&1));
 1214|  4.93k|			  b_uv_dx += ((pMB->b_mvs[z].x>>1) | (pMB->b_mvs[z].x&1));
 1215|  4.93k|			  b_uv_dy += ((pMB->b_mvs[z].y>>1) | (pMB->b_mvs[z].y&1));
 1216|  4.93k|			}
 1217|  1.23k|		}
 1218|   460k|		else {
 1219|   460k|			uv_dx = (pMB->mvs[0].x / 2) + (pMB->mvs[1].x / 2) + (pMB->mvs[2].x / 2) + (pMB->mvs[3].x / 2);
 1220|   460k|			uv_dy = (pMB->mvs[0].y / 2) + (pMB->mvs[1].y / 2) + (pMB->mvs[2].y / 2) + (pMB->mvs[3].y / 2);
 1221|   460k|			b_uv_dx = (pMB->b_mvs[0].x / 2) + (pMB->b_mvs[1].x / 2) + (pMB->b_mvs[2].x / 2) + (pMB->b_mvs[3].x / 2);
 1222|   460k|			b_uv_dy = (pMB->b_mvs[0].y / 2) + (pMB->b_mvs[1].y / 2) + (pMB->b_mvs[2].y / 2) + (pMB->b_mvs[3].y / 2);
 1223|   460k|		} 
 1224|   461k|	} else {
 1225|   181k|      uv_dx = pMB->mvs[0].x + pMB->mvs[1].x + pMB->mvs[2].x + pMB->mvs[3].x;
 1226|   181k|      uv_dy = pMB->mvs[0].y + pMB->mvs[1].y + pMB->mvs[2].y + pMB->mvs[3].y;
 1227|   181k|      b_uv_dx = pMB->b_mvs[0].x + pMB->b_mvs[1].x + pMB->b_mvs[2].x + pMB->b_mvs[3].x;
 1228|   181k|      b_uv_dy = pMB->b_mvs[0].y + pMB->b_mvs[1].y + pMB->b_mvs[2].y + pMB->b_mvs[3].y;
 1229|   181k|    }
 1230|       |
 1231|   643k|    uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf];
 1232|   643k|    uv_dy = (uv_dy >> 3) + roundtab_76[uv_dy & 0xf];
 1233|   643k|    b_uv_dx = (b_uv_dx >> 3) + roundtab_76[b_uv_dx & 0xf];
 1234|   643k|    b_uv_dy = (b_uv_dy >> 3) + roundtab_76[b_uv_dy & 0xf];
 1235|   643k|  }
 1236|       |
 1237|   756k|  start_timer();
 1238|   756k|  if(dec->quarterpel) {
  ------------------
  |  Branch (1238:6): [True: 565k, False: 191k]
  ------------------
 1239|   565k|    if(!direct) {
  ------------------
  |  Branch (1239:8): [True: 103k, False: 461k]
  ------------------
 1240|   103k|      interpolate16x16_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1241|   103k|                    dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1242|   103k|                    pMB->mvs[0].x, pMB->mvs[0].y, stride, 0);
 1243|   461k|    } else {
 1244|   461k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1245|   461k|                    dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1246|   461k|                    pMB->mvs[0].x, pMB->mvs[0].y, stride, 0);
 1247|   461k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1248|   461k|                    dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos,
 1249|   461k|                    pMB->mvs[1].x, pMB->mvs[1].y, stride, 0);
 1250|   461k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1251|   461k|                    dec->qtmp.y + 128, 16*x_pos, 16*y_pos + 8,
 1252|   461k|                    pMB->mvs[2].x, pMB->mvs[2].y, stride, 0);
 1253|   461k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1254|   461k|                    dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos + 8,
 1255|   461k|                    pMB->mvs[3].x, pMB->mvs[3].y, stride, 0);
 1256|   461k|    }
 1257|   565k|  } else {
 1258|   191k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos, 16 * y_pos,
 1259|   191k|              pMB->mvs[0].x, pMB->mvs[0].y, stride, 0);
 1260|   191k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos + 8, 16 * y_pos,
 1261|   191k|              pMB->mvs[1].x, pMB->mvs[1].y, stride, 0);
 1262|   191k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos, 16 * y_pos + 8,
 1263|   191k|              pMB->mvs[2].x, pMB->mvs[2].y, stride, 0);
 1264|   191k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos + 8, 16 * y_pos + 8,
 1265|   191k|              pMB->mvs[3].x, pMB->mvs[3].y, stride, 0);
 1266|   191k|  }
 1267|       |
 1268|   756k|  interpolate8x8_switch(dec->cur.u, forward.u, 8 * x_pos, 8 * y_pos, uv_dx,
 1269|   756k|            uv_dy, stride2, 0);
 1270|   756k|  interpolate8x8_switch(dec->cur.v, forward.v, 8 * x_pos, 8 * y_pos, uv_dx,
 1271|   756k|            uv_dy, stride2, 0);
 1272|       |
 1273|       |
 1274|   756k|  if(dec->quarterpel) {
  ------------------
  |  Branch (1274:6): [True: 565k, False: 191k]
  ------------------
 1275|   565k|    if(!direct) {
  ------------------
  |  Branch (1275:8): [True: 103k, False: 461k]
  ------------------
 1276|   103k|      interpolate16x16_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1277|   103k|          dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1278|   103k|          pMB->b_mvs[0].x, pMB->b_mvs[0].y, stride, 0);
 1279|   461k|    } else {
 1280|   461k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1281|   461k|          dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1282|   461k|          pMB->b_mvs[0].x, pMB->b_mvs[0].y, stride, 0);
 1283|   461k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1284|   461k|          dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos,
 1285|   461k|          pMB->b_mvs[1].x, pMB->b_mvs[1].y, stride, 0);
 1286|   461k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1287|   461k|          dec->qtmp.y + 128, 16*x_pos, 16*y_pos + 8,
 1288|   461k|          pMB->b_mvs[2].x, pMB->b_mvs[2].y, stride, 0);
 1289|   461k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1290|   461k|          dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos + 8,
 1291|   461k|          pMB->b_mvs[3].x, pMB->b_mvs[3].y, stride, 0);
 1292|   461k|    }
 1293|   565k|  } else {
 1294|   191k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos, 16 * y_pos,
 1295|   191k|        pMB->b_mvs[0].x, pMB->b_mvs[0].y, stride, 0);
 1296|   191k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos + 8,
 1297|   191k|        16 * y_pos, pMB->b_mvs[1].x, pMB->b_mvs[1].y, stride, 0);
 1298|   191k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos,
 1299|   191k|        16 * y_pos + 8, pMB->b_mvs[2].x, pMB->b_mvs[2].y, stride, 0);
 1300|   191k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos + 8,
 1301|   191k|        16 * y_pos + 8, pMB->b_mvs[3].x, pMB->b_mvs[3].y, stride, 0);
 1302|   191k|  }
 1303|       |
 1304|   756k|  interpolate8x8_add_switch(dec->cur.u, backward.u, 8 * x_pos, 8 * y_pos,
 1305|   756k|      b_uv_dx, b_uv_dy, stride2, 0);
 1306|   756k|  interpolate8x8_add_switch(dec->cur.v, backward.v, 8 * x_pos, 8 * y_pos,
 1307|   756k|      b_uv_dx, b_uv_dy, stride2, 0);
 1308|       |
 1309|   756k|  stop_comp_timer();
 1310|       |
 1311|   756k|  if (cbp)
  ------------------
  |  Branch (1311:7): [True: 80.5k, False: 675k]
  ------------------
 1312|  80.5k|    decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
 1313|   756k|}

decoder.c:coding2type:
   64|  6.15k|{
   65|  6.15k|	return coding_type + 1;
   66|  6.15k|}
decoder.c:get_dc_scaler:
  233|   241M|{
  234|   241M|	if (quant < 5)
  ------------------
  |  Branch (234:6): [True: 72.8M, False: 168M]
  ------------------
  235|  72.8M|		return 8;
  236|       |
  237|   168M|	if (quant < 25 && !lum)
  ------------------
  |  Branch (237:6): [True: 86.4M, False: 82.0M]
  |  Branch (237:20): [True: 28.8M, False: 57.6M]
  ------------------
  238|  28.8M|		return (quant + 13) / 2;
  239|       |
  240|   139M|	if (quant < 9)
  ------------------
  |  Branch (240:6): [True: 20.4M, False: 119M]
  ------------------
  241|  20.4M|		return 2 * quant;
  242|       |
  243|   119M|	if (quant < 25)
  ------------------
  |  Branch (243:6): [True: 37.2M, False: 82.0M]
  ------------------
  244|  37.2M|		return quant + 8;
  245|       |
  246|  82.0M|	if (lum)
  ------------------
  |  Branch (246:6): [True: 54.7M, False: 27.3M]
  ------------------
  247|  54.7M|		return 2 * quant - 16;
  248|  27.3M|	else
  249|  27.3M|		return quant - 6;
  250|  82.0M|}

yv12_to_bgr_c:
   98|  3.98k|				 int width, int height, int vflip)	\
   99|  3.98k|{	\
  100|  3.98k|	int fixed_width = (width + 1) & ~1;				\
  101|  3.98k|	int x_dif = x_stride - (SIZE)*fixed_width;		\
  102|  3.98k|	int y_dif = y_stride - fixed_width;				\
  103|  3.98k|	int uv_dif = uv_stride - (fixed_width / 2);		\
  104|  3.98k|	int x, y;										\
  105|  3.98k|	if ((x_ptr == NULL) || (x_dif < 0)) return;		\
  ------------------
  |  Branch (105:6): [True: 0, False: 3.98k]
  |  Branch (105:25): [True: 0, False: 3.98k]
  ------------------
  106|  3.98k|	if (vflip) {								\
  ------------------
  |  Branch (106:6): [True: 0, False: 3.98k]
  ------------------
  107|      0|		x_ptr += (height - 1) * x_stride;			\
  108|      0|		x_dif = -(SIZE)*fixed_width - x_stride;		\
  109|      0|		x_stride = -x_stride;						\
  110|      0|	}												\
  111|   927k|	for (y = 0; y < height; y+=(VPIXELS)) {			\
  ------------------
  |  Branch (111:14): [True: 923k, False: 3.98k]
  ------------------
  112|   923k|		FUNC##_ROW(SIZE,C1,C2,C3,C4);				\
  113|   476M|		for (x = 0; x < fixed_width; x+=(PIXELS)) {	\
  ------------------
  |  Branch (113:15): [True: 475M, False: 923k]
  ------------------
  114|   475M|			FUNC(SIZE,C1,C2,C3,C4);				\
  ------------------
  |  |  450|   475M|MAKE_COLORSPACE(yv12_to_bgr_c,     3,2,2, YV12_TO_RGB,    2,1,0,0)
  |  |  ------------------
  |  |  |  |  405|   475M|	int rgb_y;												\
  |  |  |  |  406|   475M|	int b_u0 = B_U_tab[ u_ptr[0] ];							\
  |  |  |  |  407|   475M|	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];	\
  |  |  |  |  408|   475M|	int r_v0 = R_V_tab[ v_ptr[0] ];							\
  |  |  |  |  409|   475M|	WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)						\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   475M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   475M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.74M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.74M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   475M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 17.7M, False: 458M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.78M, False: 470M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.78M, False: 453M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   475M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.72M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.72M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   475M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   475M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   475M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.74M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.74M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   475M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 17.7M, False: 458M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.78M, False: 470M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.78M, False: 453M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   475M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.73M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.73M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   475M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  410|   475M|	WRITE_RGB(SIZE, 1, 0, C1,C2,C3,C4)
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   475M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   475M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.74M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.74M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   475M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 17.7M, False: 458M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.78M, False: 470M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.78M, False: 453M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   475M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.72M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.72M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   475M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   475M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   475M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.74M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.74M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   475M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 17.7M, False: 458M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.78M, False: 470M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.78M, False: 453M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   475M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|   951M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 326M, False: 149M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.72M, False: 471M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.72M, False: 144M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   475M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  115|   475M|			x_ptr += (PIXELS)*(SIZE);				\
  116|   475M|			y_ptr += (PIXELS);						\
  117|   475M|			u_ptr += (PIXELS)/2;					\
  118|   475M|			v_ptr += (PIXELS)/2;					\
  119|   475M|		}											\
  120|   923k|		x_ptr += x_dif + (VPIXELS-1)*x_stride;		\
  121|   923k|		y_ptr += y_dif + (VPIXELS-1)*y_stride;		\
  122|   923k|		u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  123|   923k|		v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  124|   923k|	}												\
  125|  3.98k|}
yv12_to_bgri_c:
   98|  1.82k|				 int width, int height, int vflip)	\
   99|  1.82k|{	\
  100|  1.82k|	int fixed_width = (width + 1) & ~1;				\
  101|  1.82k|	int x_dif = x_stride - (SIZE)*fixed_width;		\
  102|  1.82k|	int y_dif = y_stride - fixed_width;				\
  103|  1.82k|	int uv_dif = uv_stride - (fixed_width / 2);		\
  104|  1.82k|	int x, y;										\
  105|  1.82k|	if ((x_ptr == NULL) || (x_dif < 0)) return;		\
  ------------------
  |  Branch (105:6): [True: 0, False: 1.82k]
  |  Branch (105:25): [True: 0, False: 1.82k]
  ------------------
  106|  1.82k|	if (vflip) {								\
  ------------------
  |  Branch (106:6): [True: 0, False: 1.82k]
  ------------------
  107|      0|		x_ptr += (height - 1) * x_stride;			\
  108|      0|		x_dif = -(SIZE)*fixed_width - x_stride;		\
  109|      0|		x_stride = -x_stride;						\
  110|      0|	}												\
  111|  1.89M|	for (y = 0; y < height; y+=(VPIXELS)) {			\
  ------------------
  |  Branch (111:14): [True: 1.89M, False: 1.82k]
  ------------------
  112|  1.89M|		FUNC##_ROW(SIZE,C1,C2,C3,C4);				\
  113|   964M|		for (x = 0; x < fixed_width; x+=(PIXELS)) {	\
  ------------------
  |  Branch (113:15): [True: 962M, False: 1.89M]
  ------------------
  114|   962M|			FUNC(SIZE,C1,C2,C3,C4);				\
  ------------------
  |  |  461|   962M|MAKE_COLORSPACE(yv12_to_bgri_c,    3,2,4, YV12_TO_RGBI,   2,1,0, 0)
  |  |  ------------------
  |  |  |  |  414|   962M|	int rgb_y;												\
  |  |  |  |  415|   962M|	int b_u0 = B_U_tab[ u_ptr[0] ];							\
  |  |  |  |  416|   962M|	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];	\
  |  |  |  |  417|   962M|	int r_v0 = R_V_tab[ v_ptr[0] ];							\
  |  |  |  |  418|   962M|    int b_u1 = B_U_tab[ u_ptr[uv_stride] ];					\
  |  |  |  |  419|   962M|	int g_uv1 = G_U_tab[ u_ptr[uv_stride] ] + G_V_tab[ v_ptr[uv_stride] ];	\
  |  |  |  |  420|   962M|	int r_v1 = R_V_tab[ v_ptr[uv_stride] ];					\
  |  |  |  |  421|   962M|	WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)		\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   962M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 622M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 387k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 387k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   962M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.3M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 638k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 638k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   962M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 389k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 389k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 622M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 392k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 392k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.3M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 658k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 658k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 394k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 394k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  422|   962M|	WRITE_RGB(SIZE, 1, 1, C1,C2,C3,C4)		\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   962M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 622M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 394k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 394k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   962M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.3M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 635k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 635k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   962M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 397k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 397k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 622M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 393k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 393k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.3M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 639k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 639k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 395k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 395k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  423|   962M|	WRITE_RGB(SIZE, 2, 0, C1,C2,C3,C4)		\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   962M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 622M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 408k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 408k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   962M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.3M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 649k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 649k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   962M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 411k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 411k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 622M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 404k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 404k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.3M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 645k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 645k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 409k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 409k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  424|   962M|	WRITE_RGB(SIZE, 3, 1, C1,C2,C3,C4)
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   962M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 622M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 413k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 413k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   962M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.3M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 652k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 652k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   962M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 416k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 416k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   962M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 401k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 401k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 21.2M, False: 940M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 651k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 651k, False: 940M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   962M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.92G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 621M, False: 340M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 406k, False: 961M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 406k, False: 339M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   962M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  115|   962M|			x_ptr += (PIXELS)*(SIZE);				\
  116|   962M|			y_ptr += (PIXELS);						\
  117|   962M|			u_ptr += (PIXELS)/2;					\
  118|   962M|			v_ptr += (PIXELS)/2;					\
  119|   962M|		}											\
  120|  1.89M|		x_ptr += x_dif + (VPIXELS-1)*x_stride;		\
  121|  1.89M|		y_ptr += y_dif + (VPIXELS-1)*y_stride;		\
  122|  1.89M|		u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  123|  1.89M|		v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  124|  1.89M|	}												\
  125|  1.82k|}
colorspace_init:
  528|      2|{
  529|      2|	int32_t i;
  530|       |
  531|    514|	for (i = 0; i < 256; i++) {
  ------------------
  |  Branch (531:14): [True: 512, False: 2]
  ------------------
  532|    512|		RGB_Y_tab[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		RGB_Y_tab[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
  ------------------
  |  |  321|    512|#define Y_ADD_OUT		16
  ------------------
  533|    512|		B_U_tab[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		B_U_tab[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  325|    512|#define U_ADD_OUT		128
  ------------------
  534|    512|		G_U_tab[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		G_U_tab[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  325|    512|#define U_ADD_OUT		128
  ------------------
  535|    512|		G_V_tab[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		G_V_tab[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  328|    512|#define V_ADD_OUT		128
  ------------------
  536|    512|		R_V_tab[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		R_V_tab[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  328|    512|#define V_ADD_OUT		128
  ------------------
  537|    512|	}
  538|      2|}

image_printf:
  587|  4.69k|{
  588|  4.69k|	va_list args;
  589|  4.69k|	char buf[FONT_BUF_SZ];
  590|  4.69k|	int i;
  591|       |
  592|  4.69k|	va_start(args, fmt);
  593|  4.69k|	vsprintf(buf, fmt, args);
  594|  4.69k|	va_end(args);
  595|       |
  596|   148k|	for (i = 0; i < buf[i]; i++) {
  ------------------
  |  Branch (596:14): [True: 143k, False: 4.69k]
  ------------------
  597|   143k|		const char * font;
  598|       |
  599|   143k|		if (buf[i] >= '!' && buf[i] <= '@')
  ------------------
  |  Branch (599:7): [True: 127k, False: 16.6k]
  |  Branch (599:24): [True: 19.6k, False: 107k]
  ------------------
  600|  19.6k|			font = ascii33[buf[i]-'!'];
  601|   124k|		else if (buf[i] >= 'A' && buf[i] <= 'Z')
  ------------------
  |  Branch (601:12): [True: 107k, False: 16.6k]
  |  Branch (601:29): [True: 0, False: 107k]
  ------------------
  602|      0|			font = ascii65[buf[i]-'A'];
  603|   124k|		else if (buf[i] >= '[' && buf[i] <= '`')
  ------------------
  |  Branch (603:12): [True: 107k, False: 16.6k]
  |  Branch (603:29): [True: 0, False: 107k]
  ------------------
  604|      0|			font = ascii91[buf[i]-'['];
  605|   124k|		else if (buf[i] >= 'a' && buf[i] <= 'z')
  ------------------
  |  Branch (605:12): [True: 107k, False: 16.6k]
  |  Branch (605:29): [True: 107k, False: 0]
  ------------------
  606|   107k|			font = ascii65[buf[i]-'a'];
  607|  16.6k|		else
  608|  16.6k|			continue;
  609|       |
  610|   127k|		draw_num(img, edged_width, height, font, x + i*FONT_ZOOM*(FONT_WIDTH+1), y);
  ------------------
  |  |  562|   127k|#define FONT_ZOOM	4
  ------------------
              		draw_num(img, edged_width, height, font, x + i*FONT_ZOOM*(FONT_WIDTH+1), y);
  ------------------
  |  |   32|   127k|#define FONT_WIDTH	4
  ------------------
  611|   127k|	}
  612|  4.69k|}
font.c:draw_num:
  567|   127k|{
  568|   127k|	int i, j;
  569|       |
  570|  1.52M|	for (j = 0; j < FONT_ZOOM * FONT_HEIGHT && y+j < height; j++)
  ------------------
  |  |  562|  1.52M|#define FONT_ZOOM	4
  ------------------
              	for (j = 0; j < FONT_ZOOM * FONT_HEIGHT && y+j < height; j++)
  ------------------
  |  |   33|  3.05M|#define FONT_HEIGHT	6
  ------------------
  |  Branch (570:14): [True: 1.47M, False: 54.0k]
  |  Branch (570:45): [True: 1.40M, False: 73.0k]
  ------------------
  571|  11.2M|		for (i = 0; i < FONT_ZOOM * FONT_WIDTH && x+i < stride; i++)
  ------------------
  |  |  562|  11.2M|#define FONT_ZOOM	4
  ------------------
              		for (i = 0; i < FONT_ZOOM * FONT_WIDTH && x+i < stride; i++)
  ------------------
  |  |   32|  22.5M|#define FONT_WIDTH	4
  ------------------
  |  Branch (571:15): [True: 10.6M, False: 611k]
  |  Branch (571:45): [True: 9.87M, False: 790k]
  ------------------
  572|  9.87M|			if (font[(j/FONT_ZOOM)*FONT_WIDTH + (i/FONT_ZOOM)])
  ------------------
  |  |  562|  9.87M|#define FONT_ZOOM	4
  ------------------
              			if (font[(j/FONT_ZOOM)*FONT_WIDTH + (i/FONT_ZOOM)])
  ------------------
  |  |   32|  9.87M|#define FONT_WIDTH	4
  ------------------
              			if (font[(j/FONT_ZOOM)*FONT_WIDTH + (i/FONT_ZOOM)])
  ------------------
  |  |  562|  9.87M|#define FONT_ZOOM	4
  ------------------
  |  Branch (572:8): [True: 5.35M, False: 4.51M]
  ------------------
  573|  5.35M|			{
  574|  5.35M|				int offset = (y+j)*stride + (x+i);
  575|  5.35M|				int offset2 =((y+j)/2)*(stride/2) + ((x+i)/2);
  576|  5.35M|				img->y[offset] = 255;
  577|  5.35M|				img->u[offset2] = 127;
  578|  5.35M|				img->v[offset2] = 127;
  579|  5.35M|			}
  580|   127k|}

image_create:
   49|   141k|{
   50|   141k|	const uint32_t edged_width2 = edged_width / 2;
   51|   141k|	const uint32_t edged_height2 = edged_height / 2;
   52|       |
   53|   141k|	image->y =
   54|   141k|		xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE);
  ------------------
  |  |   41|   141k|#define SAFETY	64
  ------------------
              		xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE);
  ------------------
  |  |  131|   141k|#    define CACHE_LINE  64
  ------------------
   55|   141k|	if (image->y == NULL) {
  ------------------
  |  Branch (55:6): [True: 0, False: 141k]
  ------------------
   56|      0|		return -1;
   57|      0|	}
   58|   141k|	memset(image->y, 0, edged_width * (edged_height + 1) + SAFETY);
  ------------------
  |  |   41|   141k|#define SAFETY	64
  ------------------
   59|       |
   60|   141k|	image->u = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |   41|   141k|#define SAFETY	64
  ------------------
              	image->u = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |  131|   141k|#    define CACHE_LINE  64
  ------------------
   61|   141k|	if (image->u == NULL) {
  ------------------
  |  Branch (61:6): [True: 0, False: 141k]
  ------------------
   62|      0|		xvid_free(image->y);
   63|      0|		image->y = NULL;
   64|      0|		return -1;
   65|      0|	}
   66|   141k|	memset(image->u, 0, edged_width2 * edged_height2 + SAFETY);
  ------------------
  |  |   41|   141k|#define SAFETY	64
  ------------------
   67|       |
   68|   141k|	image->v = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |   41|   141k|#define SAFETY	64
  ------------------
              	image->v = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |  131|   141k|#    define CACHE_LINE  64
  ------------------
   69|   141k|	if (image->v == NULL) {
  ------------------
  |  Branch (69:6): [True: 0, False: 141k]
  ------------------
   70|      0|		xvid_free(image->u);
   71|      0|		image->u = NULL;
   72|      0|		xvid_free(image->y);
   73|      0|		image->y = NULL;
   74|      0|		return -1;
   75|      0|	}
   76|   141k|	memset(image->v, 0, edged_width2 * edged_height2 + SAFETY);
  ------------------
  |  |   41|   141k|#define SAFETY	64
  ------------------
   77|       |
   78|   141k|	image->y += EDGE_SIZE * edged_width + EDGE_SIZE;
  ------------------
  |  |   36|   141k|#define EDGE_SIZE  64
  ------------------
              	image->y += EDGE_SIZE * edged_width + EDGE_SIZE;
  ------------------
  |  |   36|   141k|#define EDGE_SIZE  64
  ------------------
   79|   141k|	image->u += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	image->u += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
   80|   141k|	image->v += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	image->v += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
   81|       |
   82|   141k|	return 0;
   83|   141k|}
image_destroy:
   91|   204k|{
   92|   204k|	const uint32_t edged_width2 = edged_width / 2;
   93|       |
   94|   204k|	if (image->y) {
  ------------------
  |  Branch (94:6): [True: 141k, False: 63.0k]
  ------------------
   95|   141k|		xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE));
  ------------------
  |  |   36|   141k|#define EDGE_SIZE  64
  ------------------
              		xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE));
  ------------------
  |  |   36|   141k|#define EDGE_SIZE  64
  ------------------
   96|   141k|		image->y = NULL;
   97|   141k|	}
   98|   204k|	if (image->u) {
  ------------------
  |  Branch (98:6): [True: 141k, False: 63.0k]
  ------------------
   99|   141k|		xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  100|   141k|		image->u = NULL;
  101|   141k|	}
  102|   204k|	if (image->v) {
  ------------------
  |  Branch (102:6): [True: 141k, False: 63.0k]
  ------------------
  103|   141k|		xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   141k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   141k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  104|   141k|		image->v = NULL;
  105|   141k|	}
  106|   204k|}
image_swap:
  112|  21.2k|{
  113|  21.2k|    SWAP(uint8_t*, image1->y, image2->y);
  ------------------
  |  |  264|  21.2k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
  114|  21.2k|    SWAP(uint8_t*, image1->u, image2->u);
  ------------------
  |  |  264|  21.2k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
  115|  21.2k|    SWAP(uint8_t*, image1->v, image2->v);
  ------------------
  |  |  264|  21.2k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
  116|  21.2k|}
image_copy:
  124|     29|{
  125|     29|	memcpy(image1->y, image2->y, edged_width * height);
  126|     29|	memcpy(image1->u, image2->u, edged_width * height / 4);
  127|     29|	memcpy(image1->v, image2->v, edged_width * height / 4);
  128|     29|}
image_setedges:
  142|  8.28k|{
  143|  8.28k|	const uint32_t edged_width2 = edged_width / 2;
  144|  8.28k|	uint32_t width2;
  145|  8.28k|	uint32_t i;
  146|  8.28k|	uint8_t *dst;
  147|  8.28k|	uint8_t *src;
  148|       |
  149|  8.28k|	dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
  ------------------
  |  |   36|  8.28k|#define EDGE_SIZE  64
  ------------------
              	dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
  ------------------
  |  |   36|  8.28k|#define EDGE_SIZE  64
  ------------------
  150|  8.28k|	src = image->y;
  151|       |
  152|       |	/* According to the Standard Clause 7.6.4, padding is done starting at 16
  153|       |	 * pixel width and height multiples. This was not respected in old xvids */
  154|  8.28k|	if ((bs_version >= SETEDGES_BUG_BEFORE &&
  ------------------
  |  |  131|  16.5k|#define SETEDGES_BUG_BEFORE		18
  ------------------
  |  Branch (154:7): [True: 8.05k, False: 232]
  ------------------
  155|  8.28k|		bs_version <  SETEDGES_BUG_AFTER) || 
  ------------------
  |  |  132|  8.05k|#define SETEDGES_BUG_AFTER		57
  ------------------
  |  Branch (155:3): [True: 5, False: 8.05k]
  ------------------
  156|  8.28k|		bs_version >= SETEDGES_BUG_REFIXED) {
  ------------------
  |  |  133|  8.28k|#define SETEDGES_BUG_REFIXED		63
  ------------------
  |  Branch (156:3): [True: 8.05k, False: 232]
  ------------------
  157|  8.05k|		width  = (width+15)&~15;
  158|  8.05k|		height = (height+15)&~15;
  159|  8.05k|	}
  160|       |
  161|  8.28k|	width2 = MAX(1, width/2);
  ------------------
  |  |  258|  8.28k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 36, False: 8.25k]
  |  |  ------------------
  ------------------
  162|       |
  163|   538k|	for (i = 0; i < EDGE_SIZE; i++) {
  ------------------
  |  |   36|   538k|#define EDGE_SIZE  64
  ------------------
  |  Branch (163:14): [True: 530k, False: 8.28k]
  ------------------
  164|   530k|		memset(dst, *src, EDGE_SIZE);
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  165|   530k|		memcpy(dst + EDGE_SIZE, src, width);
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  166|   530k|		memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  167|   530k|			   EDGE_SIZE);
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  168|   530k|		dst += edged_width;
  169|   530k|	}
  170|       |
  171|  10.4M|	for (i = 0; i < height; i++) {
  ------------------
  |  Branch (171:14): [True: 10.4M, False: 8.28k]
  ------------------
  172|  10.4M|		memset(dst, *src, EDGE_SIZE);
  ------------------
  |  |   36|  10.4M|#define EDGE_SIZE  64
  ------------------
  173|  10.4M|		memset(dst + edged_width - EDGE_SIZE, src[width - 1], EDGE_SIZE);
  ------------------
  |  |   36|  10.4M|#define EDGE_SIZE  64
  ------------------
              		memset(dst + edged_width - EDGE_SIZE, src[width - 1], EDGE_SIZE);
  ------------------
  |  |   36|  10.4M|#define EDGE_SIZE  64
  ------------------
  174|  10.4M|		dst += edged_width;
  175|  10.4M|		src += edged_width;
  176|  10.4M|	}
  177|       |
  178|  8.28k|	src -= edged_width;
  179|   538k|	for (i = 0; i < EDGE_SIZE; i++) {
  ------------------
  |  |   36|   538k|#define EDGE_SIZE  64
  ------------------
  |  Branch (179:14): [True: 530k, False: 8.28k]
  ------------------
  180|   530k|		memset(dst, *src, EDGE_SIZE);
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  181|   530k|		memcpy(dst + EDGE_SIZE, src, width);
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  182|   530k|		memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  183|   530k|				   EDGE_SIZE);
  ------------------
  |  |   36|   530k|#define EDGE_SIZE  64
  ------------------
  184|   530k|		dst += edged_width;
  185|   530k|	}
  186|       |
  187|       |
  188|       |	/* U */
  189|  8.28k|	dst = image->u - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  8.28k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  8.28k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	dst = image->u - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  8.28k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  8.28k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  190|  8.28k|	src = image->u;
  191|       |
  192|   273k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   273k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   273k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (192:14): [True: 265k, False: 8.28k]
  ------------------
  193|   265k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  194|   265k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  195|   265k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  196|   265k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  197|   265k|		dst += edged_width2;
  198|   265k|	}
  199|       |
  200|  5.23M|	for (i = 0; i < height / 2; i++) {
  ------------------
  |  Branch (200:14): [True: 5.22M, False: 8.28k]
  ------------------
  201|  5.22M|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|  5.22M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.22M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  202|  5.22M|		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.22M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.22M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.22M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.22M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  203|  5.22M|		dst += edged_width2;
  204|  5.22M|		src += edged_width2;
  205|  5.22M|	}
  206|  8.28k|	src -= edged_width2;
  207|   273k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   273k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   273k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (207:14): [True: 265k, False: 8.28k]
  ------------------
  208|   265k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  209|   265k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  210|   265k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  211|   265k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  212|   265k|		dst += edged_width2;
  213|   265k|	}
  214|       |
  215|       |
  216|       |	/* V */
  217|  8.28k|	dst = image->v - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  8.28k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  8.28k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	dst = image->v - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  8.28k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  8.28k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  218|  8.28k|	src = image->v;
  219|       |
  220|   273k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   273k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   273k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (220:14): [True: 265k, False: 8.28k]
  ------------------
  221|   265k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  222|   265k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  223|   265k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  224|   265k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  225|   265k|		dst += edged_width2;
  226|   265k|	}
  227|       |
  228|  5.23M|	for (i = 0; i < height / 2; i++) {
  ------------------
  |  Branch (228:14): [True: 5.22M, False: 8.28k]
  ------------------
  229|  5.22M|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|  5.22M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.22M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  230|  5.22M|		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.22M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.22M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.22M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.22M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  231|  5.22M|		dst += edged_width2;
  232|  5.22M|		src += edged_width2;
  233|  5.22M|	}
  234|  8.28k|	src -= edged_width2;
  235|   273k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   273k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   273k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (235:14): [True: 265k, False: 8.28k]
  ------------------
  236|   265k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  237|   265k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  238|   265k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  239|   265k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   265k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   265k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  240|   265k|		dst += edged_width2;
  241|   265k|	}
  242|  8.28k|}
image_output:
  629|  6.15k|{
  630|  6.15k|	const int edged_width2 = edged_width/2;
  631|  6.15k|	int height2 = height/2;
  632|       |
  633|       |/*
  634|       |	if (interlacing)
  635|       |		image_printf(image, edged_width, height, 5,100, "[i]=%i,%i",width,height);
  636|       |	image_dump_yuvpgm(image, edged_width, width, height, "\\decode.pgm");
  637|       |*/
  638|       |
  639|  6.15k|	switch (csp & ~XVID_CSP_VFLIP) {
  ------------------
  |  |  127|  6.15k|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  |  Branch (639:10): [True: 0, False: 6.15k]
  ------------------
  640|      0|	case XVID_CSP_RGB555:
  ------------------
  |  |  122|      0|#define XVID_CSP_RGB555   (1<<10) /* 16-bit rgb555 packed */
  ------------------
  |  Branch (640:2): [True: 0, False: 6.15k]
  ------------------
  641|      0|		safe_packed_conv(
  642|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  643|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  644|      0|			interlacing?yv12_to_rgb555i  :yv12_to_rgb555,
  ------------------
  |  Branch (644:4): [True: 0, False: 0]
  ------------------
  645|      0|			interlacing?yv12_to_rgb555i_c:yv12_to_rgb555_c, 2, interlacing);
  ------------------
  |  Branch (645:4): [True: 0, False: 0]
  ------------------
  646|      0|		return 0;
  647|       |
  648|      0|	case XVID_CSP_RGB565:
  ------------------
  |  |  123|      0|#define XVID_CSP_RGB565   (1<<11) /* 16-bit rgb565 packed */
  ------------------
  |  Branch (648:2): [True: 0, False: 6.15k]
  ------------------
  649|      0|		safe_packed_conv(
  650|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  651|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  652|      0|			interlacing?yv12_to_rgb565i  :yv12_to_rgb565,
  ------------------
  |  Branch (652:4): [True: 0, False: 0]
  ------------------
  653|      0|			interlacing?yv12_to_rgb565i_c:yv12_to_rgb565_c, 2, interlacing);
  ------------------
  |  Branch (653:4): [True: 0, False: 0]
  ------------------
  654|      0|		return 0;
  655|       |
  656|  6.15k|    case XVID_CSP_BGR:
  ------------------
  |  |  121|  6.15k|#define XVID_CSP_BGR      (1<< 9) /* 24-bit bgr packed */
  ------------------
  |  Branch (656:5): [True: 6.15k, False: 0]
  ------------------
  657|  6.15k|		safe_packed_conv(
  658|  6.15k|			dst[0], dst_stride[0], image->y, image->u, image->v,
  659|  6.15k|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|  6.15k|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  660|  6.15k|			interlacing?yv12_to_bgri  :yv12_to_bgr,
  ------------------
  |  Branch (660:4): [True: 1.94k, False: 4.21k]
  ------------------
  661|  6.15k|			interlacing?yv12_to_bgri_c:yv12_to_bgr_c, 3, interlacing);
  ------------------
  |  Branch (661:4): [True: 1.94k, False: 4.21k]
  ------------------
  662|  6.15k|		return 0;
  663|       |
  664|      0|	case XVID_CSP_BGRA:
  ------------------
  |  |  117|      0|#define XVID_CSP_BGRA     (1<< 6) /* 32-bit bgra packed */
  ------------------
  |  Branch (664:2): [True: 0, False: 6.15k]
  ------------------
  665|      0|		safe_packed_conv(
  666|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  667|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  668|      0|			interlacing?yv12_to_bgrai  :yv12_to_bgra,
  ------------------
  |  Branch (668:4): [True: 0, False: 0]
  ------------------
  669|      0|			interlacing?yv12_to_bgrai_c:yv12_to_bgra_c, 4, interlacing);
  ------------------
  |  Branch (669:4): [True: 0, False: 0]
  ------------------
  670|      0|		return 0;
  671|       |
  672|      0|	case XVID_CSP_ABGR:
  ------------------
  |  |  118|      0|#define XVID_CSP_ABGR     (1<< 7) /* 32-bit abgr packed */
  ------------------
  |  Branch (672:2): [True: 0, False: 6.15k]
  ------------------
  673|      0|		safe_packed_conv(
  674|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  675|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  676|      0|			interlacing?yv12_to_abgri  :yv12_to_abgr,
  ------------------
  |  Branch (676:4): [True: 0, False: 0]
  ------------------
  677|      0|			interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4, interlacing);
  ------------------
  |  Branch (677:4): [True: 0, False: 0]
  ------------------
  678|      0|		return 0;
  679|       |
  680|      0|	case XVID_CSP_RGB:
  ------------------
  |  |  116|      0|#define XVID_CSP_RGB      (1<<16) /* 24-bit rgb packed */
  ------------------
  |  Branch (680:2): [True: 0, False: 6.15k]
  ------------------
  681|      0|		safe_packed_conv(
  682|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  683|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  684|      0|			interlacing?yv12_to_rgbi  :yv12_to_rgb,
  ------------------
  |  Branch (684:4): [True: 0, False: 0]
  ------------------
  685|      0|			interlacing?yv12_to_rgbi_c:yv12_to_rgb_c, 3, interlacing);
  ------------------
  |  Branch (685:4): [True: 0, False: 0]
  ------------------
  686|      0|		return 0;
  687|       |
  688|      0|	case XVID_CSP_RGBA:
  ------------------
  |  |  119|      0|#define XVID_CSP_RGBA     (1<< 8) /* 32-bit rgba packed */
  ------------------
  |  Branch (688:2): [True: 0, False: 6.15k]
  ------------------
  689|      0|		safe_packed_conv(
  690|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  691|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  692|      0|			interlacing?yv12_to_rgbai  :yv12_to_rgba,
  ------------------
  |  Branch (692:4): [True: 0, False: 0]
  ------------------
  693|      0|			interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4, interlacing);
  ------------------
  |  Branch (693:4): [True: 0, False: 0]
  ------------------
  694|      0|		return 0;
  695|       |
  696|      0|	case XVID_CSP_ARGB:
  ------------------
  |  |  120|      0|#define XVID_CSP_ARGB     (1<<15) /* 32-bit argb packed */
  ------------------
  |  Branch (696:2): [True: 0, False: 6.15k]
  ------------------
  697|      0|		safe_packed_conv(
  698|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  699|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  700|      0|			interlacing?yv12_to_argbi  :yv12_to_argb,
  ------------------
  |  Branch (700:4): [True: 0, False: 0]
  ------------------
  701|      0|			interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4, interlacing);
  ------------------
  |  Branch (701:4): [True: 0, False: 0]
  ------------------
  702|      0|		return 0;
  703|       |
  704|      0|	case XVID_CSP_YUY2:
  ------------------
  |  |  113|      0|#define XVID_CSP_YUY2     (1<< 3) /* 4:2:2 packed */
  ------------------
  |  Branch (704:2): [True: 0, False: 6.15k]
  ------------------
  705|      0|		safe_packed_conv(
  706|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  707|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  708|      0|			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
  ------------------
  |  Branch (708:4): [True: 0, False: 0]
  ------------------
  709|      0|			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
  ------------------
  |  Branch (709:4): [True: 0, False: 0]
  ------------------
  710|      0|		return 0;
  711|       |
  712|      0|	case XVID_CSP_YVYU:		/* u,v swapped */
  ------------------
  |  |  115|      0|#define XVID_CSP_YVYU     (1<< 5) /* 4:2:2 packed */
  ------------------
  |  Branch (712:2): [True: 0, False: 6.15k]
  ------------------
  713|      0|		safe_packed_conv(
  714|      0|			dst[0], dst_stride[0], image->y, image->v, image->u,
  715|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  716|      0|			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
  ------------------
  |  Branch (716:4): [True: 0, False: 0]
  ------------------
  717|      0|			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
  ------------------
  |  Branch (717:4): [True: 0, False: 0]
  ------------------
  718|      0|		return 0;
  719|       |
  720|      0|	case XVID_CSP_UYVY:
  ------------------
  |  |  114|      0|#define XVID_CSP_UYVY     (1<< 4) /* 4:2:2 packed */
  ------------------
  |  Branch (720:2): [True: 0, False: 6.15k]
  ------------------
  721|      0|		safe_packed_conv(
  722|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  723|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  724|      0|			interlacing?yv12_to_uyvyi  :yv12_to_uyvy,
  ------------------
  |  Branch (724:4): [True: 0, False: 0]
  ------------------
  725|      0|			interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2, interlacing);
  ------------------
  |  Branch (725:4): [True: 0, False: 0]
  ------------------
  726|      0|		return 0;
  727|       |
  728|      0|	case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */
  ------------------
  |  |  111|      0|#define XVID_CSP_I420     (1<< 1) /* 4:2:0 planar */
  ------------------
  |  Branch (728:2): [True: 0, False: 6.15k]
  ------------------
  729|      0|		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
  730|      0|			dst_stride[0], dst_stride[0]/2,
  731|      0|			image->y, image->u, image->v, edged_width, edged_width2,
  732|      0|			width, height, (csp & XVID_CSP_VFLIP));
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  733|      0|		return 0;
  734|       |
  735|      0|	case XVID_CSP_YV12:	/* YCrCb == YVU == U and V plane swapped */
  ------------------
  |  |  112|      0|#define XVID_CSP_YV12     (1<< 2) /* 4:2:0 planar */
  ------------------
  |  Branch (735:2): [True: 0, False: 6.15k]
  ------------------
  736|      0|		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
  737|      0|			dst_stride[0], dst_stride[0]/2,
  738|      0|			image->y, image->v, image->u, edged_width, edged_width2,
  739|      0|			width, height, (csp & XVID_CSP_VFLIP));
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  740|      0|		return 0;
  741|       |
  742|      0|	case XVID_CSP_PLANAR:  /* YCbCr with arbitrary pointers and different strides for Y and UV */
  ------------------
  |  |  109|      0|#define XVID_CSP_PLANAR   (1<< 0) /* 4:2:0 planar (==I420, except for pointers/strides) */
  ------------------
  |  Branch (742:2): [True: 0, False: 6.15k]
  ------------------
  743|      0|		yv12_to_yv12(dst[0], dst[1], dst[2],
  744|      0|			dst_stride[0], dst_stride[1],	/* v: dst_stride[2] not yet supported */
  745|      0|			image->y, image->u, image->v, edged_width, edged_width2,
  746|      0|			width, height, (csp & XVID_CSP_VFLIP));
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  747|      0|		return 0;
  748|       |
  749|      0|	case XVID_CSP_INTERNAL :
  ------------------
  |  |  125|      0|#define XVID_CSP_INTERNAL (1<<13) /* decoder only: 4:2:0 planar, returns ptrs to internal buffers */
  ------------------
  |  Branch (749:2): [True: 0, False: 6.15k]
  ------------------
  750|      0|		dst[0] = image->y;
  751|      0|		dst[1] = image->u;
  752|      0|		dst[2] = image->v;
  753|      0|		dst_stride[0] = edged_width;
  754|      0|		dst_stride[1] = edged_width/2;
  755|      0|		dst_stride[2] = edged_width/2;
  756|      0|		return 0;
  757|       |
  758|      0|	case XVID_CSP_NULL:
  ------------------
  |  |  126|      0|#define XVID_CSP_NULL     (1<<14) /* decoder only: dont output anything */
  ------------------
  |  Branch (758:2): [True: 0, False: 6.15k]
  ------------------
  759|      0|	case XVID_CSP_SLICE:
  ------------------
  |  |  124|      0|#define XVID_CSP_SLICE    (1<<12) /* decoder only: 4:2:0 planar, per slice rendering */
  ------------------
  |  Branch (759:2): [True: 0, False: 6.15k]
  ------------------
  760|      0|		return 0;
  761|       |
  762|  6.15k|	}
  763|       |
  764|      0|	return -1;
  765|  6.15k|}
image.c:safe_packed_conv:
  394|  6.15k|{
  395|  6.15k|	int width_opt, width_c, height_opt;
  396|       |
  397|  6.15k|    if (width<0 || width==1 || height==1) return; /* forget about it */
  ------------------
  |  Branch (397:9): [True: 0, False: 6.15k]
  |  Branch (397:20): [True: 215, False: 5.94k]
  |  Branch (397:32): [True: 139, False: 5.80k]
  ------------------
  398|       |
  399|  5.80k|	if (func_opt != func_c && x_stride < size*((width+15)/16)*16)
  ------------------
  |  Branch (399:6): [True: 0, False: 5.80k]
  |  Branch (399:28): [True: 0, False: 0]
  ------------------
  400|      0|	{
  401|      0|		width_opt = width & (~15);
  402|      0|		width_c = (width - width_opt) & (~1);
  403|      0|	}
  404|  5.80k|	else if (func_opt != func_c && !(width&1) && (size==3))
  ------------------
  |  Branch (404:11): [True: 0, False: 5.80k]
  |  Branch (404:33): [True: 0, False: 0]
  |  Branch (404:47): [True: 0, False: 0]
  ------------------
  405|      0|	{
  406|       |        /* MMX reads 4 bytes per pixel for RGB/BGR */
  407|      0|        width_opt = width - 2;
  408|      0|        width_c = 2;
  409|      0|    }
  410|  5.80k|    else {
  411|       |        /* Enforce the width to be divisable by two. */
  412|  5.80k|		width_opt = width & (~1);
  413|  5.80k|		width_c = 0;
  414|  5.80k|	}
  415|       |
  416|       |    /* packed conversions require height to be divisable by 2
  417|       |       (or even by 4 for interlaced conversion) */
  418|  5.80k|       if (interlacing)
  ------------------
  |  Branch (418:12): [True: 1.82k, False: 3.98k]
  ------------------
  419|  1.82k|               height_opt = height & (~3);
  420|  3.98k|       else
  421|  3.98k|               height_opt = height & (~1);
  422|       |
  423|  5.80k|	func_opt(x_ptr, x_stride,
  424|  5.80k|			y_ptr, u_ptr, v_ptr, y_stride, uv_stride,
  425|  5.80k|			width_opt, height_opt, vflip);
  426|       |
  427|  5.80k|	if (width_c)
  ------------------
  |  Branch (427:6): [True: 0, False: 5.80k]
  ------------------
  428|      0|	{
  429|      0|		func_c(x_ptr + size*width_opt, x_stride,
  430|      0|			y_ptr + width_opt, u_ptr + width_opt/2, v_ptr + width_opt/2,
  431|      0|			y_stride, uv_stride, width_c, height_opt, vflip);
  432|      0|	}
  433|  5.80k|}

decoder.c:image_null:
   43|   204k|{
   44|   204k|	image->y = image->u = image->v = NULL;
   45|   204k|}

interpolate8x8_avg2_c:
   61|  3.71M|{
   62|  3.71M|    uint32_t i;
   63|  3.71M|	const int32_t round = 1 - rounding;
   64|       |
   65|  33.4M|    for(i = 0; i < height; i++) {
  ------------------
  |  Branch (65:16): [True: 29.7M, False: 3.71M]
  ------------------
   66|  29.7M|        dst[0] = (src1[0] + src2[0] + round) >> 1;
   67|  29.7M|        dst[1] = (src1[1] + src2[1] + round) >> 1;
   68|  29.7M|        dst[2] = (src1[2] + src2[2] + round) >> 1;
   69|  29.7M|        dst[3] = (src1[3] + src2[3] + round) >> 1;
   70|  29.7M|        dst[4] = (src1[4] + src2[4] + round) >> 1;
   71|  29.7M|        dst[5] = (src1[5] + src2[5] + round) >> 1;
   72|  29.7M|        dst[6] = (src1[6] + src2[6] + round) >> 1;
   73|  29.7M|        dst[7] = (src1[7] + src2[7] + round) >> 1;
   74|       |
   75|  29.7M|        dst += stride;
   76|  29.7M|        src1 += stride;
   77|  29.7M|        src2 += stride;
   78|  29.7M|    }
   79|  3.71M|}
interpolate8x8_halfpel_add_c:
   83|  3.71M|{
   84|  3.71M|	interpolate8x8_avg2_c(dst, dst, src, stride, 0, 8);
   85|  3.71M|}
interpolate8x8_halfpel_h_c:
  117|   302k|{
  118|   302k|	uintptr_t j;
  ------------------
  |  |  138|   302k|#        define uintptr_t uint64_t
  ------------------
  119|       |
  120|   302k|	if (rounding) {
  ------------------
  |  Branch (120:6): [True: 21.4k, False: 281k]
  ------------------
  121|   193k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (121:15): [True: 171k, False: 21.4k]
  ------------------
  122|   171k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1);
  123|   171k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1);
  124|   171k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] )>>1);
  125|   171k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] )>>1);
  126|   171k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] )>>1);
  127|   171k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] )>>1);
  128|   171k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] )>>1);
  129|   171k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] )>>1);
  130|   171k|		}
  131|   281k|	} else {
  132|  2.53M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (132:15): [True: 2.25M, False: 281k]
  ------------------
  133|  2.25M|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] + 1)>>1);
  134|  2.25M|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] + 1)>>1);
  135|  2.25M|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] + 1)>>1);
  136|  2.25M|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] + 1)>>1);
  137|  2.25M|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] + 1)>>1);
  138|  2.25M|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] + 1)>>1);
  139|  2.25M|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] + 1)>>1);
  140|  2.25M|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] + 1)>>1);
  141|  2.25M|		}
  142|   281k|	}
  143|   302k|}
interpolate8x4_halfpel_h_c:
  152|  14.9k|{
  153|  14.9k|	uintptr_t j;
  ------------------
  |  |  138|  14.9k|#        define uintptr_t uint64_t
  ------------------
  154|       |
  155|  14.9k|	if (rounding) {
  ------------------
  |  Branch (155:6): [True: 7.85k, False: 7.05k]
  ------------------
  156|  39.2k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (156:15): [True: 31.4k, False: 7.85k]
  ------------------
  157|  31.4k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1);
  158|  31.4k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1);
  159|  31.4k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] )>>1);
  160|  31.4k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] )>>1);
  161|  31.4k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] )>>1);
  162|  31.4k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] )>>1);
  163|  31.4k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] )>>1);
  164|  31.4k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] )>>1);
  165|  31.4k|		}
  166|  7.85k|	} else {
  167|  35.2k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (167:15): [True: 28.2k, False: 7.05k]
  ------------------
  168|  28.2k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] + 1)>>1);
  169|  28.2k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] + 1)>>1);
  170|  28.2k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] + 1)>>1);
  171|  28.2k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] + 1)>>1);
  172|  28.2k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] + 1)>>1);
  173|  28.2k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] + 1)>>1);
  174|  28.2k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] + 1)>>1);
  175|  28.2k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] + 1)>>1);
  176|  28.2k|		}
  177|  7.05k|	}
  178|  14.9k|}
interpolate8x8_halfpel_h_add_c:
  187|   140k|{
  188|   140k|	uintptr_t j;
  ------------------
  |  |  138|   140k|#        define uintptr_t uint64_t
  ------------------
  189|       |
  190|   140k|	if (rounding) {
  ------------------
  |  Branch (190:6): [True: 0, False: 140k]
  ------------------
  191|      0|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (191:15): [True: 0, False: 0]
  ------------------
  192|      0|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + 1] )>>1) + dst[j+0] + 1)>>1);
  193|      0|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + 2] )>>1) + dst[j+1] + 1)>>1);
  194|      0|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + 3] )>>1) + dst[j+2] + 1)>>1);
  195|      0|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + 4] )>>1) + dst[j+3] + 1)>>1);
  196|      0|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + 5] )>>1) + dst[j+4] + 1)>>1);
  197|      0|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + 6] )>>1) + dst[j+5] + 1)>>1);
  198|      0|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + 7] )>>1) + dst[j+6] + 1)>>1);
  199|      0|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + 8] )>>1) + dst[j+7] + 1)>>1);
  200|      0|		}
  201|   140k|	} else {
  202|  1.26M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (202:15): [True: 1.12M, False: 140k]
  ------------------
  203|  1.12M|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + 1] + 1)>>1) + dst[j+0] + 1)>>1);
  204|  1.12M|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + 2] + 1)>>1) + dst[j+1] + 1)>>1);
  205|  1.12M|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + 3] + 1)>>1) + dst[j+2] + 1)>>1);
  206|  1.12M|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + 4] + 1)>>1) + dst[j+3] + 1)>>1);
  207|  1.12M|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + 5] + 1)>>1) + dst[j+4] + 1)>>1);
  208|  1.12M|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + 6] + 1)>>1) + dst[j+5] + 1)>>1);
  209|  1.12M|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + 7] + 1)>>1) + dst[j+6] + 1)>>1);
  210|  1.12M|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + 8] + 1)>>1) + dst[j+7] + 1)>>1);
  211|  1.12M|		}
  212|   140k|	}
  213|   140k|}
interpolate8x8_halfpel_v_c:
  222|   247k|{
  223|   247k|	uintptr_t j;
  ------------------
  |  |  138|   247k|#        define uintptr_t uint64_t
  ------------------
  224|       |
  225|       |
  226|   247k|	if (rounding) {
  ------------------
  |  Branch (226:6): [True: 14.4k, False: 233k]
  ------------------
  227|   130k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (227:15): [True: 115k, False: 14.4k]
  ------------------
  228|   115k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] )>>1);
  229|   115k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] )>>1);
  230|   115k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] )>>1);
  231|   115k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] )>>1);
  232|   115k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] )>>1);
  233|   115k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] )>>1);
  234|   115k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] )>>1);
  235|   115k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] )>>1);
  236|   115k|		}
  237|   233k|	} else {
  238|  2.10M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (238:15): [True: 1.86M, False: 233k]
  ------------------
  239|  1.86M|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] + 1)>>1);
  240|  1.86M|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] + 1)>>1);
  241|  1.86M|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] + 1)>>1);
  242|  1.86M|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] + 1)>>1);
  243|  1.86M|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] + 1)>>1);
  244|  1.86M|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] + 1)>>1);
  245|  1.86M|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] + 1)>>1);
  246|  1.86M|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] + 1)>>1);
  247|  1.86M|		}
  248|   233k|	}
  249|   247k|}
interpolate8x4_halfpel_v_c:
  258|  16.3k|{
  259|  16.3k|	uintptr_t j;
  ------------------
  |  |  138|  16.3k|#        define uintptr_t uint64_t
  ------------------
  260|       |
  261|       |
  262|  16.3k|	if (rounding) {
  ------------------
  |  Branch (262:6): [True: 6.58k, False: 9.75k]
  ------------------
  263|  32.9k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (263:15): [True: 26.3k, False: 6.58k]
  ------------------
  264|  26.3k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] )>>1);
  265|  26.3k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] )>>1);
  266|  26.3k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] )>>1);
  267|  26.3k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] )>>1);
  268|  26.3k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] )>>1);
  269|  26.3k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] )>>1);
  270|  26.3k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] )>>1);
  271|  26.3k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] )>>1);
  272|  26.3k|		}
  273|  9.75k|	} else {
  274|  48.7k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (274:15): [True: 39.0k, False: 9.75k]
  ------------------
  275|  39.0k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] + 1)>>1);
  276|  39.0k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] + 1)>>1);
  277|  39.0k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] + 1)>>1);
  278|  39.0k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] + 1)>>1);
  279|  39.0k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] + 1)>>1);
  280|  39.0k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] + 1)>>1);
  281|  39.0k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] + 1)>>1);
  282|  39.0k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] + 1)>>1);
  283|  39.0k|		}
  284|  9.75k|	}
  285|  16.3k|}
interpolate8x8_halfpel_v_add_c:
  294|  76.1k|{
  295|  76.1k|	uintptr_t j;
  ------------------
  |  |  138|  76.1k|#        define uintptr_t uint64_t
  ------------------
  296|       |
  297|       |
  298|  76.1k|	if (rounding) {
  ------------------
  |  Branch (298:6): [True: 0, False: 76.1k]
  ------------------
  299|      0|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (299:15): [True: 0, False: 0]
  ------------------
  300|      0|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + stride + 0] )>>1) + dst[j+0] + 1)>>1);
  301|      0|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + stride + 1] )>>1) + dst[j+1] + 1)>>1);
  302|      0|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + stride + 2] )>>1) + dst[j+2] + 1)>>1);
  303|      0|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + stride + 3] )>>1) + dst[j+3] + 1)>>1);
  304|      0|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + stride + 4] )>>1) + dst[j+4] + 1)>>1);
  305|      0|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + stride + 5] )>>1) + dst[j+5] + 1)>>1);
  306|      0|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + stride + 6] )>>1) + dst[j+6] + 1)>>1);
  307|      0|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + stride + 7] )>>1) + dst[j+7] + 1)>>1);
  308|      0|		}
  309|  76.1k|	} else {
  310|   685k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (310:15): [True: 609k, False: 76.1k]
  ------------------
  311|   609k|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + stride + 0] + 1)>>1) + dst[j+0] + 1)>>1);
  312|   609k|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + stride + 1] + 1)>>1) + dst[j+1] + 1)>>1);
  313|   609k|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + stride + 2] + 1)>>1) + dst[j+2] + 1)>>1);
  314|   609k|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + stride + 3] + 1)>>1) + dst[j+3] + 1)>>1);
  315|   609k|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + stride + 4] + 1)>>1) + dst[j+4] + 1)>>1);
  316|   609k|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + stride + 5] + 1)>>1) + dst[j+5] + 1)>>1);
  317|   609k|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + stride + 6] + 1)>>1) + dst[j+6] + 1)>>1);
  318|   609k|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + stride + 7] + 1)>>1) + dst[j+7] + 1)>>1);
  319|   609k|		}
  320|  76.1k|	}
  321|  76.1k|}
interpolate8x8_halfpel_hv_c:
  330|   136k|{
  331|   136k|	uintptr_t j;
  ------------------
  |  |  138|   136k|#        define uintptr_t uint64_t
  ------------------
  332|       |
  333|   136k|	if (rounding) {
  ------------------
  |  Branch (333:6): [True: 12.6k, False: 123k]
  ------------------
  334|   114k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (334:15): [True: 101k, False: 12.6k]
  ------------------
  335|   101k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2);
  336|   101k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2);
  337|   101k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2);
  338|   101k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2);
  339|   101k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2);
  340|   101k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2);
  341|   101k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2);
  342|   101k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2);
  343|   101k|		}
  344|   123k|	} else {
  345|  1.11M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (345:15): [True: 987k, False: 123k]
  ------------------
  346|   987k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2);
  347|   987k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2);
  348|   987k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2);
  349|   987k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2);
  350|   987k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2);
  351|   987k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2);
  352|   987k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2);
  353|   987k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2);
  354|   987k|		}
  355|   123k|	}
  356|   136k|}
interpolate8x4_halfpel_hv_c:
  365|  10.8k|{
  366|  10.8k|	uintptr_t j;
  ------------------
  |  |  138|  10.8k|#        define uintptr_t uint64_t
  ------------------
  367|       |
  368|  10.8k|	if (rounding) {
  ------------------
  |  Branch (368:6): [True: 5.76k, False: 5.09k]
  ------------------
  369|  28.8k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (369:15): [True: 23.0k, False: 5.76k]
  ------------------
  370|  23.0k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2);
  371|  23.0k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2);
  372|  23.0k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2);
  373|  23.0k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2);
  374|  23.0k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2);
  375|  23.0k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2);
  376|  23.0k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2);
  377|  23.0k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2);
  378|  23.0k|		}
  379|  5.76k|	} else {
  380|  25.4k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (380:15): [True: 20.3k, False: 5.09k]
  ------------------
  381|  20.3k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2);
  382|  20.3k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2);
  383|  20.3k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2);
  384|  20.3k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2);
  385|  20.3k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2);
  386|  20.3k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2);
  387|  20.3k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2);
  388|  20.3k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2);
  389|  20.3k|		}
  390|  5.09k|	}
  391|  10.8k|}
interpolate8x8_halfpel_hv_add_c:
  400|  51.0k|{
  401|  51.0k|	uintptr_t j;
  ------------------
  |  |  138|  51.0k|#        define uintptr_t uint64_t
  ------------------
  402|       |
  403|  51.0k|	if (rounding) {
  ------------------
  |  Branch (403:6): [True: 0, False: 51.0k]
  ------------------
  404|      0|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (404:15): [True: 0, False: 0]
  ------------------
  405|      0|				dst[j + 0] = (uint8_t)((((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2) + dst[j+0])>>1);
  406|      0|				dst[j + 1] = (uint8_t)((((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2) + dst[j+1])>>1);
  407|      0|				dst[j + 2] = (uint8_t)((((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2) + dst[j+2])>>1);
  408|      0|				dst[j + 3] = (uint8_t)((((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2) + dst[j+3])>>1);
  409|      0|				dst[j + 4] = (uint8_t)((((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2) + dst[j+4])>>1);
  410|      0|				dst[j + 5] = (uint8_t)((((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2) + dst[j+5])>>1);
  411|      0|				dst[j + 6] = (uint8_t)((((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2) + dst[j+6])>>1);
  412|      0|				dst[j + 7] = (uint8_t)((((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2) + dst[j+7])>>1);
  413|      0|		}
  414|  51.0k|	} else {
  415|   459k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (415:15): [True: 408k, False: 51.0k]
  ------------------
  416|   408k|				dst[j + 0] = (uint8_t)((((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2) + dst[j+0] + 1)>>1);
  417|   408k|				dst[j + 1] = (uint8_t)((((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2) + dst[j+1] + 1)>>1);
  418|   408k|				dst[j + 2] = (uint8_t)((((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2) + dst[j+2] + 1)>>1);
  419|   408k|				dst[j + 3] = (uint8_t)((((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2) + dst[j+3] + 1)>>1);
  420|   408k|				dst[j + 4] = (uint8_t)((((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2) + dst[j+4] + 1)>>1);
  421|   408k|				dst[j + 5] = (uint8_t)((((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2) + dst[j+5] + 1)>>1);
  422|   408k|				dst[j + 6] = (uint8_t)((((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2) + dst[j+6] + 1)>>1);
  423|   408k|				dst[j + 7] = (uint8_t)((((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2) + dst[j+7] + 1)>>1);
  424|   408k|		}
  425|  51.0k|	}
  426|  51.0k|}

decoder.c:interpolate16x16_switch:
  325|   312k|{
  326|   312k|	interpolate8x8_switch(cur, refn, x,   y,   dx, dy, stride, rounding);
  327|   312k|	interpolate8x8_switch(cur, refn, x+8, y,   dx, dy, stride, rounding);
  328|   312k|	interpolate8x8_switch(cur, refn, x,   y+8, dx, dy, stride, rounding);
  329|   312k|	interpolate8x8_switch(cur, refn, x+8, y+8, dx, dy, stride, rounding);
  330|   312k|}
decoder.c:interpolate8x8_switch:
  265|  4.62M|{
  266|       |
  267|  4.62M|	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
  268|  4.62M|	uint8_t * const dst = cur + (int)(y * stride + x);
  269|       |
  270|  4.62M|	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
  271|  3.93M|	case 0:
  ------------------
  |  Branch (271:2): [True: 3.93M, False: 686k]
  ------------------
  272|  3.93M|		transfer8x8_copy(dst, src, stride);
  273|  3.93M|		break;
  274|   247k|	case 1:
  ------------------
  |  Branch (274:2): [True: 247k, False: 4.37M]
  ------------------
  275|   247k|		interpolate8x8_halfpel_v(dst, src, stride, rounding);
  276|   247k|		break;
  277|   302k|	case 2:
  ------------------
  |  Branch (277:2): [True: 302k, False: 4.32M]
  ------------------
  278|   302k|		interpolate8x8_halfpel_h(dst, src, stride, rounding);
  279|   302k|		break;
  280|   136k|	default:
  ------------------
  |  Branch (280:2): [True: 136k, False: 4.48M]
  ------------------
  281|   136k|		interpolate8x8_halfpel_hv(dst, src, stride, rounding);
  282|   136k|		break;
  283|  4.62M|	}
  284|  4.62M|}
decoder.c:interpolate8x4_switch:
  234|  82.1k|{
  235|       |
  236|  82.1k|	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
  237|  82.1k|	uint8_t * const dst = cur + (int)(y * stride + x);
  238|       |
  239|  82.1k|	switch (((dx & 1) << 1) + (dy & 1))	
  240|  82.1k|	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
  241|  40.0k|	case 0:
  ------------------
  |  Branch (241:2): [True: 40.0k, False: 42.1k]
  ------------------
  242|  40.0k|		transfer8x4_copy(dst, src, stride);
  243|  40.0k|		break;
  244|  16.3k|	case 1:
  ------------------
  |  Branch (244:2): [True: 16.3k, False: 65.8k]
  ------------------
  245|  16.3k|		interpolate8x4_halfpel_v(dst, src, stride, rounding);
  246|  16.3k|		break;
  247|  14.9k|	case 2:
  ------------------
  |  Branch (247:2): [True: 14.9k, False: 67.2k]
  ------------------
  248|  14.9k|		interpolate8x4_halfpel_h(dst, src, stride, rounding);
  249|  14.9k|		break;
  250|  10.8k|	default:
  ------------------
  |  Branch (250:2): [True: 10.8k, False: 71.3k]
  ------------------
  251|  10.8k|		interpolate8x4_halfpel_hv(dst, src, stride, rounding);
  252|  10.8k|		break;
  253|  82.1k|	}
  254|  82.1k|}
decoder.c:interpolate8x8_add_switch:
  295|  2.27M|{
  296|       |
  297|  2.27M|	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
  298|  2.27M|	uint8_t * const dst = cur + (int)(y * stride + x);
  299|       |
  300|  2.27M|	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
  301|  2.01M|	case 0:
  ------------------
  |  Branch (301:2): [True: 2.01M, False: 267k]
  ------------------
  302|  2.01M|		interpolate8x8_halfpel_add(dst, src, stride, rounding);
  303|  2.01M|		break;
  304|  76.1k|	case 1:
  ------------------
  |  Branch (304:2): [True: 76.1k, False: 2.20M]
  ------------------
  305|  76.1k|		interpolate8x8_halfpel_v_add(dst, src, stride, rounding);
  306|  76.1k|		break;
  307|   140k|	case 2:
  ------------------
  |  Branch (307:2): [True: 140k, False: 2.13M]
  ------------------
  308|   140k|		interpolate8x8_halfpel_h_add(dst, src, stride, rounding);
  309|   140k|		break;
  310|  51.0k|	default:
  ------------------
  |  Branch (310:2): [True: 51.0k, False: 2.22M]
  ------------------
  311|  51.0k|		interpolate8x8_halfpel_hv_add(dst, src, stride, rounding);
  312|  51.0k|		break;
  313|  2.27M|	}
  314|  2.27M|}

init_postproc:
   51|  10.5k|{
   52|  10.5k|	init_deblock(tbls);
   53|  10.5k|	init_noise(tbls);
   54|  10.5k|}
init_deblock:
  214|  10.5k|{
  215|  10.5k|	int i;
  216|       |
  217|  5.38M|	for(i = -255; i < 256; i++) {
  ------------------
  |  Branch (217:16): [True: 5.37M, False: 10.5k]
  ------------------
  218|  5.37M|		tbls->xvid_thresh_tbl[i + 255] = 0;
  219|  5.37M|		if(ABS(i) < THR1)
  ------------------
  |  |   48|  5.37M|#define ABS(X)    (((X)>0)?(X):-(X)) 
  |  |  ------------------
  |  |  |  Branch (48:20): [True: 2.68M, False: 2.69M]
  |  |  ------------------
  ------------------
              		if(ABS(i) < THR1)
  ------------------
  |  |   34|  5.37M|#define THR1 2
  ------------------
  |  Branch (219:6): [True: 31.5k, False: 5.34M]
  ------------------
  220|  31.5k|			tbls->xvid_thresh_tbl[i + 255] = 1;
  221|  5.37M|		tbls->xvid_abs_tbl[i + 255] = ABS(i);
  ------------------
  |  |   48|  5.37M|#define ABS(X)    (((X)>0)?(X):-(X)) 
  |  |  ------------------
  |  |  |  Branch (48:20): [True: 2.68M, False: 2.69M]
  |  |  ------------------
  ------------------
  222|  5.37M|	}
  223|  10.5k|}
init_noise:
  427|  10.5k|{
  428|  10.5k|	int i, j;
  429|  10.5k|	int patt[4] = { -1,0,1,0 };
  430|       |
  431|  10.5k|	emms();
  432|       |
  433|  10.5k|	srand(123457);
  434|       |
  435|  43.0M|	for(i = 0, j = 0; i < MAX_NOISE; i++, j++)
  ------------------
  |  |   37|  43.0M|#define MAX_NOISE 4096
  ------------------
  |  Branch (435:20): [True: 43.0M, False: 10.5k]
  ------------------
  436|  43.0M|	{
  437|  43.0M|		double x1, x2, w, y1, y2;
  438|       |		
  439|  55.1M|		do {
  440|  55.1M|			x1 = 2.0 * rand() / (float) RAND_MAX - 1.0;
  441|  55.1M|			x2 = 2.0 * rand() / (float) RAND_MAX - 1.0;
  442|  55.1M|			w = x1 * x1 + x2 * x2;
  443|  55.1M|		} while (w >= 1.0);
  ------------------
  |  Branch (443:12): [True: 12.1M, False: 43.0M]
  ------------------
  444|       |		
  445|  43.0M|		w = sqrt((-2.0 * log(w)) / w);
  446|  43.0M|		y1 = x1 * w;
  447|  43.0M|		y2 = x1 * w;
  448|       |
  449|  43.0M|		y1 *= STRENGTH1 / sqrt(3.0);
  ------------------
  |  |  423|  43.0M|#define STRENGTH1 12
  ------------------
  450|  43.0M|		y2 *= STRENGTH2 / sqrt(3.0);
  ------------------
  |  |  424|  43.0M|#define STRENGTH2 8
  ------------------
  451|       |
  452|  43.0M|	    y1 /= 2;
  453|  43.0M|		y2 /= 2;
  454|  43.0M|	    y1 += patt[j%4] * STRENGTH1 * 0.35;
  ------------------
  |  |  423|  43.0M|#define STRENGTH1 12
  ------------------
  455|  43.0M|		y2 += patt[j%4] * STRENGTH2 * 0.35;
  ------------------
  |  |  424|  43.0M|#define STRENGTH2 8
  ------------------
  456|       |
  457|  43.0M|		if (y1 < -128) {
  ------------------
  |  Branch (457:7): [True: 0, False: 43.0M]
  ------------------
  458|      0|			y1=-128;
  459|      0|		}
  460|  43.0M|		else if (y1 > 127) {
  ------------------
  |  Branch (460:12): [True: 0, False: 43.0M]
  ------------------
  461|      0|			y1= 127;
  462|      0|		}
  463|       |
  464|  43.0M|		if (y2 < -128) {
  ------------------
  |  Branch (464:7): [True: 0, False: 43.0M]
  ------------------
  465|      0|			y2=-128;
  466|      0|		}
  467|  43.0M|		else if (y2 > 127) {
  ------------------
  |  Branch (467:12): [True: 0, False: 43.0M]
  ------------------
  468|      0|			y2= 127;
  469|      0|		}
  470|       |
  471|  43.0M|		y1 /= 3.0;
  472|  43.0M|		y2 /= 3.0;
  473|  43.0M|		tbls->xvid_noise1[i] = (int) y1;
  474|  43.0M|		tbls->xvid_noise2[i] = (int) y2;
  475|       |	
  476|  43.0M|		if (RAND_N(6) == 0) {
  ------------------
  |  |  422|  43.0M|#define RAND_N(range) ((int) ((double)range * rand() / (RAND_MAX + 1.0)))
  ------------------
  |  Branch (476:7): [True: 7.24M, False: 35.8M]
  ------------------
  477|  7.24M|			j--;
  478|  7.24M|		}
  479|  43.0M|	}
  480|       |	
  481|  32.3M|	for (i = 0; i < MAX_RES; i++)
  ------------------
  |  |   39|  32.3M|#define MAX_RES (MAX_NOISE - MAX_SHIFT)
  |  |  ------------------
  |  |  |  |   37|  32.3M|#define MAX_NOISE 4096
  |  |  ------------------
  |  |               #define MAX_RES (MAX_NOISE - MAX_SHIFT)
  |  |  ------------------
  |  |  |  |   38|  32.3M|#define MAX_SHIFT 1024
  |  |  ------------------
  ------------------
  |  Branch (481:14): [True: 32.2M, False: 10.5k]
  ------------------
  482|   129M|		for (j = 0; j < 3; j++) {
  ------------------
  |  Branch (482:15): [True: 96.8M, False: 32.2M]
  ------------------
  483|  96.8M|			tbls->xvid_prev_shift[i][j] = tbls->xvid_noise1 + (rand() & (MAX_SHIFT - 1));
  ------------------
  |  |   38|  96.8M|#define MAX_SHIFT 1024
  ------------------
  484|  96.8M|			tbls->xvid_prev_shift[i][3 + j] = tbls->xvid_noise2 + (rand() & (MAX_SHIFT - 1));
  ------------------
  |  |   38|  96.8M|#define MAX_SHIFT 1024
  ------------------
  485|  96.8M|		}
  486|  10.5k|}

xvid_Init_QP:
  412|      2|{
  413|       |#if defined (ARCH_IS_IA32) || defined (ARCH_IS_X86_64)
  414|       |	int i;
  415|       |
  416|       |	for(i=0; i<256; ++i) {
  417|       |		xvid_Expand_mmx[i][0] = i;
  418|       |		xvid_Expand_mmx[i][1] = i;
  419|       |		xvid_Expand_mmx[i][2] = i;
  420|       |		xvid_Expand_mmx[i][3] = i;
  421|       |	}
  422|       |#endif
  423|       |
  424|       |	/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */
  425|       |
  426|      2|	Init_FIR_Table(xvid_FIR_1_0_0_0,   -1,  0,  0,  0);
  427|      2|	Init_FIR_Table(xvid_FIR_3_1_0_0,    3, -1,  0,  0);
  428|      2|	Init_FIR_Table(xvid_FIR_6_3_1_0,   -6,  3, -1,  0);
  429|      2|	Init_FIR_Table(xvid_FIR_14_3_2_1,  14, -3,  2, -1);
  430|      2|	Init_FIR_Table(xvid_FIR_20_6_3_1,  20, -6,  3, -1);
  431|      2|	Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6,  3);
  432|      2|	Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6,  3);
  433|      2|	Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6);
  434|      2|	Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6);
  435|      2|	Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7);
  436|      2|	Init_FIR_Table(xvid_FIR_3_6_20_20,  3, -6, 20, 20);
  437|      2|	Init_FIR_Table(xvid_FIR_3_6_19_23,  3, -6, 19, 23);
  438|      2|	Init_FIR_Table(xvid_FIR_1_3_6_20,  -1,  3, -6, 20);
  439|      2|	Init_FIR_Table(xvid_FIR_1_2_3_14,  -1,  2, -3, 14);
  440|      2|	Init_FIR_Table(xvid_FIR_0_1_3_6,    0, -1,  3, -6);
  441|      2|	Init_FIR_Table(xvid_FIR_0_0_1_3,    0,  0, -1,  3);
  442|      2|	Init_FIR_Table(xvid_FIR_0_0_0_1,    0,  0,  0, -1);
  443|       |
  444|      2|}
qpel.c:H_Pass_16_C:
  617|  29.6k|{
  618|  29.6k|#if (SIZE==16)
  619|   521k|  while(H-->0) {
  ------------------
  |  Branch (619:9): [True: 491k, False: 29.6k]
  ------------------
  620|   491k|    int C;
  621|   491k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|   491k|    CLIP_STORE(Dst[ 0],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.21k, False: 489k]
  |  |  |  Branch (612:28): [True: 7.95k, False: 481k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  623|   491k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|   491k|    CLIP_STORE(Dst[ 1],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.65k, False: 488k]
  |  |  |  Branch (612:28): [True: 9.26k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  625|   491k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|   491k|    CLIP_STORE(Dst[ 2],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.24k, False: 488k]
  |  |  |  Branch (612:28): [True: 7.53k, False: 481k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  627|   491k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|   491k|    CLIP_STORE(Dst[ 3],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.26k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.50k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  629|   491k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|   491k|    CLIP_STORE(Dst[ 4],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.12k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.53k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  631|   491k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|   491k|    CLIP_STORE(Dst[ 5],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.94k, False: 488k]
  |  |  |  Branch (612:28): [True: 8.12k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  633|   491k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|   491k|    CLIP_STORE(Dst[ 6],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.10k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.00k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  635|   491k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|   491k|    CLIP_STORE(Dst[ 7],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.45k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.38k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  637|   491k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|   491k|    CLIP_STORE(Dst[ 8],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.84k, False: 487k]
  |  |  |  Branch (612:28): [True: 7.55k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  639|   491k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|   491k|    CLIP_STORE(Dst[ 9],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.82k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.24k, False: 478k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  641|   491k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|   491k|    CLIP_STORE(Dst[10],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.28k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.19k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  643|   491k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|   491k|    CLIP_STORE(Dst[11],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.09k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.66k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  645|   491k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|   491k|    CLIP_STORE(Dst[12],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.24k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.47k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  647|   491k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|   491k|    CLIP_STORE(Dst[13],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.67k, False: 489k]
  |  |  |  Branch (612:28): [True: 8.10k, False: 481k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  649|   491k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|   491k|    CLIP_STORE(Dst[14],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.20k, False: 487k]
  |  |  |  Branch (612:28): [True: 8.41k, False: 479k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  651|   491k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|   491k|    CLIP_STORE(Dst[15],C);
  ------------------
  |  |  612|   491k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.19k, False: 489k]
  |  |  |  Branch (612:28): [True: 7.81k, False: 481k]
  |  |  ------------------
  |  |  613|   491k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   491k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  653|   491k|    Src += BpS;
  654|   491k|    Dst += BpS;
  655|   491k|  }
  656|       |#else
  657|       |  while(H-->0) {
  658|       |    int C;
  659|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|       |    CLIP_STORE(Dst[0],C);
  661|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|       |    CLIP_STORE(Dst[1],C);
  663|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|       |    CLIP_STORE(Dst[2],C);
  665|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|       |    CLIP_STORE(Dst[3],C);
  667|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|       |    CLIP_STORE(Dst[4],C);
  669|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|       |    CLIP_STORE(Dst[5],C);
  671|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|       |    CLIP_STORE(Dst[6],C);
  673|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|       |    CLIP_STORE(Dst[7],C);
  675|       |    Src += BpS;
  676|       |    Dst += BpS;
  677|       |  }
  678|       |#endif
  679|  29.6k|}
qpel.c:H_Pass_Avrg_16_C:
  689|  34.6k|{
  690|  34.6k|#if (SIZE==16)
  691|   610k|  while(H-->0) {
  ------------------
  |  Branch (691:9): [True: 575k, False: 34.6k]
  ------------------
  692|   575k|    int C;
  693|   575k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|   575k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.75k, False: 573k]
  |  |  |  Branch (683:28): [True: 7.27k, False: 566k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  695|   575k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|   575k|    CLIP_STORE( 1,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.26k, False: 572k]
  |  |  |  Branch (683:28): [True: 7.26k, False: 564k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  697|   575k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|   575k|    CLIP_STORE( 2,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.06k, False: 572k]
  |  |  |  Branch (683:28): [True: 6.27k, False: 566k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  699|   575k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|   575k|    CLIP_STORE( 3,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.97k, False: 572k]
  |  |  |  Branch (683:28): [True: 6.82k, False: 565k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  701|   575k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|   575k|    CLIP_STORE( 4,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.76k, False: 571k]
  |  |  |  Branch (683:28): [True: 7.00k, False: 564k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  703|   575k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|   575k|    CLIP_STORE( 5,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.90k, False: 572k]
  |  |  |  Branch (683:28): [True: 6.10k, False: 566k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  705|   575k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|   575k|    CLIP_STORE( 6,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.91k, False: 571k]
  |  |  |  Branch (683:28): [True: 6.99k, False: 564k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  707|   575k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|   575k|    CLIP_STORE( 7,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.47k, False: 572k]
  |  |  |  Branch (683:28): [True: 5.87k, False: 566k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  709|   575k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|   575k|    CLIP_STORE( 8,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.98k, False: 570k]
  |  |  |  Branch (683:28): [True: 7.26k, False: 563k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  711|   575k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|   575k|    CLIP_STORE( 9,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.82k, False: 571k]
  |  |  |  Branch (683:28): [True: 5.88k, False: 565k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  713|   575k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|   575k|    CLIP_STORE(10,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.52k, False: 571k]
  |  |  |  Branch (683:28): [True: 7.07k, False: 563k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  715|   575k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|   575k|    CLIP_STORE(11,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.09k, False: 572k]
  |  |  |  Branch (683:28): [True: 6.95k, False: 565k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  717|   575k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|   575k|    CLIP_STORE(12,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 5.74k, False: 569k]
  |  |  |  Branch (683:28): [True: 6.92k, False: 562k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  719|   575k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|   575k|    CLIP_STORE(13,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.31k, False: 573k]
  |  |  |  Branch (683:28): [True: 5.11k, False: 568k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  721|   575k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|   575k|    CLIP_STORE(14,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 5.43k, False: 570k]
  |  |  |  Branch (683:28): [True: 7.14k, False: 562k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  723|   575k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|   575k|    CLIP_STORE(15,C);
  ------------------
  |  |  683|   575k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.87k, False: 573k]
  |  |  |  Branch (683:28): [True: 5.07k, False: 568k]
  |  |  ------------------
  |  |  684|   575k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   575k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   575k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  725|   575k|    Src += BpS;
  726|   575k|    Dst += BpS;
  727|   575k|  }
  728|       |#else
  729|       |  while(H-->0) {
  730|       |    int C;
  731|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|       |    CLIP_STORE(0,C);
  733|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|       |    CLIP_STORE(1,C);
  735|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|       |    CLIP_STORE(2,C);
  737|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|       |    CLIP_STORE(3,C);
  739|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|       |    CLIP_STORE(4,C);
  741|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|       |    CLIP_STORE(5,C);
  743|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|       |    CLIP_STORE(6,C);
  745|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|       |    CLIP_STORE(7,C);
  747|       |    Src += BpS;
  748|       |    Dst += BpS;
  749|       |  }
  750|       |#endif
  751|  34.6k|}
qpel.c:H_Pass_Avrg_Up_16_C:
  761|  41.0k|{
  762|  41.0k|#if (SIZE==16)
  763|   719k|  while(H-->0) {
  ------------------
  |  Branch (763:9): [True: 678k, False: 41.0k]
  ------------------
  764|   678k|    int C;
  765|   678k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|   678k|    CLIP_STORE(0,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.33k, False: 675k]
  |  |  |  Branch (755:28): [True: 6.36k, False: 669k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  767|   678k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|   678k|    CLIP_STORE( 1,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.36k, False: 672k]
  |  |  |  Branch (755:28): [True: 10.9k, False: 661k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  769|   678k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|   678k|    CLIP_STORE( 2,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.04k, False: 675k]
  |  |  |  Branch (755:28): [True: 6.56k, False: 668k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  771|   678k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|   678k|    CLIP_STORE( 3,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 7.57k, False: 670k]
  |  |  |  Branch (755:28): [True: 8.78k, False: 661k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  773|   678k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|   678k|    CLIP_STORE( 4,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.87k, False: 675k]
  |  |  |  Branch (755:28): [True: 8.98k, False: 666k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  775|   678k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|   678k|    CLIP_STORE( 5,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.52k, False: 672k]
  |  |  |  Branch (755:28): [True: 9.07k, False: 663k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  777|   678k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|   678k|    CLIP_STORE( 6,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.30k, False: 674k]
  |  |  |  Branch (755:28): [True: 7.41k, False: 667k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  779|   678k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|   678k|    CLIP_STORE( 7,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.80k, False: 673k]
  |  |  |  Branch (755:28): [True: 9.48k, False: 663k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  781|   678k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|   678k|    CLIP_STORE( 8,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.74k, False: 674k]
  |  |  |  Branch (755:28): [True: 7.16k, False: 667k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  783|   678k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|   678k|    CLIP_STORE( 9,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 7.34k, False: 670k]
  |  |  |  Branch (755:28): [True: 10.5k, False: 660k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  785|   678k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|   678k|    CLIP_STORE(10,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.56k, False: 674k]
  |  |  |  Branch (755:28): [True: 7.44k, False: 667k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  787|   678k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|   678k|    CLIP_STORE(11,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 6.87k, False: 671k]
  |  |  |  Branch (755:28): [True: 10.2k, False: 661k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  789|   678k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|   678k|    CLIP_STORE(12,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.20k, False: 674k]
  |  |  |  Branch (755:28): [True: 9.36k, False: 665k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  791|   678k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|   678k|    CLIP_STORE(13,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.37k, False: 674k]
  |  |  |  Branch (755:28): [True: 10.1k, False: 664k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  793|   678k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|   678k|    CLIP_STORE(14,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.98k, False: 675k]
  |  |  |  Branch (755:28): [True: 9.72k, False: 665k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  795|   678k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|   678k|    CLIP_STORE(15,C);
  ------------------
  |  |  755|   678k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.61k, False: 675k]
  |  |  |  Branch (755:28): [True: 9.85k, False: 665k]
  |  |  ------------------
  |  |  756|   678k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   678k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   678k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  797|   678k|    Src += BpS;
  798|   678k|    Dst += BpS;
  799|   678k|  }
  800|       |#else
  801|       |  while(H-->0) {
  802|       |    int C;
  803|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|       |    CLIP_STORE(0,C);
  805|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|       |    CLIP_STORE(1,C);
  807|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|       |    CLIP_STORE(2,C);
  809|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|       |    CLIP_STORE(3,C);
  811|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|       |    CLIP_STORE(4,C);
  813|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|       |    CLIP_STORE(5,C);
  815|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|       |    CLIP_STORE(6,C);
  817|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|       |    CLIP_STORE(7,C);
  819|       |    Src += BpS;
  820|       |    Dst += BpS;
  821|       |  }
  822|       |#endif
  823|  41.0k|}
qpel.c:V_Pass_16_C:
  837|  25.1k|{
  838|  25.1k|#if (SIZE==16)
  839|   427k|  while(H-->0) {
  ------------------
  |  Branch (839:9): [True: 402k, False: 25.1k]
  ------------------
  840|   402k|    int C;
  841|   402k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|   402k|    CLIP_STORE(Dst[BpS* 0],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.17k, False: 401k]
  |  |  |  Branch (832:28): [True: 2.23k, False: 399k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  843|   402k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|   402k|    CLIP_STORE(Dst[BpS* 1],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.97k, False: 399k]
  |  |  |  Branch (832:28): [True: 2.34k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  845|   402k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|   402k|    CLIP_STORE(Dst[BpS* 2],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.61k, False: 400k]
  |  |  |  Branch (832:28): [True: 2.09k, False: 398k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  847|   402k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|   402k|    CLIP_STORE(Dst[BpS* 3],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.43k, False: 399k]
  |  |  |  Branch (832:28): [True: 2.58k, False: 396k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  849|   402k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|   402k|    CLIP_STORE(Dst[BpS* 4],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.07k, False: 400k]
  |  |  |  Branch (832:28): [True: 2.68k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  851|   402k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|   402k|    CLIP_STORE(Dst[BpS* 5],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.33k, False: 400k]
  |  |  |  Branch (832:28): [True: 2.70k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  853|   402k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|   402k|    CLIP_STORE(Dst[BpS* 6],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.21k, False: 400k]
  |  |  |  Branch (832:28): [True: 2.82k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  855|   402k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|   402k|    CLIP_STORE(Dst[BpS* 7],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.84k, False: 400k]
  |  |  |  Branch (832:28): [True: 2.95k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  857|   402k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|   402k|    CLIP_STORE(Dst[BpS* 8],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.56k, False: 399k]
  |  |  |  Branch (832:28): [True: 3.69k, False: 396k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  859|   402k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|   402k|    CLIP_STORE(Dst[BpS* 9],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.24k, False: 400k]
  |  |  |  Branch (832:28): [True: 2.94k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  861|   402k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|   402k|    CLIP_STORE(Dst[BpS*10],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.69k, False: 399k]
  |  |  |  Branch (832:28): [True: 3.23k, False: 396k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  863|   402k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|   402k|    CLIP_STORE(Dst[BpS*11],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.08k, False: 400k]
  |  |  |  Branch (832:28): [True: 3.44k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  865|   402k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|   402k|    CLIP_STORE(Dst[BpS*12],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.27k, False: 400k]
  |  |  |  Branch (832:28): [True: 3.84k, False: 396k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  867|   402k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|   402k|    CLIP_STORE(Dst[BpS*13],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.46k, False: 400k]
  |  |  |  Branch (832:28): [True: 2.49k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  869|   402k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|   402k|    CLIP_STORE(Dst[BpS*14],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.96k, False: 399k]
  |  |  |  Branch (832:28): [True: 2.86k, False: 396k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  871|   402k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|   402k|    CLIP_STORE(Dst[BpS*15],C);
  ------------------
  |  |  832|   402k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.92k, False: 399k]
  |  |  |  Branch (832:28): [True: 2.54k, False: 397k]
  |  |  ------------------
  |  |  833|   402k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   402k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  873|   402k|    Src += 1;
  874|   402k|    Dst += 1;
  875|   402k|  }
  876|       |#else
  877|       |  while(H-->0) {
  878|       |    int C;
  879|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|       |    CLIP_STORE(Dst[BpS*0],C);
  881|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|       |    CLIP_STORE(Dst[BpS*1],C);
  883|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|       |    CLIP_STORE(Dst[BpS*2],C);
  885|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|       |    CLIP_STORE(Dst[BpS*3],C);
  887|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|       |    CLIP_STORE(Dst[BpS*4],C);
  889|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|       |    CLIP_STORE(Dst[BpS*5],C);
  891|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|       |    CLIP_STORE(Dst[BpS*6],C);
  893|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|       |    CLIP_STORE(Dst[BpS*7],C);
  895|       |    Src += 1;
  896|       |    Dst += 1;
  897|       |  }
  898|       |#endif
  899|  25.1k|}
qpel.c:V_Pass_Avrg_16_C:
  909|  26.8k|{
  910|  26.8k|#if (SIZE==16)
  911|   457k|  while(H-->0) {
  ------------------
  |  Branch (911:9): [True: 430k, False: 26.8k]
  ------------------
  912|   430k|    int C;
  913|   430k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|   430k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.39k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.75k, False: 426k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  915|   430k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|   430k|    CLIP_STORE( 1,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.75k, False: 428k]
  |  |  |  Branch (903:28): [True: 3.00k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  917|   430k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|   430k|    CLIP_STORE( 2,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.75k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.87k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  919|   430k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|   430k|    CLIP_STORE( 3,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.29k, False: 428k]
  |  |  |  Branch (903:28): [True: 3.19k, False: 424k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  921|   430k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|   430k|    CLIP_STORE( 4,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.45k, False: 427k]
  |  |  |  Branch (903:28): [True: 3.07k, False: 424k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  923|   430k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|   430k|    CLIP_STORE( 5,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.92k, False: 428k]
  |  |  |  Branch (903:28): [True: 3.01k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  925|   430k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|   430k|    CLIP_STORE( 6,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.55k, False: 427k]
  |  |  |  Branch (903:28): [True: 3.39k, False: 424k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  927|   430k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|   430k|    CLIP_STORE( 7,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.55k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.90k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  929|   430k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|   430k|    CLIP_STORE( 8,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.17k, False: 428k]
  |  |  |  Branch (903:28): [True: 4.02k, False: 424k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  931|   430k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|   430k|    CLIP_STORE( 9,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.75k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.84k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  933|   430k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|   430k|    CLIP_STORE(10,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.14k, False: 427k]
  |  |  |  Branch (903:28): [True: 3.71k, False: 423k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  935|   430k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|   430k|    CLIP_STORE(11,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.72k, False: 428k]
  |  |  |  Branch (903:28): [True: 3.45k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  937|   430k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|   430k|    CLIP_STORE(12,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 5.47k, False: 424k]
  |  |  |  Branch (903:28): [True: 3.42k, False: 421k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  939|   430k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|   430k|    CLIP_STORE(13,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.66k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.15k, False: 426k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  941|   430k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|   430k|    CLIP_STORE(14,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 5.61k, False: 424k]
  |  |  |  Branch (903:28): [True: 3.53k, False: 421k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  943|   430k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|   430k|    CLIP_STORE(15,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.81k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.18k, False: 426k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  945|   430k|    Src += 1;
  946|   430k|    Dst += 1;
  947|   430k|  }
  948|       |#else
  949|       |  while(H-->0) {
  950|       |    int C;
  951|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|       |    CLIP_STORE(0,C);
  953|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|       |    CLIP_STORE(1,C);
  955|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|       |    CLIP_STORE(2,C);
  957|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|       |    CLIP_STORE(3,C);
  959|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|       |    CLIP_STORE(4,C);
  961|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|       |    CLIP_STORE(5,C);
  963|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|       |    CLIP_STORE(6,C);
  965|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|       |    CLIP_STORE(7,C);
  967|       |    Src += 1;
  968|       |    Dst += 1;
  969|       |  }
  970|       |#endif
  971|  26.8k|}
qpel.c:V_Pass_Avrg_Up_16_C:
  981|  33.7k|{
  982|  33.7k|#if (SIZE==16)
  983|   573k|  while(H-->0) {
  ------------------
  |  Branch (983:9): [True: 540k, False: 33.7k]
  ------------------
  984|   540k|    int C;
  985|   540k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|   540k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.46k, False: 538k]
  |  |  |  Branch (975:28): [True: 2.57k, False: 535k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  987|   540k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|   540k|    CLIP_STORE( 1,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 6.40k, False: 533k]
  |  |  |  Branch (975:28): [True: 3.30k, False: 530k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  989|   540k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|   540k|    CLIP_STORE( 2,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.86k, False: 538k]
  |  |  |  Branch (975:28): [True: 2.52k, False: 535k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  991|   540k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|   540k|    CLIP_STORE( 3,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 7.09k, False: 532k]
  |  |  |  Branch (975:28): [True: 3.13k, False: 529k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  993|   540k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|   540k|    CLIP_STORE( 4,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.81k, False: 537k]
  |  |  |  Branch (975:28): [True: 3.56k, False: 533k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  995|   540k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|   540k|    CLIP_STORE( 5,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.96k, False: 537k]
  |  |  |  Branch (975:28): [True: 3.72k, False: 533k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  997|   540k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|   540k|    CLIP_STORE( 6,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.84k, False: 537k]
  |  |  |  Branch (975:28): [True: 3.05k, False: 534k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  999|   540k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|   540k|    CLIP_STORE( 7,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.41k, False: 536k]
  |  |  |  Branch (975:28): [True: 3.80k, False: 532k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1001|   540k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|   540k|    CLIP_STORE( 8,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.51k, False: 538k]
  |  |  |  Branch (975:28): [True: 3.43k, False: 535k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1003|   540k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|   540k|    CLIP_STORE( 9,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.98k, False: 537k]
  |  |  |  Branch (975:28): [True: 4.27k, False: 532k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1005|   540k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|   540k|    CLIP_STORE(10,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.26k, False: 537k]
  |  |  |  Branch (975:28): [True: 3.00k, False: 534k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1007|   540k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|   540k|    CLIP_STORE(11,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.43k, False: 536k]
  |  |  |  Branch (975:28): [True: 4.38k, False: 532k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1009|   540k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|   540k|    CLIP_STORE(12,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.04k, False: 537k]
  |  |  |  Branch (975:28): [True: 3.84k, False: 534k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1011|   540k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|   540k|    CLIP_STORE(13,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.65k, False: 537k]
  |  |  |  Branch (975:28): [True: 3.98k, False: 533k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1013|   540k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|   540k|    CLIP_STORE(14,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.92k, False: 538k]
  |  |  |  Branch (975:28): [True: 4.11k, False: 533k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1015|   540k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|   540k|    CLIP_STORE(15,C);
  ------------------
  |  |  975|   540k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.05k, False: 537k]
  |  |  |  Branch (975:28): [True: 4.02k, False: 533k]
  |  |  ------------------
  |  |  976|   540k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   540k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   540k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1017|   540k|    Src += 1;
 1018|   540k|    Dst += 1;
 1019|   540k|  }
 1020|       |#else
 1021|       |  while(H-->0) {
 1022|       |    int C;
 1023|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|       |    CLIP_STORE(0,C);
 1025|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|       |    CLIP_STORE(1,C);
 1027|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|       |    CLIP_STORE(2,C);
 1029|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|       |    CLIP_STORE(3,C);
 1031|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|       |    CLIP_STORE(4,C);
 1033|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|       |    CLIP_STORE(5,C);
 1035|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|       |    CLIP_STORE(6,C);
 1037|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|       |    CLIP_STORE(7,C);
 1039|       |    Src += 1;
 1040|       |    Dst += 1;
 1041|       |  }
 1042|       |#endif
 1043|  33.7k|}
qpel.c:H_Pass_8_C:
  617|  60.5k|{
  618|       |#if (SIZE==16)
  619|       |  while(H-->0) {
  620|       |    int C;
  621|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|       |    CLIP_STORE(Dst[ 0],C);
  623|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|       |    CLIP_STORE(Dst[ 1],C);
  625|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|       |    CLIP_STORE(Dst[ 2],C);
  627|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|       |    CLIP_STORE(Dst[ 3],C);
  629|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|       |    CLIP_STORE(Dst[ 4],C);
  631|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|       |    CLIP_STORE(Dst[ 5],C);
  633|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|       |    CLIP_STORE(Dst[ 6],C);
  635|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|       |    CLIP_STORE(Dst[ 7],C);
  637|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|       |    CLIP_STORE(Dst[ 8],C);
  639|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|       |    CLIP_STORE(Dst[ 9],C);
  641|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|       |    CLIP_STORE(Dst[10],C);
  643|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|       |    CLIP_STORE(Dst[11],C);
  645|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|       |    CLIP_STORE(Dst[12],C);
  647|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|       |    CLIP_STORE(Dst[13],C);
  649|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|       |    CLIP_STORE(Dst[14],C);
  651|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|       |    CLIP_STORE(Dst[15],C);
  653|       |    Src += BpS;
  654|       |    Dst += BpS;
  655|       |  }
  656|       |#else
  657|   578k|  while(H-->0) {
  ------------------
  |  Branch (657:9): [True: 517k, False: 60.5k]
  ------------------
  658|   517k|    int C;
  659|   517k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|   517k|    CLIP_STORE(Dst[0],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.55k, False: 515k]
  |  |  |  Branch (612:28): [True: 6.46k, False: 508k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  661|   517k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|   517k|    CLIP_STORE(Dst[1],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.36k, False: 514k]
  |  |  |  Branch (612:28): [True: 7.72k, False: 506k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  663|   517k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|   517k|    CLIP_STORE(Dst[2],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.97k, False: 514k]
  |  |  |  Branch (612:28): [True: 6.81k, False: 508k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  665|   517k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|   517k|    CLIP_STORE(Dst[3],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.32k, False: 513k]
  |  |  |  Branch (612:28): [True: 7.14k, False: 506k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  667|   517k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|   517k|    CLIP_STORE(Dst[4],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.54k, False: 514k]
  |  |  |  Branch (612:28): [True: 7.37k, False: 507k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  669|   517k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|   517k|    CLIP_STORE(Dst[5],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.84k, False: 515k]
  |  |  |  Branch (612:28): [True: 7.20k, False: 507k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  671|   517k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|   517k|    CLIP_STORE(Dst[6],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.78k, False: 514k]
  |  |  |  Branch (612:28): [True: 7.51k, False: 506k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  673|   517k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|   517k|    CLIP_STORE(Dst[7],C);
  ------------------
  |  |  612|   517k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.41k, False: 515k]
  |  |  |  Branch (612:28): [True: 6.77k, False: 508k]
  |  |  ------------------
  |  |  613|   517k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   517k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  675|   517k|    Src += BpS;
  676|   517k|    Dst += BpS;
  677|   517k|  }
  678|  60.5k|#endif
  679|  60.5k|}
qpel.c:H_Pass_Avrg_8_C:
  689|   106k|{
  690|       |#if (SIZE==16)
  691|       |  while(H-->0) {
  692|       |    int C;
  693|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|       |    CLIP_STORE(0,C);
  695|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|       |    CLIP_STORE( 1,C);
  697|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|       |    CLIP_STORE( 2,C);
  699|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|       |    CLIP_STORE( 3,C);
  701|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|       |    CLIP_STORE( 4,C);
  703|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|       |    CLIP_STORE( 5,C);
  705|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|       |    CLIP_STORE( 6,C);
  707|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|       |    CLIP_STORE( 7,C);
  709|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|       |    CLIP_STORE( 8,C);
  711|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|       |    CLIP_STORE( 9,C);
  713|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|       |    CLIP_STORE(10,C);
  715|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|       |    CLIP_STORE(11,C);
  717|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|       |    CLIP_STORE(12,C);
  719|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|       |    CLIP_STORE(13,C);
  721|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|       |    CLIP_STORE(14,C);
  723|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|       |    CLIP_STORE(15,C);
  725|       |    Src += BpS;
  726|       |    Dst += BpS;
  727|       |  }
  728|       |#else
  729|  1.01M|  while(H-->0) {
  ------------------
  |  Branch (729:9): [True: 911k, False: 106k]
  ------------------
  730|   911k|    int C;
  731|   911k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|   911k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.73k, False: 907k]
  |  |  |  Branch (683:28): [True: 13.7k, False: 894k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  733|   911k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|   911k|    CLIP_STORE(1,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.63k, False: 907k]
  |  |  |  Branch (683:28): [True: 13.7k, False: 893k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  735|   911k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|   911k|    CLIP_STORE(2,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.84k, False: 906k]
  |  |  |  Branch (683:28): [True: 12.8k, False: 893k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  737|   911k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|   911k|    CLIP_STORE(3,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 5.01k, False: 906k]
  |  |  |  Branch (683:28): [True: 13.4k, False: 893k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  739|   911k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|   911k|    CLIP_STORE(4,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 8.12k, False: 903k]
  |  |  |  Branch (683:28): [True: 13.6k, False: 889k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  741|   911k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|   911k|    CLIP_STORE(5,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.19k, False: 907k]
  |  |  |  Branch (683:28): [True: 11.1k, False: 896k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  743|   911k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|   911k|    CLIP_STORE(6,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 8.38k, False: 903k]
  |  |  |  Branch (683:28): [True: 14.1k, False: 889k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  745|   911k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|   911k|    CLIP_STORE(7,C);
  ------------------
  |  |  683|   911k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.11k, False: 908k]
  |  |  |  Branch (683:28): [True: 11.3k, False: 897k]
  |  |  ------------------
  |  |  684|   911k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   911k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   911k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  747|   911k|    Src += BpS;
  748|   911k|    Dst += BpS;
  749|   911k|  }
  750|   106k|#endif
  751|   106k|}
qpel.c:H_Pass_Avrg_Up_8_C:
  761|   131k|{
  762|       |#if (SIZE==16)
  763|       |  while(H-->0) {
  764|       |    int C;
  765|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|       |    CLIP_STORE(0,C);
  767|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|       |    CLIP_STORE( 1,C);
  769|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|       |    CLIP_STORE( 2,C);
  771|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|       |    CLIP_STORE( 3,C);
  773|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|       |    CLIP_STORE( 4,C);
  775|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|       |    CLIP_STORE( 5,C);
  777|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|       |    CLIP_STORE( 6,C);
  779|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|       |    CLIP_STORE( 7,C);
  781|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|       |    CLIP_STORE( 8,C);
  783|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|       |    CLIP_STORE( 9,C);
  785|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|       |    CLIP_STORE(10,C);
  787|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|       |    CLIP_STORE(11,C);
  789|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|       |    CLIP_STORE(12,C);
  791|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|       |    CLIP_STORE(13,C);
  793|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|       |    CLIP_STORE(14,C);
  795|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|       |    CLIP_STORE(15,C);
  797|       |    Src += BpS;
  798|       |    Dst += BpS;
  799|       |  }
  800|       |#else
  801|  1.26M|  while(H-->0) {
  ------------------
  |  Branch (801:9): [True: 1.13M, False: 131k]
  ------------------
  802|  1.13M|    int C;
  803|  1.13M|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|  1.13M|    CLIP_STORE(0,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.41k, False: 1.13M]
  |  |  |  Branch (755:28): [True: 11.9k, False: 1.12M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  805|  1.13M|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|  1.13M|    CLIP_STORE(1,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 8.75k, False: 1.12M]
  |  |  |  Branch (755:28): [True: 16.7k, False: 1.11M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  807|  1.13M|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|  1.13M|    CLIP_STORE(2,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.20k, False: 1.13M]
  |  |  |  Branch (755:28): [True: 12.4k, False: 1.12M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  809|  1.13M|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|  1.13M|    CLIP_STORE(3,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 8.45k, False: 1.12M]
  |  |  |  Branch (755:28): [True: 16.1k, False: 1.11M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  811|  1.13M|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|  1.13M|    CLIP_STORE(4,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.75k, False: 1.13M]
  |  |  |  Branch (755:28): [True: 15.1k, False: 1.11M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  813|  1.13M|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|  1.13M|    CLIP_STORE(5,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.70k, False: 1.13M]
  |  |  |  Branch (755:28): [True: 15.6k, False: 1.11M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  815|  1.13M|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|  1.13M|    CLIP_STORE(6,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.20k, False: 1.13M]
  |  |  |  Branch (755:28): [True: 15.0k, False: 1.11M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  817|  1.13M|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|  1.13M|    CLIP_STORE(7,C);
  ------------------
  |  |  755|  1.13M|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.45k, False: 1.13M]
  |  |  |  Branch (755:28): [True: 15.1k, False: 1.11M]
  |  |  ------------------
  |  |  756|  1.13M|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|  1.13M|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|  1.13M|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  819|  1.13M|    Src += BpS;
  820|  1.13M|    Dst += BpS;
  821|  1.13M|  }
  822|   131k|#endif
  823|   131k|}
qpel.c:V_Pass_8_C:
  837|  55.9k|{
  838|       |#if (SIZE==16)
  839|       |  while(H-->0) {
  840|       |    int C;
  841|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|       |    CLIP_STORE(Dst[BpS* 0],C);
  843|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|       |    CLIP_STORE(Dst[BpS* 1],C);
  845|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|       |    CLIP_STORE(Dst[BpS* 2],C);
  847|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|       |    CLIP_STORE(Dst[BpS* 3],C);
  849|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|       |    CLIP_STORE(Dst[BpS* 4],C);
  851|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|       |    CLIP_STORE(Dst[BpS* 5],C);
  853|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|       |    CLIP_STORE(Dst[BpS* 6],C);
  855|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|       |    CLIP_STORE(Dst[BpS* 7],C);
  857|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|       |    CLIP_STORE(Dst[BpS* 8],C);
  859|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|       |    CLIP_STORE(Dst[BpS* 9],C);
  861|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|       |    CLIP_STORE(Dst[BpS*10],C);
  863|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|       |    CLIP_STORE(Dst[BpS*11],C);
  865|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|       |    CLIP_STORE(Dst[BpS*12],C);
  867|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|       |    CLIP_STORE(Dst[BpS*13],C);
  869|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|       |    CLIP_STORE(Dst[BpS*14],C);
  871|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|       |    CLIP_STORE(Dst[BpS*15],C);
  873|       |    Src += 1;
  874|       |    Dst += 1;
  875|       |  }
  876|       |#else
  877|   503k|  while(H-->0) {
  ------------------
  |  Branch (877:9): [True: 447k, False: 55.9k]
  ------------------
  878|   447k|    int C;
  879|   447k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|   447k|    CLIP_STORE(Dst[BpS*0],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.21k, False: 445k]
  |  |  |  Branch (832:28): [True: 4.05k, False: 441k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  881|   447k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|   447k|    CLIP_STORE(Dst[BpS*1],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.69k, False: 444k]
  |  |  |  Branch (832:28): [True: 3.66k, False: 441k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  883|   447k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|   447k|    CLIP_STORE(Dst[BpS*2],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.11k, False: 443k]
  |  |  |  Branch (832:28): [True: 3.30k, False: 440k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  885|   447k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|   447k|    CLIP_STORE(Dst[BpS*3],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.62k, False: 443k]
  |  |  |  Branch (832:28): [True: 3.35k, False: 440k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  887|   447k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|   447k|    CLIP_STORE(Dst[BpS*4],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.77k, False: 443k]
  |  |  |  Branch (832:28): [True: 4.13k, False: 439k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  889|   447k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|   447k|    CLIP_STORE(Dst[BpS*5],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.14k, False: 444k]
  |  |  |  Branch (832:28): [True: 3.69k, False: 440k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  891|   447k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|   447k|    CLIP_STORE(Dst[BpS*6],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.57k, False: 444k]
  |  |  |  Branch (832:28): [True: 3.77k, False: 440k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  893|   447k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|   447k|    CLIP_STORE(Dst[BpS*7],C);
  ------------------
  |  |  832|   447k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.87k, False: 444k]
  |  |  |  Branch (832:28): [True: 3.64k, False: 441k]
  |  |  ------------------
  |  |  833|   447k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   447k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  895|   447k|    Src += 1;
  896|   447k|    Dst += 1;
  897|   447k|  }
  898|  55.9k|#endif
  899|  55.9k|}
qpel.c:V_Pass_Avrg_8_C:
  909|  64.2k|{
  910|       |#if (SIZE==16)
  911|       |  while(H-->0) {
  912|       |    int C;
  913|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|       |    CLIP_STORE(0,C);
  915|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|       |    CLIP_STORE( 1,C);
  917|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|       |    CLIP_STORE( 2,C);
  919|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|       |    CLIP_STORE( 3,C);
  921|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|       |    CLIP_STORE( 4,C);
  923|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|       |    CLIP_STORE( 5,C);
  925|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|       |    CLIP_STORE( 6,C);
  927|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|       |    CLIP_STORE( 7,C);
  929|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|       |    CLIP_STORE( 8,C);
  931|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|       |    CLIP_STORE( 9,C);
  933|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|       |    CLIP_STORE(10,C);
  935|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|       |    CLIP_STORE(11,C);
  937|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|       |    CLIP_STORE(12,C);
  939|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|       |    CLIP_STORE(13,C);
  941|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|       |    CLIP_STORE(14,C);
  943|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|       |    CLIP_STORE(15,C);
  945|       |    Src += 1;
  946|       |    Dst += 1;
  947|       |  }
  948|       |#else
  949|   578k|  while(H-->0) {
  ------------------
  |  Branch (949:9): [True: 514k, False: 64.2k]
  ------------------
  950|   514k|    int C;
  951|   514k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|   514k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.40k, False: 512k]
  |  |  |  Branch (903:28): [True: 2.63k, False: 509k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  953|   514k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|   514k|    CLIP_STORE(1,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.11k, False: 511k]
  |  |  |  Branch (903:28): [True: 2.81k, False: 509k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  955|   514k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|   514k|    CLIP_STORE(2,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.99k, False: 512k]
  |  |  |  Branch (903:28): [True: 2.66k, False: 509k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  957|   514k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|   514k|    CLIP_STORE(3,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.72k, False: 511k]
  |  |  |  Branch (903:28): [True: 2.70k, False: 508k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  959|   514k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|   514k|    CLIP_STORE(4,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.73k, False: 511k]
  |  |  |  Branch (903:28): [True: 2.61k, False: 508k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  961|   514k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|   514k|    CLIP_STORE(5,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.16k, False: 511k]
  |  |  |  Branch (903:28): [True: 2.47k, False: 509k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  963|   514k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|   514k|    CLIP_STORE(6,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.39k, False: 511k]
  |  |  |  Branch (903:28): [True: 2.63k, False: 509k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  965|   514k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|   514k|    CLIP_STORE(7,C);
  ------------------
  |  |  903|   514k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.54k, False: 512k]
  |  |  |  Branch (903:28): [True: 2.42k, False: 510k]
  |  |  ------------------
  |  |  904|   514k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   514k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   514k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  967|   514k|    Src += 1;
  968|   514k|    Dst += 1;
  969|   514k|  }
  970|  64.2k|#endif
  971|  64.2k|}
qpel.c:V_Pass_Avrg_Up_8_C:
  981|  74.0k|{
  982|       |#if (SIZE==16)
  983|       |  while(H-->0) {
  984|       |    int C;
  985|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|       |    CLIP_STORE(0,C);
  987|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|       |    CLIP_STORE( 1,C);
  989|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|       |    CLIP_STORE( 2,C);
  991|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|       |    CLIP_STORE( 3,C);
  993|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|       |    CLIP_STORE( 4,C);
  995|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|       |    CLIP_STORE( 5,C);
  997|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|       |    CLIP_STORE( 6,C);
  999|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|       |    CLIP_STORE( 7,C);
 1001|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|       |    CLIP_STORE( 8,C);
 1003|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|       |    CLIP_STORE( 9,C);
 1005|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|       |    CLIP_STORE(10,C);
 1007|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|       |    CLIP_STORE(11,C);
 1009|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|       |    CLIP_STORE(12,C);
 1011|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|       |    CLIP_STORE(13,C);
 1013|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|       |    CLIP_STORE(14,C);
 1015|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|       |    CLIP_STORE(15,C);
 1017|       |    Src += 1;
 1018|       |    Dst += 1;
 1019|       |  }
 1020|       |#else
 1021|   666k|  while(H-->0) {
  ------------------
  |  Branch (1021:9): [True: 592k, False: 74.0k]
  ------------------
 1022|   592k|    int C;
 1023|   592k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|   592k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.57k, False: 590k]
  |  |  |  Branch (975:28): [True: 3.15k, False: 587k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1025|   592k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|   592k|    CLIP_STORE(1,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.47k, False: 589k]
  |  |  |  Branch (975:28): [True: 3.79k, False: 585k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1027|   592k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|   592k|    CLIP_STORE(2,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.92k, False: 590k]
  |  |  |  Branch (975:28): [True: 3.04k, False: 587k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1029|   592k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|   592k|    CLIP_STORE(3,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.88k, False: 589k]
  |  |  |  Branch (975:28): [True: 3.81k, False: 585k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1031|   592k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|   592k|    CLIP_STORE(4,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.32k, False: 589k]
  |  |  |  Branch (975:28): [True: 3.67k, False: 586k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1033|   592k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|   592k|    CLIP_STORE(5,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.69k, False: 589k]
  |  |  |  Branch (975:28): [True: 3.54k, False: 585k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1035|   592k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|   592k|    CLIP_STORE(6,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.42k, False: 589k]
  |  |  |  Branch (975:28): [True: 3.61k, False: 586k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1037|   592k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|   592k|    CLIP_STORE(7,C);
  ------------------
  |  |  975|   592k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.83k, False: 590k]
  |  |  |  Branch (975:28): [True: 3.53k, False: 586k]
  |  |  ------------------
  |  |  976|   592k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   592k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   592k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1039|   592k|    Src += 1;
 1040|   592k|    Dst += 1;
 1041|   592k|  }
 1042|  74.0k|#endif
 1043|  74.0k|}
qpel.c:H_Pass_16_Add_C:
  617|  8.41k|{
  618|  8.41k|#if (SIZE==16)
  619|   143k|  while(H-->0) {
  ------------------
  |  Branch (619:9): [True: 134k, False: 8.41k]
  ------------------
  620|   134k|    int C;
  621|   134k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|   134k|    CLIP_STORE(Dst[ 0],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.99k, False: 132k]
  |  |  |  Branch (612:28): [True: 4.23k, False: 128k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  623|   134k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|   134k|    CLIP_STORE(Dst[ 1],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.08k, False: 131k]
  |  |  |  Branch (612:28): [True: 4.99k, False: 126k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  625|   134k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|   134k|    CLIP_STORE(Dst[ 2],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.02k, False: 131k]
  |  |  |  Branch (612:28): [True: 3.79k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  627|   134k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|   134k|    CLIP_STORE(Dst[ 3],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.48k, False: 131k]
  |  |  |  Branch (612:28): [True: 4.34k, False: 126k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  629|   134k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|   134k|    CLIP_STORE(Dst[ 4],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.98k, False: 131k]
  |  |  |  Branch (612:28): [True: 4.04k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  631|   134k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|   134k|    CLIP_STORE(Dst[ 5],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.08k, False: 131k]
  |  |  |  Branch (612:28): [True: 4.11k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  633|   134k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|   134k|    CLIP_STORE(Dst[ 6],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.99k, False: 131k]
  |  |  |  Branch (612:28): [True: 3.89k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  635|   134k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|   134k|    CLIP_STORE(Dst[ 7],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.98k, False: 130k]
  |  |  |  Branch (612:28): [True: 4.22k, False: 126k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  637|   134k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|   134k|    CLIP_STORE(Dst[ 8],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.29k, False: 131k]
  |  |  |  Branch (612:28): [True: 3.46k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  639|   134k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|   134k|    CLIP_STORE(Dst[ 9],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.79k, False: 129k]
  |  |  |  Branch (612:28): [True: 4.64k, False: 125k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  641|   134k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|   134k|    CLIP_STORE(Dst[10],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.46k, False: 131k]
  |  |  |  Branch (612:28): [True: 3.90k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  643|   134k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|   134k|    CLIP_STORE(Dst[11],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.24k, False: 130k]
  |  |  |  Branch (612:28): [True: 4.26k, False: 126k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  645|   134k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|   134k|    CLIP_STORE(Dst[12],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.91k, False: 130k]
  |  |  |  Branch (612:28): [True: 4.25k, False: 126k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  647|   134k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|   134k|    CLIP_STORE(Dst[13],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.53k, False: 132k]
  |  |  |  Branch (612:28): [True: 4.26k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  649|   134k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|   134k|    CLIP_STORE(Dst[14],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.40k, False: 131k]
  |  |  |  Branch (612:28): [True: 4.77k, False: 126k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  651|   134k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|   134k|    CLIP_STORE(Dst[15],C);
  ------------------
  |  |  612|   134k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.13k, False: 132k]
  |  |  |  Branch (612:28): [True: 4.55k, False: 127k]
  |  |  ------------------
  |  |  613|   134k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   134k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  653|   134k|    Src += BpS;
  654|   134k|    Dst += BpS;
  655|   134k|  }
  656|       |#else
  657|       |  while(H-->0) {
  658|       |    int C;
  659|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|       |    CLIP_STORE(Dst[0],C);
  661|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|       |    CLIP_STORE(Dst[1],C);
  663|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|       |    CLIP_STORE(Dst[2],C);
  665|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|       |    CLIP_STORE(Dst[3],C);
  667|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|       |    CLIP_STORE(Dst[4],C);
  669|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|       |    CLIP_STORE(Dst[5],C);
  671|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|       |    CLIP_STORE(Dst[6],C);
  673|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|       |    CLIP_STORE(Dst[7],C);
  675|       |    Src += BpS;
  676|       |    Dst += BpS;
  677|       |  }
  678|       |#endif
  679|  8.41k|}
qpel.c:H_Pass_Avrg_16_Add_C:
  689|  8.83k|{
  690|  8.83k|#if (SIZE==16)
  691|   150k|  while(H-->0) {
  ------------------
  |  Branch (691:9): [True: 141k, False: 8.83k]
  ------------------
  692|   141k|    int C;
  693|   141k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|   141k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.33k, False: 140k]
  |  |  |  Branch (683:28): [True: 5.28k, False: 134k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  695|   141k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|   141k|    CLIP_STORE( 1,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.68k, False: 139k]
  |  |  |  Branch (683:28): [True: 4.87k, False: 134k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  697|   141k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|   141k|    CLIP_STORE( 2,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.73k, False: 138k]
  |  |  |  Branch (683:28): [True: 4.53k, False: 134k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  699|   141k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|   141k|    CLIP_STORE( 3,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.21k, False: 139k]
  |  |  |  Branch (683:28): [True: 4.94k, False: 134k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  701|   141k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|   141k|    CLIP_STORE( 4,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.90k, False: 138k]
  |  |  |  Branch (683:28): [True: 4.64k, False: 133k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  703|   141k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|   141k|    CLIP_STORE( 5,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.54k, False: 138k]
  |  |  |  Branch (683:28): [True: 3.51k, False: 135k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  705|   141k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|   141k|    CLIP_STORE( 6,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.69k, False: 138k]
  |  |  |  Branch (683:28): [True: 4.96k, False: 133k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  707|   141k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|   141k|    CLIP_STORE( 7,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.12k, False: 138k]
  |  |  |  Branch (683:28): [True: 4.07k, False: 134k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  709|   141k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|   141k|    CLIP_STORE( 8,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.45k, False: 136k]
  |  |  |  Branch (683:28): [True: 4.16k, False: 132k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  711|   141k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|   141k|    CLIP_STORE( 9,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.74k, False: 138k]
  |  |  |  Branch (683:28): [True: 3.79k, False: 134k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  713|   141k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|   141k|    CLIP_STORE(10,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.68k, False: 136k]
  |  |  |  Branch (683:28): [True: 3.85k, False: 132k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  715|   141k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|   141k|    CLIP_STORE(11,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.08k, False: 139k]
  |  |  |  Branch (683:28): [True: 3.94k, False: 135k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  717|   141k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|   141k|    CLIP_STORE(12,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.65k, False: 136k]
  |  |  |  Branch (683:28): [True: 3.77k, False: 133k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  719|   141k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|   141k|    CLIP_STORE(13,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.12k, False: 139k]
  |  |  |  Branch (683:28): [True: 3.27k, False: 136k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  721|   141k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|   141k|    CLIP_STORE(14,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.34k, False: 137k]
  |  |  |  Branch (683:28): [True: 3.93k, False: 133k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  723|   141k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|   141k|    CLIP_STORE(15,C);
  ------------------
  |  |  683|   141k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.89k, False: 139k]
  |  |  |  Branch (683:28): [True: 3.14k, False: 136k]
  |  |  ------------------
  |  |  684|   141k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   141k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   141k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  725|   141k|    Src += BpS;
  726|   141k|    Dst += BpS;
  727|   141k|  }
  728|       |#else
  729|       |  while(H-->0) {
  730|       |    int C;
  731|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|       |    CLIP_STORE(0,C);
  733|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|       |    CLIP_STORE(1,C);
  735|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|       |    CLIP_STORE(2,C);
  737|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|       |    CLIP_STORE(3,C);
  739|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|       |    CLIP_STORE(4,C);
  741|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|       |    CLIP_STORE(5,C);
  743|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|       |    CLIP_STORE(6,C);
  745|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|       |    CLIP_STORE(7,C);
  747|       |    Src += BpS;
  748|       |    Dst += BpS;
  749|       |  }
  750|       |#endif
  751|  8.83k|}
qpel.c:H_Pass_Avrg_Up_16_Add_C:
  761|  11.4k|{
  762|  11.4k|#if (SIZE==16)
  763|   194k|  while(H-->0) {
  ------------------
  |  Branch (763:9): [True: 183k, False: 11.4k]
  ------------------
  764|   183k|    int C;
  765|   183k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|   183k|    CLIP_STORE(0,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.68k, False: 181k]
  |  |  |  Branch (755:28): [True: 3.20k, False: 178k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  767|   183k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|   183k|    CLIP_STORE( 1,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.22k, False: 180k]
  |  |  |  Branch (755:28): [True: 6.60k, False: 173k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  769|   183k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|   183k|    CLIP_STORE( 2,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.77k, False: 180k]
  |  |  |  Branch (755:28): [True: 3.48k, False: 177k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  771|   183k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|   183k|    CLIP_STORE( 3,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.26k, False: 178k]
  |  |  |  Branch (755:28): [True: 4.82k, False: 173k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  773|   183k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|   183k|    CLIP_STORE( 4,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.10k, False: 181k]
  |  |  |  Branch (755:28): [True: 5.24k, False: 175k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  775|   183k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|   183k|    CLIP_STORE( 5,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.15k, False: 179k]
  |  |  |  Branch (755:28): [True: 5.33k, False: 173k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  777|   183k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|   183k|    CLIP_STORE( 6,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.51k, False: 180k]
  |  |  |  Branch (755:28): [True: 3.74k, False: 177k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  779|   183k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|   183k|    CLIP_STORE( 7,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.92k, False: 180k]
  |  |  |  Branch (755:28): [True: 5.77k, False: 174k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  781|   183k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|   183k|    CLIP_STORE( 8,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.71k, False: 179k]
  |  |  |  Branch (755:28): [True: 3.66k, False: 175k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  783|   183k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|   183k|    CLIP_STORE( 9,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 6.13k, False: 177k]
  |  |  |  Branch (755:28): [True: 4.33k, False: 172k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  785|   183k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|   183k|    CLIP_STORE(10,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.60k, False: 179k]
  |  |  |  Branch (755:28): [True: 3.58k, False: 176k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  787|   183k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|   183k|    CLIP_STORE(11,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 6.18k, False: 177k]
  |  |  |  Branch (755:28): [True: 3.99k, False: 173k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  789|   183k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|   183k|    CLIP_STORE(12,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.84k, False: 180k]
  |  |  |  Branch (755:28): [True: 3.70k, False: 176k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  791|   183k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|   183k|    CLIP_STORE(13,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.41k, False: 180k]
  |  |  |  Branch (755:28): [True: 4.55k, False: 176k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  793|   183k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|   183k|    CLIP_STORE(14,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.02k, False: 181k]
  |  |  |  Branch (755:28): [True: 3.94k, False: 177k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  795|   183k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|   183k|    CLIP_STORE(15,C);
  ------------------
  |  |  755|   183k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.85k, False: 180k]
  |  |  |  Branch (755:28): [True: 4.17k, False: 176k]
  |  |  ------------------
  |  |  756|   183k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   183k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   183k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  797|   183k|    Src += BpS;
  798|   183k|    Dst += BpS;
  799|   183k|  }
  800|       |#else
  801|       |  while(H-->0) {
  802|       |    int C;
  803|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|       |    CLIP_STORE(0,C);
  805|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|       |    CLIP_STORE(1,C);
  807|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|       |    CLIP_STORE(2,C);
  809|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|       |    CLIP_STORE(3,C);
  811|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|       |    CLIP_STORE(4,C);
  813|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|       |    CLIP_STORE(5,C);
  815|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|       |    CLIP_STORE(6,C);
  817|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|       |    CLIP_STORE(7,C);
  819|       |    Src += BpS;
  820|       |    Dst += BpS;
  821|       |  }
  822|       |#endif
  823|  11.4k|}
qpel.c:V_Pass_16_Add_C:
  837|  11.1k|{
  838|  11.1k|#if (SIZE==16)
  839|   189k|  while(H-->0) {
  ------------------
  |  Branch (839:9): [True: 178k, False: 11.1k]
  ------------------
  840|   178k|    int C;
  841|   178k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|   178k|    CLIP_STORE(Dst[BpS* 0],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.74k, False: 175k]
  |  |  |  Branch (832:28): [True: 3.53k, False: 171k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  843|   178k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|   178k|    CLIP_STORE(Dst[BpS* 1],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 6.01k, False: 172k]
  |  |  |  Branch (832:28): [True: 4.35k, False: 167k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  845|   178k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|   178k|    CLIP_STORE(Dst[BpS* 2],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.56k, False: 175k]
  |  |  |  Branch (832:28): [True: 3.83k, False: 171k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  847|   178k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|   178k|    CLIP_STORE(Dst[BpS* 3],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 5.67k, False: 172k]
  |  |  |  Branch (832:28): [True: 4.29k, False: 168k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  849|   178k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|   178k|    CLIP_STORE(Dst[BpS* 4],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.48k, False: 173k]
  |  |  |  Branch (832:28): [True: 4.47k, False: 169k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  851|   178k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|   178k|    CLIP_STORE(Dst[BpS* 5],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.45k, False: 174k]
  |  |  |  Branch (832:28): [True: 4.14k, False: 170k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  853|   178k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|   178k|    CLIP_STORE(Dst[BpS* 6],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 7.80k, False: 170k]
  |  |  |  Branch (832:28): [True: 4.23k, False: 166k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  855|   178k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|   178k|    CLIP_STORE(Dst[BpS* 7],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.11k, False: 175k]
  |  |  |  Branch (832:28): [True: 4.27k, False: 170k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  857|   178k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|   178k|    CLIP_STORE(Dst[BpS* 8],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 6.44k, False: 171k]
  |  |  |  Branch (832:28): [True: 4.11k, False: 167k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  859|   178k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|   178k|    CLIP_STORE(Dst[BpS* 9],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.97k, False: 174k]
  |  |  |  Branch (832:28): [True: 4.49k, False: 169k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  861|   178k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|   178k|    CLIP_STORE(Dst[BpS*10],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.98k, False: 174k]
  |  |  |  Branch (832:28): [True: 5.72k, False: 168k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  863|   178k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|   178k|    CLIP_STORE(Dst[BpS*11],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.44k, False: 174k]
  |  |  |  Branch (832:28): [True: 4.83k, False: 169k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  865|   178k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|   178k|    CLIP_STORE(Dst[BpS*12],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.62k, False: 174k]
  |  |  |  Branch (832:28): [True: 6.09k, False: 168k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  867|   178k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|   178k|    CLIP_STORE(Dst[BpS*13],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.47k, False: 175k]
  |  |  |  Branch (832:28): [True: 4.15k, False: 171k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  869|   178k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|   178k|    CLIP_STORE(Dst[BpS*14],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.72k, False: 173k]
  |  |  |  Branch (832:28): [True: 5.03k, False: 168k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  871|   178k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|   178k|    CLIP_STORE(Dst[BpS*15],C);
  ------------------
  |  |  832|   178k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.04k, False: 176k]
  |  |  |  Branch (832:28): [True: 4.92k, False: 171k]
  |  |  ------------------
  |  |  833|   178k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   178k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  873|   178k|    Src += 1;
  874|   178k|    Dst += 1;
  875|   178k|  }
  876|       |#else
  877|       |  while(H-->0) {
  878|       |    int C;
  879|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|       |    CLIP_STORE(Dst[BpS*0],C);
  881|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|       |    CLIP_STORE(Dst[BpS*1],C);
  883|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|       |    CLIP_STORE(Dst[BpS*2],C);
  885|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|       |    CLIP_STORE(Dst[BpS*3],C);
  887|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|       |    CLIP_STORE(Dst[BpS*4],C);
  889|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|       |    CLIP_STORE(Dst[BpS*5],C);
  891|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|       |    CLIP_STORE(Dst[BpS*6],C);
  893|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|       |    CLIP_STORE(Dst[BpS*7],C);
  895|       |    Src += 1;
  896|       |    Dst += 1;
  897|       |  }
  898|       |#endif
  899|  11.1k|}
qpel.c:V_Pass_Avrg_16_Add_C:
  909|  13.1k|{
  910|  13.1k|#if (SIZE==16)
  911|   222k|  while(H-->0) {
  ------------------
  |  Branch (911:9): [True: 209k, False: 13.1k]
  ------------------
  912|   209k|    int C;
  913|   209k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|   209k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.44k, False: 207k]
  |  |  |  Branch (903:28): [True: 4.66k, False: 202k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  915|   209k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|   209k|    CLIP_STORE( 1,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.51k, False: 206k]
  |  |  |  Branch (903:28): [True: 4.87k, False: 201k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  917|   209k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|   209k|    CLIP_STORE( 2,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.95k, False: 206k]
  |  |  |  Branch (903:28): [True: 4.90k, False: 201k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  919|   209k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|   209k|    CLIP_STORE( 3,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.75k, False: 204k]
  |  |  |  Branch (903:28): [True: 4.80k, False: 200k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  921|   209k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|   209k|    CLIP_STORE( 4,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.17k, False: 205k]
  |  |  |  Branch (903:28): [True: 5.13k, False: 200k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  923|   209k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|   209k|    CLIP_STORE( 5,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.19k, False: 205k]
  |  |  |  Branch (903:28): [True: 4.32k, False: 201k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  925|   209k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|   209k|    CLIP_STORE( 6,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.03k, False: 205k]
  |  |  |  Branch (903:28): [True: 4.85k, False: 200k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  927|   209k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|   209k|    CLIP_STORE( 7,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.75k, False: 206k]
  |  |  |  Branch (903:28): [True: 4.22k, False: 202k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  929|   209k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|   209k|    CLIP_STORE( 8,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.73k, False: 205k]
  |  |  |  Branch (903:28): [True: 5.72k, False: 200k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  931|   209k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|   209k|    CLIP_STORE( 9,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.59k, False: 207k]
  |  |  |  Branch (903:28): [True: 4.72k, False: 202k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  933|   209k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|   209k|    CLIP_STORE(10,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.98k, False: 205k]
  |  |  |  Branch (903:28): [True: 6.30k, False: 199k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  935|   209k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|   209k|    CLIP_STORE(11,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.72k, False: 205k]
  |  |  |  Branch (903:28): [True: 5.89k, False: 200k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  937|   209k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|   209k|    CLIP_STORE(12,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 8.30k, False: 201k]
  |  |  |  Branch (903:28): [True: 5.87k, False: 195k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  939|   209k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|   209k|    CLIP_STORE(13,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.50k, False: 206k]
  |  |  |  Branch (903:28): [True: 3.87k, False: 202k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  941|   209k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|   209k|    CLIP_STORE(14,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 8.40k, False: 201k]
  |  |  |  Branch (903:28): [True: 5.79k, False: 195k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  943|   209k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|   209k|    CLIP_STORE(15,C);
  ------------------
  |  |  903|   209k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.52k, False: 207k]
  |  |  |  Branch (903:28): [True: 3.96k, False: 203k]
  |  |  ------------------
  |  |  904|   209k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   209k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   209k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  945|   209k|    Src += 1;
  946|   209k|    Dst += 1;
  947|   209k|  }
  948|       |#else
  949|       |  while(H-->0) {
  950|       |    int C;
  951|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|       |    CLIP_STORE(0,C);
  953|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|       |    CLIP_STORE(1,C);
  955|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|       |    CLIP_STORE(2,C);
  957|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|       |    CLIP_STORE(3,C);
  959|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|       |    CLIP_STORE(4,C);
  961|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|       |    CLIP_STORE(5,C);
  963|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|       |    CLIP_STORE(6,C);
  965|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|       |    CLIP_STORE(7,C);
  967|       |    Src += 1;
  968|       |    Dst += 1;
  969|       |  }
  970|       |#endif
  971|  13.1k|}
qpel.c:V_Pass_Avrg_Up_16_Add_C:
  981|  15.9k|{
  982|  15.9k|#if (SIZE==16)
  983|   271k|  while(H-->0) {
  ------------------
  |  Branch (983:9): [True: 255k, False: 15.9k]
  ------------------
  984|   255k|    int C;
  985|   255k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|   255k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.34k, False: 252k]
  |  |  |  Branch (975:28): [True: 5.30k, False: 247k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  987|   255k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|   255k|    CLIP_STORE( 1,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 10.6k, False: 244k]
  |  |  |  Branch (975:28): [True: 6.23k, False: 238k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  989|   255k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|   255k|    CLIP_STORE( 2,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.54k, False: 251k]
  |  |  |  Branch (975:28): [True: 4.51k, False: 247k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  991|   255k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|   255k|    CLIP_STORE( 3,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 11.2k, False: 243k]
  |  |  |  Branch (975:28): [True: 5.80k, False: 238k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  993|   255k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|   255k|    CLIP_STORE( 4,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.25k, False: 251k]
  |  |  |  Branch (975:28): [True: 6.41k, False: 245k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  995|   255k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|   255k|    CLIP_STORE( 5,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.91k, False: 250k]
  |  |  |  Branch (975:28): [True: 6.58k, False: 243k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  997|   255k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|   255k|    CLIP_STORE( 6,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.22k, False: 251k]
  |  |  |  Branch (975:28): [True: 5.44k, False: 246k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  999|   255k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|   255k|    CLIP_STORE( 7,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 5.69k, False: 249k]
  |  |  |  Branch (975:28): [True: 5.88k, False: 243k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1001|   255k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|   255k|    CLIP_STORE( 8,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.40k, False: 252k]
  |  |  |  Branch (975:28): [True: 5.08k, False: 247k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1003|   255k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|   255k|    CLIP_STORE( 9,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.58k, False: 250k]
  |  |  |  Branch (975:28): [True: 7.53k, False: 242k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1005|   255k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|   255k|    CLIP_STORE(10,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.49k, False: 251k]
  |  |  |  Branch (975:28): [True: 5.53k, False: 246k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1007|   255k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|   255k|    CLIP_STORE(11,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 5.88k, False: 249k]
  |  |  |  Branch (975:28): [True: 7.49k, False: 241k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1009|   255k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|   255k|    CLIP_STORE(12,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.70k, False: 251k]
  |  |  |  Branch (975:28): [True: 7.27k, False: 244k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1011|   255k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|   255k|    CLIP_STORE(13,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.30k, False: 250k]
  |  |  |  Branch (975:28): [True: 6.77k, False: 244k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1013|   255k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|   255k|    CLIP_STORE(14,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.31k, False: 251k]
  |  |  |  Branch (975:28): [True: 6.82k, False: 244k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1015|   255k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|   255k|    CLIP_STORE(15,C);
  ------------------
  |  |  975|   255k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.78k, False: 253k]
  |  |  |  Branch (975:28): [True: 6.76k, False: 246k]
  |  |  ------------------
  |  |  976|   255k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   255k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   255k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1017|   255k|    Src += 1;
 1018|   255k|    Dst += 1;
 1019|   255k|  }
 1020|       |#else
 1021|       |  while(H-->0) {
 1022|       |    int C;
 1023|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|       |    CLIP_STORE(0,C);
 1025|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|       |    CLIP_STORE(1,C);
 1027|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|       |    CLIP_STORE(2,C);
 1029|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|       |    CLIP_STORE(3,C);
 1031|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|       |    CLIP_STORE(4,C);
 1033|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|       |    CLIP_STORE(5,C);
 1035|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|       |    CLIP_STORE(6,C);
 1037|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|       |    CLIP_STORE(7,C);
 1039|       |    Src += 1;
 1040|       |    Dst += 1;
 1041|       |  }
 1042|       |#endif
 1043|  15.9k|}
qpel.c:H_Pass_8_Add_C:
  617|  24.8k|{
  618|       |#if (SIZE==16)
  619|       |  while(H-->0) {
  620|       |    int C;
  621|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|       |    CLIP_STORE(Dst[ 0],C);
  623|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|       |    CLIP_STORE(Dst[ 1],C);
  625|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|       |    CLIP_STORE(Dst[ 2],C);
  627|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|       |    CLIP_STORE(Dst[ 3],C);
  629|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|       |    CLIP_STORE(Dst[ 4],C);
  631|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|       |    CLIP_STORE(Dst[ 5],C);
  633|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|       |    CLIP_STORE(Dst[ 6],C);
  635|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|       |    CLIP_STORE(Dst[ 7],C);
  637|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|       |    CLIP_STORE(Dst[ 8],C);
  639|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|       |    CLIP_STORE(Dst[ 9],C);
  641|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|       |    CLIP_STORE(Dst[10],C);
  643|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|       |    CLIP_STORE(Dst[11],C);
  645|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|       |    CLIP_STORE(Dst[12],C);
  647|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|       |    CLIP_STORE(Dst[13],C);
  649|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|       |    CLIP_STORE(Dst[14],C);
  651|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|       |    CLIP_STORE(Dst[15],C);
  653|       |    Src += BpS;
  654|       |    Dst += BpS;
  655|       |  }
  656|       |#else
  657|   223k|  while(H-->0) {
  ------------------
  |  Branch (657:9): [True: 198k, False: 24.8k]
  ------------------
  658|   198k|    int C;
  659|   198k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|   198k|    CLIP_STORE(Dst[0],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.65k, False: 196k]
  |  |  |  Branch (612:28): [True: 6.01k, False: 190k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  661|   198k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|   198k|    CLIP_STORE(Dst[1],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.90k, False: 194k]
  |  |  |  Branch (612:28): [True: 7.89k, False: 186k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  663|   198k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|   198k|    CLIP_STORE(Dst[2],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.88k, False: 196k]
  |  |  |  Branch (612:28): [True: 5.13k, False: 191k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  665|   198k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|   198k|    CLIP_STORE(Dst[3],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.42k, False: 194k]
  |  |  |  Branch (612:28): [True: 6.65k, False: 187k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  667|   198k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|   198k|    CLIP_STORE(Dst[4],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.64k, False: 195k]
  |  |  |  Branch (612:28): [True: 6.61k, False: 189k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  669|   198k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|   198k|    CLIP_STORE(Dst[5],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.51k, False: 195k]
  |  |  |  Branch (612:28): [True: 6.22k, False: 189k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  671|   198k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|   198k|    CLIP_STORE(Dst[6],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.71k, False: 195k]
  |  |  |  Branch (612:28): [True: 6.47k, False: 189k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  673|   198k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|   198k|    CLIP_STORE(Dst[7],C);
  ------------------
  |  |  612|   198k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.00k, False: 196k]
  |  |  |  Branch (612:28): [True: 6.51k, False: 189k]
  |  |  ------------------
  |  |  613|   198k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   198k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  675|   198k|    Src += BpS;
  676|   198k|    Dst += BpS;
  677|   198k|  }
  678|  24.8k|#endif
  679|  24.8k|}
qpel.c:H_Pass_Avrg_8_Add_C:
  689|  45.3k|{
  690|       |#if (SIZE==16)
  691|       |  while(H-->0) {
  692|       |    int C;
  693|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|       |    CLIP_STORE(0,C);
  695|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|       |    CLIP_STORE( 1,C);
  697|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|       |    CLIP_STORE( 2,C);
  699|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|       |    CLIP_STORE( 3,C);
  701|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|       |    CLIP_STORE( 4,C);
  703|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|       |    CLIP_STORE( 5,C);
  705|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|       |    CLIP_STORE( 6,C);
  707|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|       |    CLIP_STORE( 7,C);
  709|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|       |    CLIP_STORE( 8,C);
  711|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|       |    CLIP_STORE( 9,C);
  713|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|       |    CLIP_STORE(10,C);
  715|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|       |    CLIP_STORE(11,C);
  717|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|       |    CLIP_STORE(12,C);
  719|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|       |    CLIP_STORE(13,C);
  721|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|       |    CLIP_STORE(14,C);
  723|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|       |    CLIP_STORE(15,C);
  725|       |    Src += BpS;
  726|       |    Dst += BpS;
  727|       |  }
  728|       |#else
  729|   408k|  while(H-->0) {
  ------------------
  |  Branch (729:9): [True: 363k, False: 45.3k]
  ------------------
  730|   363k|    int C;
  731|   363k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|   363k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.66k, False: 359k]
  |  |  |  Branch (683:28): [True: 12.0k, False: 347k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  733|   363k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|   363k|    CLIP_STORE(1,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.19k, False: 358k]
  |  |  |  Branch (683:28): [True: 12.5k, False: 346k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  735|   363k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|   363k|    CLIP_STORE(2,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.57k, False: 358k]
  |  |  |  Branch (683:28): [True: 11.3k, False: 347k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  737|   363k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|   363k|    CLIP_STORE(3,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.47k, False: 358k]
  |  |  |  Branch (683:28): [True: 11.7k, False: 346k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  739|   363k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|   363k|    CLIP_STORE(4,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 9.51k, False: 353k]
  |  |  |  Branch (683:28): [True: 12.4k, False: 341k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  741|   363k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|   363k|    CLIP_STORE(5,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.62k, False: 359k]
  |  |  |  Branch (683:28): [True: 8.43k, False: 351k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  743|   363k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|   363k|    CLIP_STORE(6,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 9.67k, False: 353k]
  |  |  |  Branch (683:28): [True: 13.1k, False: 340k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  745|   363k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|   363k|    CLIP_STORE(7,C);
  ------------------
  |  |  683|   363k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.72k, False: 360k]
  |  |  |  Branch (683:28): [True: 8.18k, False: 352k]
  |  |  ------------------
  |  |  684|   363k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   363k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   363k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  747|   363k|    Src += BpS;
  748|   363k|    Dst += BpS;
  749|   363k|  }
  750|  45.3k|#endif
  751|  45.3k|}
qpel.c:H_Pass_Avrg_Up_8_Add_C:
  761|  43.8k|{
  762|       |#if (SIZE==16)
  763|       |  while(H-->0) {
  764|       |    int C;
  765|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|       |    CLIP_STORE(0,C);
  767|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|       |    CLIP_STORE( 1,C);
  769|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|       |    CLIP_STORE( 2,C);
  771|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|       |    CLIP_STORE( 3,C);
  773|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|       |    CLIP_STORE( 4,C);
  775|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|       |    CLIP_STORE( 5,C);
  777|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|       |    CLIP_STORE( 6,C);
  779|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|       |    CLIP_STORE( 7,C);
  781|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|       |    CLIP_STORE( 8,C);
  783|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|       |    CLIP_STORE( 9,C);
  785|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|       |    CLIP_STORE(10,C);
  787|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|       |    CLIP_STORE(11,C);
  789|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|       |    CLIP_STORE(12,C);
  791|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|       |    CLIP_STORE(13,C);
  793|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|       |    CLIP_STORE(14,C);
  795|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|       |    CLIP_STORE(15,C);
  797|       |    Src += BpS;
  798|       |    Dst += BpS;
  799|       |  }
  800|       |#else
  801|   394k|  while(H-->0) {
  ------------------
  |  Branch (801:9): [True: 350k, False: 43.8k]
  ------------------
  802|   350k|    int C;
  803|   350k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|   350k|    CLIP_STORE(0,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.63k, False: 348k]
  |  |  |  Branch (755:28): [True: 6.99k, False: 341k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  805|   350k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|   350k|    CLIP_STORE(1,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 7.06k, False: 343k]
  |  |  |  Branch (755:28): [True: 10.7k, False: 332k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  807|   350k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|   350k|    CLIP_STORE(2,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.85k, False: 347k]
  |  |  |  Branch (755:28): [True: 7.09k, False: 340k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  809|   350k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|   350k|    CLIP_STORE(3,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 7.44k, False: 343k]
  |  |  |  Branch (755:28): [True: 9.89k, False: 333k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  811|   350k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|   350k|    CLIP_STORE(4,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.39k, False: 347k]
  |  |  |  Branch (755:28): [True: 9.60k, False: 337k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  813|   350k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|   350k|    CLIP_STORE(5,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.43k, False: 347k]
  |  |  |  Branch (755:28): [True: 9.70k, False: 337k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  815|   350k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|   350k|    CLIP_STORE(6,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.92k, False: 346k]
  |  |  |  Branch (755:28): [True: 9.58k, False: 337k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  817|   350k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|   350k|    CLIP_STORE(7,C);
  ------------------
  |  |  755|   350k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.18k, False: 347k]
  |  |  |  Branch (755:28): [True: 9.45k, False: 338k]
  |  |  ------------------
  |  |  756|   350k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   350k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   350k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  819|   350k|    Src += BpS;
  820|   350k|    Dst += BpS;
  821|   350k|  }
  822|  43.8k|#endif
  823|  43.8k|}
qpel.c:V_Pass_8_Add_C:
  837|  45.3k|{
  838|       |#if (SIZE==16)
  839|       |  while(H-->0) {
  840|       |    int C;
  841|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|       |    CLIP_STORE(Dst[BpS* 0],C);
  843|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|       |    CLIP_STORE(Dst[BpS* 1],C);
  845|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|       |    CLIP_STORE(Dst[BpS* 2],C);
  847|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|       |    CLIP_STORE(Dst[BpS* 3],C);
  849|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|       |    CLIP_STORE(Dst[BpS* 4],C);
  851|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|       |    CLIP_STORE(Dst[BpS* 5],C);
  853|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|       |    CLIP_STORE(Dst[BpS* 6],C);
  855|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|       |    CLIP_STORE(Dst[BpS* 7],C);
  857|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|       |    CLIP_STORE(Dst[BpS* 8],C);
  859|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|       |    CLIP_STORE(Dst[BpS* 9],C);
  861|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|       |    CLIP_STORE(Dst[BpS*10],C);
  863|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|       |    CLIP_STORE(Dst[BpS*11],C);
  865|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|       |    CLIP_STORE(Dst[BpS*12],C);
  867|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|       |    CLIP_STORE(Dst[BpS*13],C);
  869|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|       |    CLIP_STORE(Dst[BpS*14],C);
  871|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|       |    CLIP_STORE(Dst[BpS*15],C);
  873|       |    Src += 1;
  874|       |    Dst += 1;
  875|       |  }
  876|       |#else
  877|   408k|  while(H-->0) {
  ------------------
  |  Branch (877:9): [True: 362k, False: 45.3k]
  ------------------
  878|   362k|    int C;
  879|   362k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|   362k|    CLIP_STORE(Dst[BpS*0],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.33k, False: 359k]
  |  |  |  Branch (832:28): [True: 9.13k, False: 350k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  881|   362k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|   362k|    CLIP_STORE(Dst[BpS*1],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 9.02k, False: 353k]
  |  |  |  Branch (832:28): [True: 11.9k, False: 341k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  883|   362k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|   362k|    CLIP_STORE(Dst[BpS*2],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.06k, False: 358k]
  |  |  |  Branch (832:28): [True: 8.93k, False: 349k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  885|   362k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|   362k|    CLIP_STORE(Dst[BpS*3],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 9.67k, False: 353k]
  |  |  |  Branch (832:28): [True: 11.3k, False: 341k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  887|   362k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|   362k|    CLIP_STORE(Dst[BpS*4],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 7.02k, False: 355k]
  |  |  |  Branch (832:28): [True: 11.1k, False: 344k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  889|   362k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|   362k|    CLIP_STORE(Dst[BpS*5],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.00k, False: 358k]
  |  |  |  Branch (832:28): [True: 10.1k, False: 348k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  891|   362k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|   362k|    CLIP_STORE(Dst[BpS*6],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 6.15k, False: 356k]
  |  |  |  Branch (832:28): [True: 11.5k, False: 345k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  893|   362k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|   362k|    CLIP_STORE(Dst[BpS*7],C);
  ------------------
  |  |  832|   362k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.18k, False: 359k]
  |  |  |  Branch (832:28): [True: 10.2k, False: 349k]
  |  |  ------------------
  |  |  833|   362k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   362k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  895|   362k|    Src += 1;
  896|   362k|    Dst += 1;
  897|   362k|  }
  898|  45.3k|#endif
  899|  45.3k|}
qpel.c:V_Pass_Avrg_8_Add_C:
  909|  56.9k|{
  910|       |#if (SIZE==16)
  911|       |  while(H-->0) {
  912|       |    int C;
  913|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|       |    CLIP_STORE(0,C);
  915|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|       |    CLIP_STORE( 1,C);
  917|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|       |    CLIP_STORE( 2,C);
  919|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|       |    CLIP_STORE( 3,C);
  921|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|       |    CLIP_STORE( 4,C);
  923|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|       |    CLIP_STORE( 5,C);
  925|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|       |    CLIP_STORE( 6,C);
  927|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|       |    CLIP_STORE( 7,C);
  929|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|       |    CLIP_STORE( 8,C);
  931|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|       |    CLIP_STORE( 9,C);
  933|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|       |    CLIP_STORE(10,C);
  935|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|       |    CLIP_STORE(11,C);
  937|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|       |    CLIP_STORE(12,C);
  939|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|       |    CLIP_STORE(13,C);
  941|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|       |    CLIP_STORE(14,C);
  943|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|       |    CLIP_STORE(15,C);
  945|       |    Src += 1;
  946|       |    Dst += 1;
  947|       |  }
  948|       |#else
  949|   512k|  while(H-->0) {
  ------------------
  |  Branch (949:9): [True: 455k, False: 56.9k]
  ------------------
  950|   455k|    int C;
  951|   455k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|   455k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.93k, False: 450k]
  |  |  |  Branch (903:28): [True: 13.0k, False: 437k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  953|   455k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|   455k|    CLIP_STORE(1,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 7.04k, False: 448k]
  |  |  |  Branch (903:28): [True: 13.5k, False: 434k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  955|   455k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|   455k|    CLIP_STORE(2,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.86k, False: 450k]
  |  |  |  Branch (903:28): [True: 13.4k, False: 437k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  957|   455k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|   455k|    CLIP_STORE(3,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 6.82k, False: 448k]
  |  |  |  Branch (903:28): [True: 14.1k, False: 434k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  959|   455k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|   455k|    CLIP_STORE(4,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 15.4k, False: 440k]
  |  |  |  Branch (903:28): [True: 14.4k, False: 425k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  961|   455k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|   455k|    CLIP_STORE(5,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.51k, False: 451k]
  |  |  |  Branch (903:28): [True: 9.46k, False: 441k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  963|   455k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|   455k|    CLIP_STORE(6,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 15.5k, False: 440k]
  |  |  |  Branch (903:28): [True: 15.5k, False: 424k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  965|   455k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|   455k|    CLIP_STORE(7,C);
  ------------------
  |  |  903|   455k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.43k, False: 452k]
  |  |  |  Branch (903:28): [True: 9.45k, False: 442k]
  |  |  ------------------
  |  |  904|   455k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   455k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   455k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  967|   455k|    Src += 1;
  968|   455k|    Dst += 1;
  969|   455k|  }
  970|  56.9k|#endif
  971|  56.9k|}
qpel.c:V_Pass_Avrg_Up_8_Add_C:
  981|  60.1k|{
  982|       |#if (SIZE==16)
  983|       |  while(H-->0) {
  984|       |    int C;
  985|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|       |    CLIP_STORE(0,C);
  987|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|       |    CLIP_STORE( 1,C);
  989|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|       |    CLIP_STORE( 2,C);
  991|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|       |    CLIP_STORE( 3,C);
  993|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|       |    CLIP_STORE( 4,C);
  995|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|       |    CLIP_STORE( 5,C);
  997|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|       |    CLIP_STORE( 6,C);
  999|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|       |    CLIP_STORE( 7,C);
 1001|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|       |    CLIP_STORE( 8,C);
 1003|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|       |    CLIP_STORE( 9,C);
 1005|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|       |    CLIP_STORE(10,C);
 1007|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|       |    CLIP_STORE(11,C);
 1009|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|       |    CLIP_STORE(12,C);
 1011|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|       |    CLIP_STORE(13,C);
 1013|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|       |    CLIP_STORE(14,C);
 1015|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|       |    CLIP_STORE(15,C);
 1017|       |    Src += 1;
 1018|       |    Dst += 1;
 1019|       |  }
 1020|       |#else
 1021|   541k|  while(H-->0) {
  ------------------
  |  Branch (1021:9): [True: 481k, False: 60.1k]
  ------------------
 1022|   481k|    int C;
 1023|   481k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|   481k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.48k, False: 476k]
  |  |  |  Branch (975:28): [True: 9.21k, False: 467k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1025|   481k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|   481k|    CLIP_STORE(1,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 13.7k, False: 467k]
  |  |  |  Branch (975:28): [True: 13.3k, False: 454k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1027|   481k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|   481k|    CLIP_STORE(2,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 5.17k, False: 476k]
  |  |  |  Branch (975:28): [True: 10.0k, False: 466k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1029|   481k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|   481k|    CLIP_STORE(3,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 13.8k, False: 467k]
  |  |  |  Branch (975:28): [True: 13.6k, False: 453k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1031|   481k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|   481k|    CLIP_STORE(4,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 6.55k, False: 474k]
  |  |  |  Branch (975:28): [True: 12.5k, False: 462k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1033|   481k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|   481k|    CLIP_STORE(5,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 6.34k, False: 475k]
  |  |  |  Branch (975:28): [True: 12.6k, False: 462k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1035|   481k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|   481k|    CLIP_STORE(6,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 6.26k, False: 475k]
  |  |  |  Branch (975:28): [True: 13.1k, False: 462k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1037|   481k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|   481k|    CLIP_STORE(7,C);
  ------------------
  |  |  975|   481k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.77k, False: 476k]
  |  |  |  Branch (975:28): [True: 12.5k, False: 464k]
  |  |  ------------------
  |  |  976|   481k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   481k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   481k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1039|   481k|    Src += 1;
 1040|   481k|    Dst += 1;
 1041|   481k|  }
 1042|  60.1k|#endif
 1043|  60.1k|}
qpel.c:Init_FIR_Table:
  400|     34|{
  401|     34|	int i;
  402|  8.73k|	for(i=0; i<256; ++i) {
  ------------------
  |  Branch (402:11): [True: 8.70k, False: 34]
  ------------------
  403|  8.70k|		Tab[i][0] = i*A;
  404|  8.70k|		Tab[i][1] = i*B;
  405|  8.70k|		Tab[i][2] = i*C;
  406|  8.70k|		Tab[i][3] = i*D;
  407|  8.70k|	}
  408|     34|}

decoder.c:interpolate16x16_quarterpel:
  119|   211k|{
  120|   211k|	const uint8_t *src;
  121|   211k|	uint8_t *dst;
  122|   211k|	uint8_t *tmp;
  123|   211k|	int32_t quads;
  124|   211k|	const XVID_QP_FUNCS *Ops;
  125|       |
  126|   211k|	int32_t x_int, y_int;
  127|       |
  128|   211k|	const int32_t xRef = (int)x*4 + dx;
  129|   211k|	const int32_t yRef = (int)y*4 + dy;
  130|       |
  131|   211k|	Ops = xvid_QP_Funcs;
  132|   211k|	quads = (dx&3) | ((dy&3)<<2);
  133|       |
  134|   211k|	x_int = xRef >> 2;
  135|   211k|	y_int = yRef >> 2;
  136|       |
  137|   211k|	dst = cur + y * stride + x;
  138|   211k|	src = refn + y_int * (int)stride + x_int;
  139|       |
  140|   211k|	tmp = refh; /* we need at least a 16 x stride scratch block */
  141|       |
  142|   211k|	switch(quads) {
  ------------------
  |  Branch (142:9): [True: 0, False: 211k]
  ------------------
  143|  79.6k|	case 0:
  ------------------
  |  Branch (143:2): [True: 79.6k, False: 131k]
  ------------------
  144|  79.6k|		transfer8x8_copy(dst, src, stride);
  145|  79.6k|		transfer8x8_copy(dst+8, src+8, stride);
  146|  79.6k|		transfer8x8_copy(dst+8*stride, src+8*stride, stride);
  147|  79.6k|		transfer8x8_copy(dst+8*stride+8, src+8*stride+8, stride);
  148|  79.6k|		break;
  149|  14.1k|	case 1:
  ------------------
  |  Branch (149:2): [True: 14.1k, False: 196k]
  ------------------
  150|  14.1k|		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
  151|  14.1k|		break;
  152|  12.7k|	case 2:
  ------------------
  |  Branch (152:2): [True: 12.7k, False: 198k]
  ------------------
  153|  12.7k|		Ops->H_Pass(dst, src, 16, stride, rounding);
  154|  12.7k|		break;
  155|  18.8k|	case 3:
  ------------------
  |  Branch (155:2): [True: 18.8k, False: 192k]
  ------------------
  156|  18.8k|		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  157|  18.8k|		break;
  158|  14.1k|	case 4:
  ------------------
  |  Branch (158:2): [True: 14.1k, False: 196k]
  ------------------
  159|  14.1k|		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
  160|  14.1k|		break;
  161|  5.39k|	case 5:
  ------------------
  |  Branch (161:2): [True: 5.39k, False: 205k]
  ------------------
  162|  5.39k|		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  163|  5.39k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  164|  5.39k|		break;
  165|  3.07k|	case 6:
  ------------------
  |  Branch (165:2): [True: 3.07k, False: 208k]
  ------------------
  166|  3.07k|		Ops->H_Pass(tmp, src,	  17, stride, rounding);
  167|  3.07k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  168|  3.07k|		break;
  169|  4.28k|	case 7:
  ------------------
  |  Branch (169:2): [True: 4.28k, False: 206k]
  ------------------
  170|  4.28k|		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  171|  4.28k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  172|  4.28k|		break;
  173|  14.3k|	case 8:
  ------------------
  |  Branch (173:2): [True: 14.3k, False: 196k]
  ------------------
  174|  14.3k|		Ops->V_Pass(dst, src, 16, stride, rounding);
  175|  14.3k|		break;
  176|  3.48k|	case 9:
  ------------------
  |  Branch (176:2): [True: 3.48k, False: 207k]
  ------------------
  177|  3.48k|		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  178|  3.48k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  179|  3.48k|		break;
  180|  3.70k|	case 10:
  ------------------
  |  Branch (180:2): [True: 3.70k, False: 207k]
  ------------------
  181|  3.70k|		Ops->H_Pass(tmp, src, 17, stride, rounding);
  182|  3.70k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  183|  3.70k|		break;
  184|  3.61k|	case 11:
  ------------------
  |  Branch (184:2): [True: 3.61k, False: 207k]
  ------------------
  185|  3.61k|		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  186|  3.61k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  187|  3.61k|		break;
  188|  18.8k|	case 12:
  ------------------
  |  Branch (188:2): [True: 18.8k, False: 192k]
  ------------------
  189|  18.8k|		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  190|  18.8k|		break;
  191|  4.76k|	case 13:
  ------------------
  |  Branch (191:2): [True: 4.76k, False: 206k]
  ------------------
  192|  4.76k|		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  193|  4.76k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  194|  4.76k|		break;
  195|  3.68k|	case 14:
  ------------------
  |  Branch (195:2): [True: 3.68k, False: 207k]
  ------------------
  196|  3.68k|		Ops->H_Pass(tmp, src, 17, stride, rounding);
  197|  3.68k|		Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding);
  198|  3.68k|		break;
  199|  6.40k|	case 15:
  ------------------
  |  Branch (199:2): [True: 6.40k, False: 204k]
  ------------------
  200|  6.40k|		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  201|  6.40k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  202|  6.40k|		break;
  203|   211k|	}
  204|   211k|}
decoder.c:interpolate8x8_quarterpel:
  412|  1.90M|{
  413|  1.90M|	const uint8_t *src;
  414|  1.90M|	uint8_t *dst;
  415|  1.90M|	uint8_t *tmp;
  416|  1.90M|	int32_t quads;
  417|  1.90M|	const XVID_QP_FUNCS *Ops;
  418|       |
  419|  1.90M|	int32_t x_int, y_int;
  420|       |
  421|  1.90M|	const int32_t xRef = (int)x*4 + dx;
  422|  1.90M|	const int32_t yRef = (int)y*4 + dy;
  423|       |
  424|  1.90M|	Ops = xvid_QP_Funcs;
  425|  1.90M|	quads = (dx&3) | ((dy&3)<<2);
  426|       |
  427|  1.90M|	x_int = xRef >> 2;
  428|  1.90M|	y_int = yRef >> 2;
  429|       |
  430|  1.90M|	dst = cur + y * stride + x;
  431|  1.90M|	src = refn + y_int * (int)stride + x_int;
  432|       |
  433|  1.90M|	tmp = refh; /* we need at least a 16 x stride scratch block */
  434|       |
  435|  1.90M|	switch(quads) {
  ------------------
  |  Branch (435:9): [True: 0, False: 1.90M]
  ------------------
  436|  1.58M|	case 0:
  ------------------
  |  Branch (436:2): [True: 1.58M, False: 316k]
  ------------------
  437|  1.58M|		transfer8x8_copy( dst, src, stride);
  438|  1.58M|		break;
  439|  48.2k|	case 1:
  ------------------
  |  Branch (439:2): [True: 48.2k, False: 1.85M]
  ------------------
  440|  48.2k|		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
  441|  48.2k|		break;
  442|  27.0k|	case 2:
  ------------------
  |  Branch (442:2): [True: 27.0k, False: 1.87M]
  ------------------
  443|  27.0k|		Ops->H_Pass_8(dst, src, 8, stride, rounding);
  444|  27.0k|		break;
  445|  46.9k|	case 3:
  ------------------
  |  Branch (445:2): [True: 46.9k, False: 1.85M]
  ------------------
  446|  46.9k|		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  447|  46.9k|		break;
  448|  28.4k|	case 4:
  ------------------
  |  Branch (448:2): [True: 28.4k, False: 1.87M]
  ------------------
  449|  28.4k|		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
  450|  28.4k|		break;
  451|  10.3k|	case 5:
  ------------------
  |  Branch (451:2): [True: 10.3k, False: 1.89M]
  ------------------
  452|  10.3k|		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  453|  10.3k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  454|  10.3k|		break;
  455|  4.90k|	case 6:
  ------------------
  |  Branch (455:2): [True: 4.90k, False: 1.90M]
  ------------------
  456|  4.90k|		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
  457|  4.90k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  458|  4.90k|		break;
  459|  20.6k|	case 7:
  ------------------
  |  Branch (459:2): [True: 20.6k, False: 1.88M]
  ------------------
  460|  20.6k|		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  461|  20.6k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  462|  20.6k|		break;
  463|  33.2k|	case 8:
  ------------------
  |  Branch (463:2): [True: 33.2k, False: 1.87M]
  ------------------
  464|  33.2k|		Ops->V_Pass_8(dst, src, 8, stride, rounding);
  465|  33.2k|		break;
  466|  7.98k|	case 9:
  ------------------
  |  Branch (466:2): [True: 7.98k, False: 1.89M]
  ------------------
  467|  7.98k|		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  468|  7.98k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  469|  7.98k|		break;
  470|  7.15k|	case 10:
  ------------------
  |  Branch (470:2): [True: 7.15k, False: 1.89M]
  ------------------
  471|  7.15k|		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
  472|  7.15k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  473|  7.15k|		break;
  474|  7.51k|	case 11:
  ------------------
  |  Branch (474:2): [True: 7.51k, False: 1.89M]
  ------------------
  475|  7.51k|		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  476|  7.51k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  477|  7.51k|		break;
  478|  36.8k|	case 12:
  ------------------
  |  Branch (478:2): [True: 36.8k, False: 1.86M]
  ------------------
  479|  36.8k|		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  480|  36.8k|		break;
  481|  13.5k|	case 13:
  ------------------
  |  Branch (481:2): [True: 13.5k, False: 1.89M]
  ------------------
  482|  13.5k|		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  483|  13.5k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  484|  13.5k|		break;
  485|  7.10k|	case 14:
  ------------------
  |  Branch (485:2): [True: 7.10k, False: 1.89M]
  ------------------
  486|  7.10k|		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
  487|  7.10k|		Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding);
  488|  7.10k|		break;
  489|  16.4k|	case 15:
  ------------------
  |  Branch (489:2): [True: 16.4k, False: 1.88M]
  ------------------
  490|  16.4k|		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  491|  16.4k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  492|  16.4k|		break;
  493|  1.90M|	}
  494|  1.90M|}
decoder.c:interpolate16x16_add_quarterpel:
  216|   103k|{
  217|   103k|	const uint8_t *src;
  218|   103k|	uint8_t *dst;
  219|   103k|	uint8_t *tmp;
  220|   103k|	int32_t quads;
  221|   103k|	const XVID_QP_FUNCS *Ops;
  222|   103k|	const XVID_QP_FUNCS *Ops_Copy;
  223|       |
  224|   103k|	int32_t x_int, y_int;
  225|       |
  226|   103k|	const int32_t xRef = (int)x*4 + dx;
  227|   103k|	const int32_t yRef = (int)y*4 + dy;
  228|       |
  229|   103k|	Ops = xvid_QP_Add_Funcs;
  230|   103k|	Ops_Copy = xvid_QP_Funcs;
  231|   103k|	quads = (dx&3) | ((dy&3)<<2);
  232|       |
  233|   103k|	x_int = xRef >> 2;
  234|   103k|	y_int = yRef >> 2;
  235|       |
  236|   103k|	dst = cur + y * stride + x;
  237|   103k|	src = refn + y_int * (int)stride + x_int;
  238|       |
  239|   103k|	tmp = refh; /* we need at least a 16 x stride scratch block */
  240|       |
  241|   103k|	switch(quads) {
  ------------------
  |  Branch (241:9): [True: 0, False: 103k]
  ------------------
  242|  35.0k|	case 0:
  ------------------
  |  Branch (242:2): [True: 35.0k, False: 68.9k]
  ------------------
  243|       |		/* NB: there is no halfpel involved ! the name's function can be
  244|       |		 *     misleading */
  245|  35.0k|		interpolate8x8_halfpel_add(dst, src, stride, rounding);
  246|  35.0k|		interpolate8x8_halfpel_add(dst+8, src+8, stride, rounding);
  247|  35.0k|		interpolate8x8_halfpel_add(dst+8*stride, src+8*stride, stride, rounding);
  248|  35.0k|		interpolate8x8_halfpel_add(dst+8*stride+8, src+8*stride+8, stride, rounding);
  249|  35.0k|		break;
  250|  8.83k|	case 1:
  ------------------
  |  Branch (250:2): [True: 8.83k, False: 95.0k]
  ------------------
  251|  8.83k|		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
  252|  8.83k|		break;
  253|  8.41k|	case 2:
  ------------------
  |  Branch (253:2): [True: 8.41k, False: 95.5k]
  ------------------
  254|  8.41k|		Ops->H_Pass(dst, src, 16, stride, rounding);
  255|  8.41k|		break;
  256|  11.4k|	case 3:
  ------------------
  |  Branch (256:2): [True: 11.4k, False: 92.4k]
  ------------------
  257|  11.4k|		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  258|  11.4k|		break;
  259|  6.19k|	case 4:
  ------------------
  |  Branch (259:2): [True: 6.19k, False: 97.7k]
  ------------------
  260|  6.19k|		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
  261|  6.19k|		break;
  262|  2.33k|	case 5:
  ------------------
  |  Branch (262:2): [True: 2.33k, False: 101k]
  ------------------
  263|  2.33k|		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  264|  2.33k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  265|  2.33k|		break;
  266|  1.89k|	case 6:
  ------------------
  |  Branch (266:2): [True: 1.89k, False: 102k]
  ------------------
  267|  1.89k|		Ops_Copy->H_Pass(tmp, src,	  17, stride, rounding);
  268|  1.89k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  269|  1.89k|		break;
  270|  2.67k|	case 7:
  ------------------
  |  Branch (270:2): [True: 2.67k, False: 101k]
  ------------------
  271|  2.67k|		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  272|  2.67k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  273|  2.67k|		break;
  274|  4.71k|	case 8:
  ------------------
  |  Branch (274:2): [True: 4.71k, False: 99.2k]
  ------------------
  275|  4.71k|		Ops->V_Pass(dst, src, 16, stride, rounding);
  276|  4.71k|		break;
  277|  1.91k|	case 9:
  ------------------
  |  Branch (277:2): [True: 1.91k, False: 102k]
  ------------------
  278|  1.91k|		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  279|  1.91k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  280|  1.91k|		break;
  281|  2.47k|	case 10:
  ------------------
  |  Branch (281:2): [True: 2.47k, False: 101k]
  ------------------
  282|  2.47k|		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
  283|  2.47k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  284|  2.47k|		break;
  285|  2.03k|	case 11:
  ------------------
  |  Branch (285:2): [True: 2.03k, False: 101k]
  ------------------
  286|  2.03k|		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  287|  2.03k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  288|  2.03k|		break;
  289|  8.02k|	case 12:
  ------------------
  |  Branch (289:2): [True: 8.02k, False: 95.9k]
  ------------------
  290|  8.02k|		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  291|  8.02k|		break;
  292|  2.62k|	case 13:
  ------------------
  |  Branch (292:2): [True: 2.62k, False: 101k]
  ------------------
  293|  2.62k|		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  294|  2.62k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  295|  2.62k|		break;
  296|  2.12k|	case 14:
  ------------------
  |  Branch (296:2): [True: 2.12k, False: 101k]
  ------------------
  297|  2.12k|		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
  298|  2.12k|		Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding);
  299|  2.12k|		break;
  300|  3.16k|	case 15:
  ------------------
  |  Branch (300:2): [True: 3.16k, False: 100k]
  ------------------
  301|  3.16k|		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  302|  3.16k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  303|  3.16k|		break;
  304|   103k|	}
  305|   103k|}
decoder.c:interpolate8x8_add_quarterpel:
  506|  1.84M|{
  507|  1.84M|	const uint8_t *src;
  508|  1.84M|	uint8_t *dst;
  509|  1.84M|	uint8_t *tmp;
  510|  1.84M|	int32_t quads;
  511|  1.84M|	const XVID_QP_FUNCS *Ops;
  512|  1.84M|	const XVID_QP_FUNCS *Ops_Copy;
  513|       |
  514|  1.84M|	int32_t x_int, y_int;
  515|       |
  516|  1.84M|	const int32_t xRef = (int)x*4 + dx;
  517|  1.84M|	const int32_t yRef = (int)y*4 + dy;
  518|       |
  519|  1.84M|	Ops = xvid_QP_Add_Funcs;
  520|  1.84M|	Ops_Copy = xvid_QP_Funcs;
  521|  1.84M|	quads = (dx&3) | ((dy&3)<<2);
  522|       |
  523|  1.84M|	x_int = xRef >> 2;
  524|  1.84M|	y_int = yRef >> 2;
  525|       |
  526|  1.84M|	dst = cur + y * stride + x;
  527|  1.84M|	src = refn + y_int * (int)stride + x_int;
  528|       |
  529|  1.84M|	tmp = refh; /* we need at least a 16 x stride scratch block */
  530|       |
  531|  1.84M|	switch(quads) {
  ------------------
  |  Branch (531:9): [True: 0, False: 1.84M]
  ------------------
  532|  1.56M|	case 0:
  ------------------
  |  Branch (532:2): [True: 1.56M, False: 276k]
  ------------------
  533|       |		/* Misleading function name, there is no halfpel involved
  534|       |		 * just dst and src averaging with rounding=0 */
  535|  1.56M|		interpolate8x8_halfpel_add(dst, src, stride, rounding);
  536|  1.56M|		break;
  537|  45.3k|	case 1:
  ------------------
  |  Branch (537:2): [True: 45.3k, False: 1.79M]
  ------------------
  538|  45.3k|		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
  539|  45.3k|		break;
  540|  24.8k|	case 2:
  ------------------
  |  Branch (540:2): [True: 24.8k, False: 1.82M]
  ------------------
  541|  24.8k|		Ops->H_Pass_8(dst, src, 8, stride, rounding);
  542|  24.8k|		break;
  543|  43.8k|	case 3:
  ------------------
  |  Branch (543:2): [True: 43.8k, False: 1.80M]
  ------------------
  544|  43.8k|		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  545|  43.8k|		break;
  546|  25.8k|	case 4:
  ------------------
  |  Branch (546:2): [True: 25.8k, False: 1.81M]
  ------------------
  547|  25.8k|		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
  548|  25.8k|		break;
  549|  8.14k|	case 5:
  ------------------
  |  Branch (549:2): [True: 8.14k, False: 1.83M]
  ------------------
  550|  8.14k|		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  551|  8.14k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  552|  8.14k|		break;
  553|  3.79k|	case 6:
  ------------------
  |  Branch (553:2): [True: 3.79k, False: 1.84M]
  ------------------
  554|  3.79k|		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
  555|  3.79k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  556|  3.79k|		break;
  557|  19.1k|	case 7:
  ------------------
  |  Branch (557:2): [True: 19.1k, False: 1.82M]
  ------------------
  558|  19.1k|		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  559|  19.1k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  560|  19.1k|		break;
  561|  27.6k|	case 8:
  ------------------
  |  Branch (561:2): [True: 27.6k, False: 1.81M]
  ------------------
  562|  27.6k|		Ops->V_Pass_8(dst, src, 8, stride, rounding);
  563|  27.6k|		break;
  564|  6.62k|	case 9:
  ------------------
  |  Branch (564:2): [True: 6.62k, False: 1.83M]
  ------------------
  565|  6.62k|		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  566|  6.62k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  567|  6.62k|		break;
  568|  4.84k|	case 10:
  ------------------
  |  Branch (568:2): [True: 4.84k, False: 1.84M]
  ------------------
  569|  4.84k|		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
  570|  4.84k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  571|  4.84k|		break;
  572|  6.26k|	case 11:
  ------------------
  |  Branch (572:2): [True: 6.26k, False: 1.83M]
  ------------------
  573|  6.26k|		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  574|  6.26k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  575|  6.26k|		break;
  576|  28.0k|	case 12:
  ------------------
  |  Branch (576:2): [True: 28.0k, False: 1.81M]
  ------------------
  577|  28.0k|		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  578|  28.0k|		break;
  579|  11.7k|	case 13:
  ------------------
  |  Branch (579:2): [True: 11.7k, False: 1.83M]
  ------------------
  580|  11.7k|		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  581|  11.7k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  582|  11.7k|		break;
  583|  5.70k|	case 14:
  ------------------
  |  Branch (583:2): [True: 5.70k, False: 1.83M]
  ------------------
  584|  5.70k|		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
  585|  5.70k|		Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding);
  586|  5.70k|		break;
  587|  14.7k|	case 15:
  ------------------
  |  Branch (587:2): [True: 14.7k, False: 1.83M]
  ------------------
  588|  14.7k|		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  589|  14.7k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  590|  14.7k|		break;
  591|  1.84M|	}
  592|  1.84M|}

init_GMC:
  589|      2|{
  590|      2|      Predict_16x16_func = Predict_16x16_C;
  591|      2|      Predict_8x8_func   = Predict_8x8_C;
  592|       |
  593|       |#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
  594|       |      if ((cpu_flags & XVID_CPU_MMX)   || (cpu_flags & XVID_CPU_MMXEXT)   ||
  595|       |          (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
  596|       |          (cpu_flags & XVID_CPU_SSE)   || (cpu_flags & XVID_CPU_SSE2) ||
  597|       |          (cpu_flags & XVID_CPU_SSE3)  || (cpu_flags & XVID_CPU_SSE41))
  598|       |	{
  599|       |	   Predict_16x16_func = Predict_16x16_mmx;
  600|       |	   Predict_8x8_func   = Predict_8x8_mmx;
  601|       |
  602|       |           if (cpu_flags & XVID_CPU_SSE41)
  603|       |	     GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse41;
  604|       |	   else if (cpu_flags & XVID_CPU_SSE2)
  605|       |	     GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2;
  606|       |	   else
  607|       |             GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx;
  608|       |	}
  609|       |#endif
  610|      2|}
generate_GMCparameters:
  620|  2.48k|{
  621|  2.48k|	gmc->sW = width	<< 4;
  622|  2.48k|	gmc->sH = height << 4;
  623|  2.48k|	gmc->accuracy = accuracy;
  624|  2.48k|	gmc->num_wp = nb_pts;
  625|       |
  626|       |	/* reduce the number of points, if possible */
  627|  2.48k|	if (nb_pts<2 || (pts->duv[2].x==0 && pts->duv[2].y==0 && pts->duv[1].x==0 && pts->duv[1].y==0 )) {
  ------------------
  |  Branch (627:6): [True: 1.47k, False: 1.01k]
  |  Branch (627:19): [True: 660, False: 358]
  |  Branch (627:39): [True: 587, False: 73]
  |  Branch (627:59): [True: 351, False: 236]
  |  Branch (627:79): [True: 267, False: 84]
  ------------------
  628|  1.73k|  	if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) {
  ------------------
  |  Branch (628:8): [True: 1.47k, False: 267]
  |  Branch (628:21): [True: 267, False: 0]
  |  Branch (628:41): [True: 267, False: 0]
  ------------------
  629|  1.73k|	  	if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) {
  ------------------
  |  Branch (629:9): [True: 1.28k, False: 449]
  |  Branch (629:22): [True: 166, False: 283]
  |  Branch (629:42): [True: 125, False: 41]
  ------------------
  630|  1.41k|		    nb_pts = 0;
  631|  1.41k|  		}
  632|    324|	  	else nb_pts = 1;
  633|  1.73k|  	}
  634|      0|	  else nb_pts = 2;
  635|  1.73k|  }
  636|       |
  637|       |	/* now, nb_pts stores the actual number of points required for interpolation */
  638|       |
  639|  2.48k|	if (nb_pts<=1)
  ------------------
  |  Branch (639:6): [True: 1.73k, False: 751]
  ------------------
  640|  1.73k|	{
  641|  1.73k|	if (nb_pts==1) {
  ------------------
  |  Branch (641:6): [True: 324, False: 1.41k]
  ------------------
  642|       |		/* store as 4b fixed point */
  643|    324|		gmc->Uo = pts->duv[0].x << accuracy;
  644|    324|		gmc->Vo = pts->duv[0].y << accuracy;
  645|    324|		gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy;	 /* DIV2RND() */
  646|    324|		gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy;	 /* DIV2RND() */
  647|    324|	}
  648|  1.41k|	else {	/* zero points?! */
  649|  1.41k|		gmc->Uo	= gmc->Vo	= 0;
  650|  1.41k|		gmc->Uco = gmc->Vco = 0;
  651|  1.41k|	}
  652|       |
  653|  1.73k|	gmc->predict_16x16	= Predict_1pt_16x16_C;
  654|  1.73k|	gmc->predict_8x8	= Predict_1pt_8x8_C;
  655|  1.73k|	gmc->get_average_mv = get_average_mv_1pt_C;
  656|  1.73k|	}
  657|    751|	else {		/* 2 or 3 points */
  658|    751|	const int rho	 = 3 - accuracy;	/* = {3,2,1,0} for Acc={0,1,2,3} */
  659|    751|	int Alpha = log2bin(width-1);
  660|    751|	int Ws = 1 << Alpha;
  661|       |
  662|    751|	gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width );	 /* dU/dx */
  ------------------
  |  |   76|    751|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 265, False: 486]
  |  |  ------------------
  ------------------
  663|    751|	gmc->dV[0] =		 RDIV( 8*Ws*pts->duv[1].y, width );	 /* dV/dx */
  ------------------
  |  |   76|    751|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 340, False: 411]
  |  |  ------------------
  ------------------
  664|       |
  665|    751|	if (nb_pts==2) {
  ------------------
  |  Branch (665:6): [True: 288, False: 463]
  ------------------
  666|    288|		gmc->dU[1] = -gmc->dV[0];	/* -Sin */
  667|    288|		gmc->dV[1] =	gmc->dU[0] ;	/* Cos */
  668|    288|	}
  669|    463|	else
  670|    463|	{
  671|    463|		const int Beta = log2bin(height-1);
  672|    463|		const int Hs = 1<<Beta;
  673|    463|		gmc->dU[1] =		 RDIV( 8*Hs*pts->duv[2].x, height );	 /* dU/dy */
  ------------------
  |  |   76|    463|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 209, False: 254]
  |  |  ------------------
  ------------------
  674|    463|		gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height );	 /* dV/dy */
  ------------------
  |  |   76|    463|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 263, False: 200]
  |  |  ------------------
  ------------------
  675|    463|		if (Beta>Alpha) {
  ------------------
  |  Branch (675:7): [True: 193, False: 270]
  ------------------
  676|    193|		gmc->dU[0] <<= (Beta-Alpha);
  677|    193|		gmc->dV[0] <<= (Beta-Alpha);
  678|    193|		Alpha = Beta;
  679|    193|		Ws = Hs;
  680|    193|		}
  681|    270|		else {
  682|    270|		gmc->dU[1] <<= Alpha - Beta;
  683|    270|		gmc->dV[1] <<= Alpha - Beta;
  684|    270|		}
  685|    463|	}
  686|       |	/* upscale to 16b fixed-point */
  687|    751|	gmc->dU[0] <<= (16-Alpha - rho);
  688|    751|	gmc->dU[1] <<= (16-Alpha - rho);
  689|    751|	gmc->dV[0] <<= (16-Alpha - rho);
  690|    751|	gmc->dV[1] <<= (16-Alpha - rho);
  691|       |
  692|    751|	gmc->Uo	= ( pts->duv[0].x	 <<(16+ accuracy)) + (1<<15);
  693|    751|	gmc->Vo	= ( pts->duv[0].y	 <<(16+ accuracy)) + (1<<15);
  694|    751|	gmc->Uco = ((pts->duv[0].x-1)<<(17+ accuracy)) + (1<<17);
  695|    751|	gmc->Vco = ((pts->duv[0].y-1)<<(17+ accuracy)) + (1<<17);
  696|    751|	gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2;
  697|    751|	gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2;
  698|       |
  699|    751|	gmc->predict_16x16	= Predict_16x16_func;
  700|    751|	gmc->predict_8x8	= Predict_8x8_func;
  701|    751|	gmc->get_average_mv = get_average_mv_C;
  702|    751|	}
  703|  2.48k|}
gmc.c:Predict_16x16_C:
   98|   178k|{
   99|   178k|	const int W = This->sW;
  100|   178k|	const int H	= This->sH;
  101|   178k|	const int rho = 3 - This->accuracy;
  102|   178k|	const int Rounder = ( (1<<7) - (rounding<<(2*rho)) ) << 16;
  103|       |
  104|   178k|	const int dUx = This->dU[0];
  105|   178k|	const int dVx = This->dV[0];
  106|   178k|	const int dUy = This->dU[1];
  107|   178k|	const int dVy = This->dV[1];
  108|       |
  109|   178k|	int Uo = This->Uo + 16*(dUy*y + dUx*x);
  110|   178k|	int Vo = This->Vo + 16*(dVy*y + dVx*x);
  111|       |
  112|   178k|	int i, j;
  113|       |
  114|   178k|	dst += 16;
  115|  3.03M|	for (j=16; j>0; --j) {
  ------------------
  |  Branch (115:13): [True: 2.85M, False: 178k]
  ------------------
  116|  2.85M|		int U = Uo, V = Vo;
  117|  2.85M|		Uo += dUy; Vo += dVy;
  118|  48.6M|		for (i=-16; i<0; ++i) {
  ------------------
  |  Branch (118:15): [True: 45.7M, False: 2.85M]
  ------------------
  119|  45.7M|			unsigned int f0, f1, ri = 16, rj = 16;
  120|  45.7M|			int Offset;
  121|  45.7M|			int u = ( U >> 16 ) << rho;
  122|  45.7M|			int v = ( V >> 16 ) << rho;
  123|       |
  124|  45.7M|			U += dUx; V += dVx;
  125|       |
  126|  45.7M|			if (u > 0 && u <= W) { ri = MTab[u&15]; Offset = u>>4;	}
  ------------------
  |  Branch (126:8): [True: 40.9M, False: 4.79M]
  |  Branch (126:17): [True: 39.0M, False: 1.94M]
  ------------------
  127|  6.74M|			else {
  128|  6.74M|				if (u > W) Offset = W>>4;
  ------------------
  |  Branch (128:9): [True: 1.94M, False: 4.79M]
  ------------------
  129|  4.79M|				else Offset = 0;
  130|  6.74M|				ri = MTab[0];
  131|  6.74M|			}
  132|       |
  133|  45.7M|			if (v > 0 && v <= H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; }
  ------------------
  |  Branch (133:8): [True: 42.7M, False: 2.98M]
  |  Branch (133:17): [True: 42.6M, False: 137k]
  ------------------
  134|  3.12M|			else {
  135|  3.12M|				if (v > H) Offset += (H>>4)*srcstride;
  ------------------
  |  Branch (135:9): [True: 137k, False: 2.98M]
  ------------------
  136|  3.12M|				rj = MTab[0];
  137|  3.12M|			}
  138|       |
  139|  45.7M|			f0	= src[Offset + 0];
  140|  45.7M|			f0 |= src[Offset + 1] << 16;
  141|  45.7M|			f1	= src[Offset + srcstride + 0];
  142|  45.7M|			f1 |= src[Offset + srcstride + 1] << 16;
  143|  45.7M|			f0 = (ri*f0)>>16;
  144|  45.7M|			f1 = (ri*f1) & 0x0fff0000;
  145|  45.7M|			f0 |= f1;
  146|  45.7M|			f0 = (rj*f0 + Rounder) >> 24;
  147|       |
  148|  45.7M|			dst[i] = (uint8_t)f0;
  149|  45.7M|		}
  150|  2.85M|		dst += dststride;
  151|  2.85M|	}
  152|   178k|}
gmc.c:Predict_8x8_C:
  159|   178k|{
  160|   178k|	const int W	 = This->sW >> 1;
  161|   178k|	const int H	 = This->sH >> 1;
  162|   178k|	const int rho = 3-This->accuracy;
  163|   178k|	const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
  164|       |
  165|   178k|	const int32_t dUx = This->dU[0];
  166|   178k|	const int32_t dVx = This->dV[0];
  167|   178k|	const int32_t dUy = This->dU[1];
  168|   178k|	const int32_t dVy = This->dV[1];
  169|       |
  170|   178k|	int32_t Uo = This->Uco + 8*(dUy*y + dUx*x);
  171|   178k|	int32_t Vo = This->Vco + 8*(dVy*y + dVx*x);
  172|       |
  173|   178k|	int i, j;
  174|       |
  175|   178k|	uDst += 8;
  176|   178k|	vDst += 8;
  177|  1.60M|	for (j=8; j>0; --j) {
  ------------------
  |  Branch (177:12): [True: 1.42M, False: 178k]
  ------------------
  178|  1.42M|		int32_t U = Uo, V = Vo;
  179|  1.42M|		Uo += dUy; Vo += dVy;
  180|       |
  181|  12.8M|		for (i=-8; i<0; ++i) {
  ------------------
  |  Branch (181:14): [True: 11.4M, False: 1.42M]
  ------------------
  182|  11.4M|			int Offset;
  183|  11.4M|			uint32_t f0, f1, ri, rj;
  184|  11.4M|			int32_t u, v;
  185|       |
  186|  11.4M|			u = ( U >> 16 ) << rho;
  187|  11.4M|			v = ( V >> 16 ) << rho;
  188|  11.4M|			U += dUx; V += dVx;
  189|       |
  190|  11.4M|			if (u > 0 && u <= W) {
  ------------------
  |  Branch (190:8): [True: 10.1M, False: 1.24M]
  |  Branch (190:17): [True: 9.78M, False: 405k]
  ------------------
  191|  9.78M|				ri = MTab[u&15];
  192|  9.78M|				Offset = u>>4;
  193|  9.78M|			} else {
  194|  1.65M|				if (u>W) Offset = W>>4;
  ------------------
  |  Branch (194:9): [True: 405k, False: 1.24M]
  ------------------
  195|  1.24M|				else Offset = 0;
  196|  1.65M|				ri = MTab[0];
  197|  1.65M|			}
  198|       |
  199|  11.4M|			if (v > 0 && v <= H) {
  ------------------
  |  Branch (199:8): [True: 10.8M, False: 617k]
  |  Branch (199:17): [True: 10.7M, False: 27.9k]
  ------------------
  200|  10.7M|				rj = MTab[v&15];
  201|  10.7M|				Offset += (v>>4)*srcstride;
  202|  10.7M|			} else {
  203|   645k|				if (v>H) Offset += (H>>4)*srcstride;
  ------------------
  |  Branch (203:9): [True: 27.9k, False: 617k]
  ------------------
  204|   645k|				rj = MTab[0];
  205|   645k|			}
  206|       |
  207|  11.4M|			f0	= uSrc[Offset + 0];
  208|  11.4M|			f0 |= uSrc[Offset + 1] << 16;
  209|  11.4M|			f1	= uSrc[Offset + srcstride + 0];
  210|  11.4M|			f1 |= uSrc[Offset + srcstride + 1] << 16;
  211|  11.4M|			f0 = (ri*f0)>>16;
  212|  11.4M|			f1 = (ri*f1) & 0x0fff0000;
  213|  11.4M|			f0 |= f1;
  214|  11.4M|			f0 = (rj*f0 + Rounder) >> 24;
  215|       |
  216|  11.4M|			uDst[i] = (uint8_t)f0;
  217|       |
  218|  11.4M|			f0	= vSrc[Offset + 0];
  219|  11.4M|			f0 |= vSrc[Offset + 1] << 16;
  220|  11.4M|			f1	= vSrc[Offset + srcstride + 0];
  221|  11.4M|			f1 |= vSrc[Offset + srcstride + 1] << 16;
  222|  11.4M|			f0 = (ri*f0)>>16;
  223|  11.4M|			f1 = (ri*f1) & 0x0fff0000;
  224|  11.4M|			f0 |= f1;
  225|  11.4M|			f0 = (rj*f0 + Rounder) >> 24;
  226|       |
  227|  11.4M|			vDst[i] = (uint8_t)f0;
  228|  11.4M|		}
  229|  1.42M|		uDst += dststride;
  230|  1.42M|		vDst += dststride;
  231|  1.42M|	}
  232|   178k|}
gmc.c:Predict_1pt_16x16_C:
  269|   345k|{
  270|   345k|	const int W	 = This->sW;
  271|   345k|	const int H	 = This->sH;
  272|   345k|	const int rho = 3-MIN(This->accuracy, 3);
  ------------------
  |  |  255|   345k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 246k, False: 99.4k]
  |  |  ------------------
  ------------------
  273|   345k|	const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
  274|       |
  275|       |
  276|   345k|	int32_t uo = This->Uo + (x<<8);	 /* ((16*x)<<4) */
  277|   345k|	int32_t vo = This->Vo + (y<<8);
  278|   345k|	uint32_t ri = MTab[uo & 15];
  279|   345k|	uint32_t rj = MTab[vo & 15];
  280|   345k|	int i, j;
  281|       |
  282|   345k|	int32_t Offset;
  283|   345k|	if (vo>=(-16<<4) && vo<=H) Offset = (vo>>4)*srcstride;
  ------------------
  |  Branch (283:6): [True: 344k, False: 1.59k]
  |  Branch (283:22): [True: 343k, False: 514]
  ------------------
  284|  2.10k|	else {
  285|  2.10k|		if (vo>H) Offset = ( H>>4)*srcstride;
  ------------------
  |  Branch (285:7): [True: 514, False: 1.59k]
  ------------------
  286|  1.59k|		else Offset =-16*srcstride;
  287|  2.10k|		rj = MTab[0];
  288|  2.10k|	}
  289|   345k|	if (uo>=(-16<<4) && uo<=W) Offset += (uo>>4);
  ------------------
  |  Branch (289:6): [True: 344k, False: 1.72k]
  |  Branch (289:22): [True: 321k, False: 22.4k]
  ------------------
  290|  24.1k|	else {
  291|  24.1k|		if (uo>W) Offset += (W>>4);
  ------------------
  |  Branch (291:7): [True: 22.4k, False: 1.72k]
  ------------------
  292|  1.72k|		else Offset -= 16;
  293|  24.1k|		ri = MTab[0];
  294|  24.1k|	}
  295|       |
  296|   345k|	Dst += 16;
  297|       |
  298|  5.87M|	for(j=16; j>0; --j, Offset+=srcstride-16)
  ------------------
  |  Branch (298:12): [True: 5.53M, False: 345k]
  ------------------
  299|  5.53M|	{
  300|  94.0M|	for(i=-16; i<0; ++i, ++Offset)
  ------------------
  |  Branch (300:13): [True: 88.5M, False: 5.53M]
  ------------------
  301|  88.5M|	{
  302|  88.5M|		uint32_t f0, f1;
  303|  88.5M|		f0	= Src[ Offset		+0 ];
  304|  88.5M|		f0 |= Src[ Offset		+1 ] << 16;
  305|  88.5M|		f1	= Src[ Offset+srcstride +0 ];
  306|  88.5M|		f1 |= Src[ Offset+srcstride +1 ] << 16;
  307|  88.5M|		f0 = (ri*f0)>>16;
  308|  88.5M|		f1 = (ri*f1) & 0x0fff0000;
  309|  88.5M|		f0 |= f1;
  310|  88.5M|		f0 = ( rj*f0 + Rounder ) >> 24;
  311|  88.5M|		Dst[i] = (uint8_t)f0;
  312|  88.5M|	}
  313|  5.53M|	Dst += dststride;
  314|  5.53M|	}
  315|   345k|}
gmc.c:Predict_1pt_8x8_C:
  322|   345k|{
  323|   345k|	const int W	 = This->sW >> 1;
  324|   345k|	const int H	 = This->sH >> 1;
  325|   345k|	const int rho = 3-This->accuracy;
  326|   345k|	const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
  327|       |
  328|   345k|	int32_t uo = This->Uco + (x<<7);
  329|   345k|	int32_t vo = This->Vco + (y<<7);
  330|   345k|	uint32_t rri = MTab[uo & 15];
  331|   345k|	uint32_t rrj = MTab[vo & 15];
  332|   345k|	int i, j;
  333|       |
  334|   345k|	int32_t Offset;
  335|   345k|	if (vo>=(-8<<4) && vo<=H) Offset = (vo>>4)*srcstride;
  ------------------
  |  Branch (335:6): [True: 344k, False: 1.68k]
  |  Branch (335:21): [True: 343k, False: 794]
  ------------------
  336|  2.48k|	else {
  337|  2.48k|		if (vo>H) Offset = ( H>>4)*srcstride;
  ------------------
  |  Branch (337:7): [True: 794, False: 1.68k]
  ------------------
  338|  1.68k|		else Offset =-8*srcstride;
  339|  2.48k|		rrj = MTab[0];
  340|  2.48k|	}
  341|   345k|	if (uo>=(-8<<4) && uo<=W) Offset += (uo>>4);
  ------------------
  |  Branch (341:6): [True: 343k, False: 1.91k]
  |  Branch (341:21): [True: 321k, False: 22.5k]
  ------------------
  342|  24.4k|	else {
  343|  24.4k|		if (uo>W) Offset += ( W>>4);
  ------------------
  |  Branch (343:7): [True: 22.5k, False: 1.91k]
  ------------------
  344|  1.91k|		else Offset -= 8;
  345|  24.4k|		rri = MTab[0];
  346|  24.4k|	}
  347|       |
  348|   345k|	uDst += 8;
  349|   345k|	vDst += 8;
  350|  3.11M|	for(j=8; j>0; --j, Offset+=srcstride-8)
  ------------------
  |  Branch (350:11): [True: 2.76M, False: 345k]
  ------------------
  351|  2.76M|	{
  352|  24.8M|	for(i=-8; i<0; ++i, Offset++)
  ------------------
  |  Branch (352:12): [True: 22.1M, False: 2.76M]
  ------------------
  353|  22.1M|	{
  354|  22.1M|		uint32_t f0, f1;
  355|  22.1M|		f0	= uSrc[ Offset + 0 ];
  356|  22.1M|		f0 |= uSrc[ Offset + 1 ] << 16;
  357|  22.1M|		f1	= uSrc[ Offset + srcstride + 0 ];
  358|  22.1M|		f1 |= uSrc[ Offset + srcstride + 1 ] << 16;
  359|  22.1M|		f0 = (rri*f0)>>16;
  360|  22.1M|		f1 = (rri*f1) & 0x0fff0000;
  361|  22.1M|		f0 |= f1;
  362|  22.1M|		f0 = ( rrj*f0 + Rounder ) >> 24;
  363|  22.1M|		uDst[i] = (uint8_t)f0;
  364|       |
  365|  22.1M|		f0	= vSrc[ Offset + 0 ];
  366|  22.1M|		f0 |= vSrc[ Offset + 1 ] << 16;
  367|  22.1M|		f1	= vSrc[ Offset + srcstride + 0 ];
  368|  22.1M|		f1 |= vSrc[ Offset + srcstride + 1 ] << 16;
  369|  22.1M|		f0 = (rri*f0)>>16;
  370|  22.1M|		f1 = (rri*f1) & 0x0fff0000;
  371|  22.1M|		f0 |= f1;
  372|  22.1M|		f0 = ( rrj*f0 + Rounder ) >> 24;
  373|  22.1M|		vDst[i] = (uint8_t)f0;
  374|  22.1M|	}
  375|  2.76M|	uDst += dststride;
  376|  2.76M|	vDst += dststride;
  377|  2.76M|	}
  378|   345k|}
gmc.c:get_average_mv_1pt_C:
  383|   345k|{
  384|   345k|	mv->x = RSHIFT(Dsp->Uo<<qpel, 3);
  ------------------
  |  |   77|   345k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 25.7k, False: 320k]
  |  |  ------------------
  ------------------
  385|   345k|	mv->y = RSHIFT(Dsp->Vo<<qpel, 3);
  ------------------
  |  |   77|   345k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 3.77k, False: 342k]
  |  |  ------------------
  ------------------
  386|   345k|}
gmc.c:log2bin:
   49|  1.21k|{
   50|       |/* Changed by Chenm001 */
   51|  1.21k|#if !defined(_MSC_VER) || defined(ARCH_IS_X86_64)
   52|  1.21k|  int n = 0;
   53|       |
   54|  9.81k|  while (value) {
  ------------------
  |  Branch (54:10): [True: 8.59k, False: 1.21k]
  ------------------
   55|  8.59k|	value >>= 1;
   56|  8.59k|	n++;
   57|  8.59k|  }
   58|  1.21k|  return n;
   59|       |#else
   60|       |  __asm {
   61|       |	bsr eax, value
   62|       |	inc eax
   63|       |  }
   64|       |#endif
   65|  1.21k|}
gmc.c:get_average_mv_C:
  237|   178k|{
  238|   178k|	int i, j;
  239|   178k|	int vx = 0, vy = 0;
  240|   178k|	int32_t uo = Dsp->Uo + 16*(Dsp->dU[1]*y + Dsp->dU[0]*x);
  241|   178k|	int32_t vo = Dsp->Vo + 16*(Dsp->dV[1]*y + Dsp->dV[0]*x);
  242|  3.03M|	for (j=16; j>0; --j)
  ------------------
  |  Branch (242:13): [True: 2.85M, False: 178k]
  ------------------
  243|  2.85M|	{
  244|  2.85M|	int32_t U, V;
  245|  2.85M|	U = uo; uo += Dsp->dU[1];
  246|  2.85M|	V = vo; vo += Dsp->dV[1];
  247|  48.6M|	for (i=16; i>0; --i)
  ------------------
  |  Branch (247:13): [True: 45.7M, False: 2.85M]
  ------------------
  248|  45.7M|	{
  249|  45.7M|		int32_t u,v;
  250|  45.7M|		u = U >> 16; U += Dsp->dU[0]; vx += u;
  251|  45.7M|		v = V >> 16; V += Dsp->dV[0]; vy += v;
  252|  45.7M|	}
  253|  2.85M|	}
  254|   178k|	vx -= (256*x+120) << (5+Dsp->accuracy);	/* 120 = 15*16/2 */
  255|   178k|	vy -= (256*y+120) << (5+Dsp->accuracy);
  256|       |
  257|   178k|	mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel );
  ------------------
  |  |   77|   178k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 91.1k, False: 87.5k]
  |  |  ------------------
  ------------------
  258|   178k|	mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel );
  ------------------
  |  |   77|   178k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 86.2k, False: 92.4k]
  |  |  ------------------
  ------------------
  259|   178k|}

decoder.c:gmc_sanitize:
   68|  1.04M|{
   69|  1.04M|	int length = 1 << (fcode+4);
   70|       |
   71|       |#if 0
   72|       |	if (quarterpel) value *= 2;
   73|       |#endif
   74|       |
   75|  1.04M|	if (value < -length)
  ------------------
  |  Branch (75:6): [True: 46.6k, False: 1.00M]
  ------------------
   76|  46.6k|		return -length;
   77|  1.00M|	else if (value >= length)
  ------------------
  |  Branch (77:11): [True: 40.8k, False: 961k]
  ------------------
   78|  40.8k|		return length-1;
   79|   961k|	else return value;
   80|  1.04M|}

mbcoding.c:DPRINTF:
  282|   243M|static __inline void DPRINTF(int level, char *format, ...) {}
decoder.c:DPRINTF:
  282|   254M|static __inline void DPRINTF(int level, char *format, ...) {}
bitstream.c:DPRINTF:
  282|  28.2M|static __inline void DPRINTF(int level, char *format, ...) {}
mbprediction.c:DPRINTF:
  282|   244M|static __inline void DPRINTF(int level, char *format, ...) {}

predict_acdc:
   72|   241M|{
   73|   241M|	const int mbpos = (y * mb_width) + x;
   74|   241M|	int16_t *left, *top, *diag, *current;
   75|       |
   76|   241M|	int32_t left_quant = current_quant;
   77|   241M|	int32_t top_quant = current_quant;
   78|       |
   79|   241M|	const int16_t *pLeft = default_acdc_values;
   80|   241M|	const int16_t *pTop = default_acdc_values;
   81|   241M|	const int16_t *pDiag = default_acdc_values;
   82|       |
   83|   241M|	uint32_t index = x + y * mb_width;	/* current macroblock */
   84|   241M|	int *acpred_direction = &pMBs[index].acpred_directions[block];
   85|   241M|	uint32_t i;
   86|       |
   87|   241M|	left = top = diag = current = NULL;
   88|       |
   89|       |	/* grab left,top and diag macroblocks */
   90|       |
   91|       |	/* left macroblock */
   92|       |
   93|   241M|	if (x && mbpos >= bound + 1  &&
  ------------------
  |  Branch (93:6): [True: 238M, False: 3.24M]
  |  Branch (93:11): [True: 237M, False: 285k]
  ------------------
   94|   241M|		(pMBs[index - 1].mode == MODE_INTRA ||
  ------------------
  |  |   37|   475M|#define	MODE_INTRA		3
  ------------------
  |  Branch (94:4): [True: 442k, False: 237M]
  ------------------
   95|   237M|		 pMBs[index - 1].mode == MODE_INTRA_Q)) {
  ------------------
  |  |   38|   237M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (95:4): [True: 110k, False: 237M]
  ------------------
   96|       |
   97|   553k|		left = (int16_t*)pMBs[index - 1].pred_values[0];
   98|   553k|		left_quant = pMBs[index - 1].quant;
   99|   553k|	}
  100|       |	/* top macroblock */
  101|       |
  102|   241M|	if (mbpos >= bound + (int)mb_width &&
  ------------------
  |  Branch (102:6): [True: 237M, False: 4.07M]
  ------------------
  103|   241M|		(pMBs[index - mb_width].mode == MODE_INTRA ||
  ------------------
  |  |   37|   474M|#define	MODE_INTRA		3
  ------------------
  |  Branch (103:4): [True: 375k, False: 236M]
  ------------------
  104|   237M|		 pMBs[index - mb_width].mode == MODE_INTRA_Q)) {
  ------------------
  |  |   38|   236M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (104:4): [True: 76.3k, False: 236M]
  ------------------
  105|       |
  106|   452k|		top = (int16_t*)pMBs[index - mb_width].pred_values[0];
  107|   452k|		top_quant = pMBs[index - mb_width].quant;
  108|   452k|	}
  109|       |	/* diag macroblock */
  110|       |
  111|   241M|	if (x && mbpos >= bound + (int)mb_width + 1 &&
  ------------------
  |  Branch (111:6): [True: 238M, False: 3.24M]
  |  Branch (111:11): [True: 234M, False: 4.02M]
  ------------------
  112|   241M|		(pMBs[index - 1 - mb_width].mode == MODE_INTRA ||
  ------------------
  |  |   37|   468M|#define	MODE_INTRA		3
  ------------------
  |  Branch (112:4): [True: 339k, False: 233M]
  ------------------
  113|   234M|		 pMBs[index - 1 - mb_width].mode == MODE_INTRA_Q)) {
  ------------------
  |  |   38|   233M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (113:4): [True: 62.1k, False: 233M]
  ------------------
  114|       |
  115|   401k|		diag = (int16_t*)pMBs[index - 1 - mb_width].pred_values[0];
  116|   401k|	}
  117|       |
  118|   241M|	current = (int16_t*)pMBs[index].pred_values[0];
  119|       |
  120|       |	/* now grab pLeft, pTop, pDiag _blocks_ */
  121|       |
  122|   241M|	switch (block) {
  ------------------
  |  Branch (122:10): [True: 0, False: 241M]
  ------------------
  123|       |
  124|  40.2M|	case 0:
  ------------------
  |  Branch (124:2): [True: 40.2M, False: 201M]
  ------------------
  125|  40.2M|		if (left)
  ------------------
  |  Branch (125:7): [True: 92.2k, False: 40.1M]
  ------------------
  126|  92.2k|			pLeft = left + MBPRED_SIZE;
  ------------------
  |  |  180|  92.2k|#define MBPRED_SIZE  15
  ------------------
  127|       |
  128|  40.2M|		if (top)
  ------------------
  |  Branch (128:7): [True: 75.3k, False: 40.1M]
  ------------------
  129|  75.3k|			pTop = top + (MBPRED_SIZE << 1);
  ------------------
  |  |  180|  75.3k|#define MBPRED_SIZE  15
  ------------------
  130|       |
  131|  40.2M|		if (diag)
  ------------------
  |  Branch (131:7): [True: 66.8k, False: 40.1M]
  ------------------
  132|  66.8k|			pDiag = diag + 3 * MBPRED_SIZE;
  ------------------
  |  |  180|  66.8k|#define MBPRED_SIZE  15
  ------------------
  133|       |
  134|  40.2M|		break;
  135|       |
  136|  40.2M|	case 1:
  ------------------
  |  Branch (136:2): [True: 40.2M, False: 201M]
  ------------------
  137|  40.2M|		pLeft = current;
  138|  40.2M|		left_quant = current_quant;
  139|       |
  140|  40.2M|		if (top) {
  ------------------
  |  Branch (140:7): [True: 75.3k, False: 40.1M]
  ------------------
  141|  75.3k|			pTop = top + 3 * MBPRED_SIZE;
  ------------------
  |  |  180|  75.3k|#define MBPRED_SIZE  15
  ------------------
  142|  75.3k|			pDiag = top + (MBPRED_SIZE << 1);
  ------------------
  |  |  180|  75.3k|#define MBPRED_SIZE  15
  ------------------
  143|  75.3k|		}
  144|  40.2M|		break;
  145|       |
  146|  40.2M|	case 2:
  ------------------
  |  Branch (146:2): [True: 40.2M, False: 201M]
  ------------------
  147|  40.2M|		if (left) {
  ------------------
  |  Branch (147:7): [True: 92.2k, False: 40.1M]
  ------------------
  148|  92.2k|			pLeft = left + 3 * MBPRED_SIZE;
  ------------------
  |  |  180|  92.2k|#define MBPRED_SIZE  15
  ------------------
  149|  92.2k|			pDiag = left + MBPRED_SIZE;
  ------------------
  |  |  180|  92.2k|#define MBPRED_SIZE  15
  ------------------
  150|  92.2k|		}
  151|       |
  152|  40.2M|		pTop = current;
  153|  40.2M|		top_quant = current_quant;
  154|       |
  155|  40.2M|		break;
  156|       |
  157|  40.2M|	case 3:
  ------------------
  |  Branch (157:2): [True: 40.2M, False: 201M]
  ------------------
  158|  40.2M|		pLeft = current + (MBPRED_SIZE << 1);
  ------------------
  |  |  180|  40.2M|#define MBPRED_SIZE  15
  ------------------
  159|  40.2M|		left_quant = current_quant;
  160|       |
  161|  40.2M|		pTop = current + MBPRED_SIZE;
  ------------------
  |  |  180|  40.2M|#define MBPRED_SIZE  15
  ------------------
  162|  40.2M|		top_quant = current_quant;
  163|       |
  164|  40.2M|		pDiag = current;
  165|       |
  166|  40.2M|		break;
  167|       |
  168|  40.2M|	case 4:
  ------------------
  |  Branch (168:2): [True: 40.2M, False: 201M]
  ------------------
  169|  40.2M|		if (left)
  ------------------
  |  Branch (169:7): [True: 92.2k, False: 40.1M]
  ------------------
  170|  92.2k|			pLeft = left + (MBPRED_SIZE << 2);
  ------------------
  |  |  180|  92.2k|#define MBPRED_SIZE  15
  ------------------
  171|  40.2M|		if (top)
  ------------------
  |  Branch (171:7): [True: 75.3k, False: 40.1M]
  ------------------
  172|  75.3k|			pTop = top + (MBPRED_SIZE << 2);
  ------------------
  |  |  180|  75.3k|#define MBPRED_SIZE  15
  ------------------
  173|  40.2M|		if (diag)
  ------------------
  |  Branch (173:7): [True: 66.8k, False: 40.1M]
  ------------------
  174|  66.8k|			pDiag = diag + (MBPRED_SIZE << 2);
  ------------------
  |  |  180|  66.8k|#define MBPRED_SIZE  15
  ------------------
  175|  40.2M|		break;
  176|       |
  177|  40.2M|	case 5:
  ------------------
  |  Branch (177:2): [True: 40.2M, False: 201M]
  ------------------
  178|  40.2M|		if (left)
  ------------------
  |  Branch (178:7): [True: 92.2k, False: 40.1M]
  ------------------
  179|  92.2k|			pLeft = left + 5 * MBPRED_SIZE;
  ------------------
  |  |  180|  92.2k|#define MBPRED_SIZE  15
  ------------------
  180|  40.2M|		if (top)
  ------------------
  |  Branch (180:7): [True: 75.3k, False: 40.1M]
  ------------------
  181|  75.3k|			pTop = top + 5 * MBPRED_SIZE;
  ------------------
  |  |  180|  75.3k|#define MBPRED_SIZE  15
  ------------------
  182|  40.2M|		if (diag)
  ------------------
  |  Branch (182:7): [True: 66.8k, False: 40.1M]
  ------------------
  183|  66.8k|			pDiag = diag + 5 * MBPRED_SIZE;
  ------------------
  |  |  180|  66.8k|#define MBPRED_SIZE  15
  ------------------
  184|  40.2M|		break;
  185|   241M|	}
  186|       |
  187|       |	/* determine ac prediction direction & ac/dc predictor place rescaled ac/dc
  188|       |	 * predictions into predictors[] for later use */
  189|   241M|	if (abs(pLeft[0] - pDiag[0]) < abs(pDiag[0] - pTop[0])) {
  ------------------
  |  Branch (189:6): [True: 27.9M, False: 213M]
  ------------------
  190|  27.9M|		*acpred_direction = 1;	/* vertical */
  191|  27.9M|		predictors[0] = DIV_DIV(pTop[0], iDcScaler);
  ------------------
  |  |  263|  27.9M|#define DIV_DIV(a,b)    (((a)>0) ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (263:26): [True: 24.6M, False: 3.32M]
  |  |  ------------------
  ------------------
  192|   223M|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (192:15): [True: 195M, False: 27.9M]
  ------------------
  193|   195M|			predictors[i] = rescale(top_quant, current_quant, pTop[i]);
  194|   195M|		}
  195|   213M|	} else {
  196|   213M|		*acpred_direction = 2;	/* horizontal */
  197|   213M|		predictors[0] = DIV_DIV(pLeft[0], iDcScaler);
  ------------------
  |  |  263|   213M|#define DIV_DIV(a,b)    (((a)>0) ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (263:26): [True: 206M, False: 6.63M]
  |  |  ------------------
  ------------------
  198|  1.70G|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (198:15): [True: 1.49G, False: 213M]
  ------------------
  199|  1.49G|			predictors[i] = rescale(left_quant, current_quant, pLeft[i + 7]);
  200|  1.49G|		}
  201|   213M|	}
  202|   241M|}
add_acdc:
  220|   241M|{
  221|   241M|	uint8_t acpred_direction = pMB->acpred_directions[block];
  222|   241M|	int16_t *pCurrent = (int16_t*)pMB->pred_values[block];
  223|   241M|	uint32_t i;
  224|       |
  225|   241M|	DPRINTF(XVID_DEBUG_COEFF,"predictor[0] %i\n", predictors[0]);
  ------------------
  |  |  201|   241M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  226|       |
  227|   241M|	dct_codes[0] += predictors[0];	/* dc prediction */
  228|   241M|	pCurrent[0] = dct_codes[0]*iDcScaler;
  229|   241M|	if (bsversion > BS_VERSION_BUGGY_DC_CLIPPING) {
  ------------------
  |  |  211|   241M|#define BS_VERSION_BUGGY_DC_CLIPPING 34
  ------------------
  |  Branch (229:6): [True: 238M, False: 3.31M]
  ------------------
  230|   238M|		pCurrent[0] = CLIP(pCurrent[0], -2048, 2047);
  ------------------
  |  |  262|   238M|#define CLIP(X,AMIN,AMAX)   (((X)<(AMIN)) ? (AMIN) : ((X)>(AMAX)) ? (AMAX) : (X))
  |  |  ------------------
  |  |  |  Branch (262:30): [True: 92.0k, False: 237M]
  |  |  |  Branch (262:54): [True: 284k, False: 237M]
  |  |  ------------------
  ------------------
  231|   238M|	}
  232|       |
  233|   241M|	if (acpred_direction == 1) {
  ------------------
  |  Branch (233:6): [True: 116k, False: 241M]
  ------------------
  234|   934k|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (234:15): [True: 817k, False: 116k]
  ------------------
  235|   817k|			int level = dct_codes[i] + predictors[i];
  236|       |
  237|   817k|			DPRINTF(XVID_DEBUG_COEFF,"predictor[%i] %i\n",i, predictors[i]);
  ------------------
  |  |  201|   817k|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  238|       |
  239|   817k|			dct_codes[i] = level;
  240|   817k|			pCurrent[i] = level;
  241|   817k|			pCurrent[i + 7] = dct_codes[i * 8];
  242|   817k|		}
  243|   241M|	} else if (acpred_direction == 2) {
  ------------------
  |  Branch (243:13): [True: 321k, False: 240M]
  ------------------
  244|  2.57M|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (244:15): [True: 2.25M, False: 321k]
  ------------------
  245|  2.25M|			int level = dct_codes[i * 8] + predictors[i];
  246|  2.25M|			DPRINTF(XVID_DEBUG_COEFF,"predictor[%i] %i\n",i*8, predictors[i]);
  ------------------
  |  |  201|  2.25M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  247|       |
  248|  2.25M|			dct_codes[i * 8] = level;
  249|  2.25M|			pCurrent[i + 7] = level;
  250|  2.25M|			pCurrent[i] = dct_codes[i];
  251|  2.25M|		}
  252|   240M|	} else {
  253|  1.92G|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (253:15): [True: 1.68G, False: 240M]
  ------------------
  254|  1.68G|			pCurrent[i] = dct_codes[i];
  255|  1.68G|			pCurrent[i + 7] = dct_codes[i * 8];
  256|  1.68G|		}
  257|   240M|	}
  258|   241M|}
get_pmv2:
  460|   227k|{
  461|   227k|	int lx, ly, lz;		/* left */
  462|   227k|	int tx, ty, tz;		/* top */
  463|   227k|	int rx, ry, rz;		/* top-right */
  464|   227k|	int lpos, tpos, rpos;
  465|   227k|	int num_cand = 0, last_cand = 1;
  466|       |
  467|   227k|	VECTOR pmv[4];	/* left neighbour, top neighbour, top-right neighbour */
  468|       |
  469|   227k|	switch (block) {
  470|   110k|	case 0:
  ------------------
  |  Branch (470:2): [True: 110k, False: 117k]
  ------------------
  471|   110k|		lx = x - 1;	ly = y;		lz = 1;
  472|   110k|		tx = x;		ty = y - 1;	tz = 2;
  473|   110k|		rx = x + 1;	ry = y - 1;	rz = 2;
  474|   110k|		break;
  475|  39.1k|	case 1:
  ------------------
  |  Branch (475:2): [True: 39.1k, False: 188k]
  ------------------
  476|  39.1k|		lx = x;		ly = y;		lz = 0;
  477|  39.1k|		tx = x;		ty = y - 1;	tz = 3;
  478|  39.1k|		rx = x + 1;	ry = y - 1;	rz = 2;
  479|  39.1k|		break;
  480|  39.1k|	case 2:
  ------------------
  |  Branch (480:2): [True: 39.1k, False: 188k]
  ------------------
  481|  39.1k|		lx = x - 1;	ly = y;		lz = 3;
  482|  39.1k|		tx = x;		ty = y;		tz = 0;
  483|  39.1k|		rx = x;		ry = y;		rz = 1;
  484|  39.1k|		break;
  485|  39.1k|	default:
  ------------------
  |  Branch (485:2): [True: 39.1k, False: 188k]
  ------------------
  486|  39.1k|		lx = x;		ly = y;		lz = 2;
  487|  39.1k|		tx = x;		ty = y;		tz = 0;
  488|  39.1k|		rx = x;		ry = y;		rz = 1;
  489|   227k|	}
  490|       |
  491|   227k|	lpos = lx + ly * mb_width;
  492|   227k|	rpos = rx + ry * mb_width;
  493|   227k|	tpos = tx + ty * mb_width;
  494|       |
  495|   227k|	if (lpos >= bound && lx >= 0) {
  ------------------
  |  Branch (495:6): [True: 212k, False: 14.8k]
  |  Branch (495:23): [True: 203k, False: 8.73k]
  ------------------
  496|   203k|		num_cand++;
  497|   203k|		pmv[1] = mbs[lpos].mvs[lz];
  498|   203k|	} else pmv[1] = zeroMV;
  499|       |
  500|   227k|	if (tpos >= bound) {
  ------------------
  |  Branch (500:6): [True: 118k, False: 108k]
  ------------------
  501|   118k|		num_cand++;
  502|   118k|		last_cand = 2;
  503|   118k|		pmv[2] = mbs[tpos].mvs[tz];
  504|   118k|	} else pmv[2] = zeroMV;
  505|       |
  506|   227k|	if (rpos >= bound && rx < mb_width) {
  ------------------
  |  Branch (506:6): [True: 120k, False: 107k]
  |  Branch (506:23): [True: 109k, False: 10.7k]
  ------------------
  507|   109k|		num_cand++;
  508|   109k|		last_cand = 3;
  509|   109k|		pmv[3] = mbs[rpos].mvs[rz];
  510|   118k|	} else pmv[3] = zeroMV;
  511|       |
  512|       |	/* If there're more than one candidate, we return the median vector */
  513|       |
  514|   227k|	if (num_cand > 1) {
  ------------------
  |  Branch (514:6): [True: 118k, False: 109k]
  ------------------
  515|       |		/* set median */
  516|   118k|		pmv[0].x =
  517|   118k|			MIN(MAX(pmv[1].x, pmv[2].x),
  ------------------
  |  |   34|   945k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 13.4k, False: 104k]
  |  |  |  Branch (34:21): [True: 26.9k, False: 91.1k]
  |  |  |  Branch (34:25): [True: 23.5k, False: 94.6k]
  |  |  |  Branch (34:25): [True: 29.9k, False: 88.1k]
  |  |  |  Branch (34:25): [True: 3.10k, False: 16.8k]
  |  |  |  Branch (34:25): [True: 10.0k, False: 88.1k]
  |  |  |  Branch (34:25): [True: 19.9k, False: 98.2k]
  |  |  |  Branch (34:29): [True: 2.65k, False: 10.7k]
  |  |  |  Branch (34:33): [True: 23.5k, False: 81.2k]
  |  |  |  Branch (34:33): [True: 29.9k, False: 74.7k]
  |  |  |  Branch (34:33): [True: 3.10k, False: 16.8k]
  |  |  |  Branch (34:33): [True: 10.0k, False: 74.7k]
  |  |  |  Branch (34:33): [True: 19.9k, False: 84.7k]
  |  |  ------------------
  ------------------
  518|   118k|				MIN(MAX(pmv[2].x, pmv[3].x), MAX(pmv[1].x, pmv[3].x)));
  519|   118k|		pmv[0].y =
  520|   118k|			MIN(MAX(pmv[1].y, pmv[2].y),
  ------------------
  |  |   34|   945k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 11.0k, False: 107k]
  |  |  |  Branch (34:21): [True: 27.0k, False: 91.0k]
  |  |  |  Branch (34:25): [True: 12.6k, False: 105k]
  |  |  |  Branch (34:25): [True: 23.6k, False: 94.4k]
  |  |  |  Branch (34:25): [True: 2.17k, False: 14.6k]
  |  |  |  Branch (34:25): [True: 6.85k, False: 94.4k]
  |  |  |  Branch (34:25): [True: 16.8k, False: 101k]
  |  |  |  Branch (34:29): [True: 2.08k, False: 8.96k]
  |  |  |  Branch (34:33): [True: 12.6k, False: 94.4k]
  |  |  |  Branch (34:33): [True: 23.6k, False: 83.4k]
  |  |  |  Branch (34:33): [True: 2.17k, False: 14.6k]
  |  |  |  Branch (34:33): [True: 6.85k, False: 83.4k]
  |  |  |  Branch (34:33): [True: 16.8k, False: 90.2k]
  |  |  ------------------
  ------------------
  521|   118k|				MIN(MAX(pmv[2].y, pmv[3].y), MAX(pmv[1].y, pmv[3].y)));
  522|   118k|		return pmv[0];
  523|   118k|	}
  524|       |
  525|   109k|	return pmv[last_cand];	/* no point calculating median mv */
  526|   227k|}
get_pmv2_interlaced:
  534|  86.8k|{
  535|  86.8k|  int lx, ly, lz;   /* left */
  536|  86.8k|  int tx, ty, tz;   /* top */
  537|  86.8k|  int rx, ry, rz;   /* top-right */
  538|  86.8k|  int lpos, tpos, rpos;
  539|  86.8k|  int num_cand = 0, last_cand = 1;
  540|       |
  541|  86.8k|  VECTOR pmv[4];  /* left neighbour, top neighbour, top-right neighbour */
  542|       |
  543|  86.8k|  lx=x-1; ly=y;   lz=1;
  544|  86.8k|  tx=x;   ty=y-1; tz=2;
  545|  86.8k|  rx=x+1; ry=y-1; rz=2;
  546|       |
  547|  86.8k|  lpos=lx+ly*mb_width;
  548|  86.8k|  rpos=rx+ry*mb_width;
  549|  86.8k|  tpos=tx+ty*mb_width;
  550|       |
  551|  86.8k|  if(lx>=0 && lpos>=bound) 
  ------------------
  |  Branch (551:6): [True: 72.7k, False: 14.1k]
  |  Branch (551:15): [True: 72.1k, False: 540]
  ------------------
  552|  72.1k|  {
  553|  72.1k|    num_cand++;
  554|  72.1k|    if(mbs[lpos].field_pred)
  ------------------
  |  Branch (554:8): [True: 4.69k, False: 67.4k]
  ------------------
  555|  4.69k|     pmv[1] = mbs[lpos].mvs_avg;
  556|  67.4k|    else 
  557|  67.4k|     pmv[1] = mbs[lpos].mvs[lz];
  558|  72.1k|  }
  559|  14.6k|  else 
  560|  14.6k|  {
  561|  14.6k|    pmv[1] = zeroMV;
  562|  14.6k|  }  
  563|       |
  564|  86.8k|  if(tpos>=bound) 
  ------------------
  |  Branch (564:6): [True: 46.8k, False: 40.0k]
  ------------------
  565|  46.8k|  {
  566|  46.8k|    num_cand++;
  567|  46.8k|    last_cand=2;
  568|  46.8k|    if(mbs[tpos].field_pred)
  ------------------
  |  Branch (568:8): [True: 5.81k, False: 41.0k]
  ------------------
  569|  5.81k|     pmv[2] = mbs[tpos].mvs_avg;
  570|  41.0k|    else
  571|  41.0k|     pmv[2] = mbs[tpos].mvs[tz];
  572|  46.8k|  } 
  573|  40.0k|  else
  574|  40.0k|  { 
  575|  40.0k|    pmv[2] = zeroMV;
  576|  40.0k|  }
  577|       |        
  578|  86.8k|  if(rx<mb_width && rpos>=bound) 
  ------------------
  |  Branch (578:6): [True: 72.2k, False: 14.5k]
  |  Branch (578:21): [True: 33.0k, False: 39.2k]
  ------------------
  579|  33.0k|  {
  580|  33.0k|    num_cand++;
  581|  33.0k|    last_cand = 3;
  582|  33.0k|    if(mbs[rpos].field_pred)
  ------------------
  |  Branch (582:8): [True: 3.71k, False: 29.3k]
  ------------------
  583|  3.71k|     pmv[3] = mbs[rpos].mvs_avg;
  584|  29.3k|    else
  585|  29.3k|     pmv[3] = mbs[rpos].mvs[rz];
  586|  33.0k|  } 
  587|  53.8k|  else
  588|  53.8k|  { 
  589|  53.8k|    pmv[3] = zeroMV;
  590|  53.8k|  }  
  591|       |
  592|       |  /* If there're more than one candidate, we return the median vector */
  593|  86.8k|  if(num_cand>1) 
  ------------------
  |  Branch (593:6): [True: 45.3k, False: 41.4k]
  ------------------
  594|  45.3k|  {
  595|       |    /* set median */
  596|  45.3k|    pmv[0].x = MIN(MAX(pmv[1].x, pmv[2].x),
  ------------------
  |  |   34|   363k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 4.92k, False: 40.4k]
  |  |  |  Branch (34:21): [True: 7.32k, False: 38.0k]
  |  |  |  Branch (34:25): [True: 10.1k, False: 35.2k]
  |  |  |  Branch (34:25): [True: 11.7k, False: 33.6k]
  |  |  |  Branch (34:25): [True: 1.38k, False: 3.83k]
  |  |  |  Branch (34:25): [True: 6.52k, False: 33.6k]
  |  |  |  Branch (34:25): [True: 5.21k, False: 40.1k]
  |  |  |  Branch (34:29): [True: 840, False: 4.08k]
  |  |  |  Branch (34:33): [True: 10.1k, False: 30.3k]
  |  |  |  Branch (34:33): [True: 11.7k, False: 28.7k]
  |  |  |  Branch (34:33): [True: 1.38k, False: 3.83k]
  |  |  |  Branch (34:33): [True: 6.52k, False: 28.7k]
  |  |  |  Branch (34:33): [True: 5.21k, False: 35.2k]
  |  |  ------------------
  ------------------
  597|  45.3k|               MIN(MAX(pmv[2].x, pmv[3].x), MAX(pmv[1].x, pmv[3].x)));
  598|  45.3k|    pmv[0].y = MIN(MAX(pmv[1].y, pmv[2].y),
  ------------------
  |  |   34|   363k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 5.21k, False: 40.1k]
  |  |  |  Branch (34:21): [True: 9.20k, False: 36.1k]
  |  |  |  Branch (34:25): [True: 8.98k, False: 36.3k]
  |  |  |  Branch (34:25): [True: 11.3k, False: 34.0k]
  |  |  |  Branch (34:25): [True: 1.67k, False: 4.20k]
  |  |  |  Branch (34:25): [True: 5.42k, False: 34.0k]
  |  |  |  Branch (34:25): [True: 5.88k, False: 39.4k]
  |  |  |  Branch (34:29): [True: 803, False: 4.40k]
  |  |  |  Branch (34:33): [True: 8.98k, False: 31.1k]
  |  |  |  Branch (34:33): [True: 11.3k, False: 28.8k]
  |  |  |  Branch (34:33): [True: 1.67k, False: 4.20k]
  |  |  |  Branch (34:33): [True: 5.42k, False: 28.8k]
  |  |  |  Branch (34:33): [True: 5.88k, False: 34.2k]
  |  |  ------------------
  ------------------
  599|  45.3k|               MIN(MAX(pmv[2].y, pmv[3].y), MAX(pmv[1].y, pmv[3].y)));
  600|       |          
  601|  45.3k|    return pmv[0];
  602|  45.3k|  }
  603|       |
  604|  41.4k|  return pmv[last_cand];  /* no point calculating median mv */
  605|  86.8k|}
mbprediction.c:rescale:
   42|  1.68G|{
   43|  1.68G|	return (coeff != 0) ? DIV_DIV((coeff) * (predict_quant),
  ------------------
  |  |  263|   669k|#define DIV_DIV(a,b)    (((a)>0) ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (263:26): [True: 206k, False: 462k]
  |  |  ------------------
  ------------------
  |  Branch (43:9): [True: 669k, False: 1.68G]
  ------------------
   44|  1.68G|								  (current_quant)) : 0;
   45|  1.68G|}

dequant_h263_intra_c:
  161|   140M|{
  162|   140M|	const int32_t quant_m_2 = quant << 1;
  163|   140M|	const int32_t quant_add = (quant & 1 ? quant : quant - 1);
  ------------------
  |  Branch (163:29): [True: 93.3M, False: 46.8M]
  ------------------
  164|   140M|	int i;
  165|       |
  166|   140M|	data[0] = coeff[0] * dcscalar;
  167|   140M|	if (data[0] < -2048) {
  ------------------
  |  Branch (167:6): [True: 20.6k, False: 140M]
  ------------------
  168|  20.6k|		data[0] = -2048;
  169|   140M|	} else if (data[0] > 2047) {
  ------------------
  |  Branch (169:13): [True: 97.1k, False: 140M]
  ------------------
  170|  97.1k|		data[0] = 2047;
  171|  97.1k|	}
  172|       |
  173|  8.97G|	for (i = 1; i < 64; i++) {
  ------------------
  |  Branch (173:14): [True: 8.83G, False: 140M]
  ------------------
  174|  8.83G|		int32_t acLevel = coeff[i];
  175|       |
  176|  8.83G|		if (acLevel == 0) {
  ------------------
  |  Branch (176:7): [True: 8.83G, False: 621k]
  ------------------
  177|  8.83G|			data[i] = 0;
  178|  8.83G|		} else if (acLevel < 0) {
  ------------------
  |  Branch (178:14): [True: 418k, False: 203k]
  ------------------
  179|   418k|			acLevel = quant_m_2 * -acLevel + quant_add;
  180|   418k|			data[i] = (acLevel <= 2048 ? -acLevel : -2048);
  ------------------
  |  Branch (180:15): [True: 401k, False: 16.4k]
  ------------------
  181|   418k|		} else {
  182|   203k|			acLevel = quant_m_2 * acLevel + quant_add;
  183|   203k|			data[i] = (acLevel <= 2047 ? acLevel : 2047);
  ------------------
  |  Branch (183:15): [True: 189k, False: 13.9k]
  ------------------
  184|   203k|		}
  185|  8.83G|	}
  186|       |
  187|   140M|	return(0);
  188|   140M|}

get_intra_matrix:
   61|   101M|{
   62|   101M|	return(mpeg_quant_matrices + 0*64);
   63|   101M|}
get_inter_matrix:
   67|   631k|{
   68|   631k|	return(mpeg_quant_matrices + 4*64);
   69|   631k|}
get_default_intra_matrix:
   73|  8.76k|{
   74|  8.76k|	return default_intra_matrix;
   75|  8.76k|}
get_default_inter_matrix:
   79|  10.6k|{
   80|  10.6k|	return default_inter_matrix;
   81|  10.6k|}
set_intra_matrix:
  113|  22.4k|{
  114|  22.4k|	int i;
  115|  22.4k|	uint16_t *intra_matrix = mpeg_quant_matrices + 0*64;
  116|       |
  117|  1.45M|	for (i = 0; i < 64; i++) {
  ------------------
  |  Branch (117:14): [True: 1.43M, False: 22.4k]
  ------------------
  118|  1.43M|		intra_matrix[i] = (!i) ? (uint16_t)8: (uint16_t)MAX(1, matrix[i]);
  ------------------
  |  |  258|  2.85M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 74.9k, False: 1.33M]
  |  |  ------------------
  ------------------
  |  Branch (118:21): [True: 22.4k, False: 1.41M]
  ------------------
  119|  1.43M|	}
  120|  22.4k|}
set_inter_matrix:
  137|  22.4k|{ /* Fills the four inter tables inside mpeg_quant_matrices from the 64 entries of matrix. */
  138|  22.4k|	int i;
  139|  22.4k|	uint16_t *inter_matrix = mpeg_quant_matrices + 4*64; /* slot 4: the matrix values, floored at 1 */
  140|  22.4k|	uint16_t *inter_matrix1 = mpeg_quant_matrices + 5*64; /* slot 5: half of each value */
  141|  22.4k|	uint16_t *inter_matrix_fix = mpeg_quant_matrices + 6*64; /* slot 6: FIX() of each value */
  142|  22.4k|	uint16_t *inter_matrix_fixl = mpeg_quant_matrices + 7*64; /* slot 7: FIXL() of each value */
  143|       |
  144|  1.45M|	for (i = 0; i < 64; i++) {
  ------------------
  |  Branch (144:14): [True: 1.43M, False: 22.4k]
  ------------------
  145|  1.43M|		inter_matrix1[i] = ((inter_matrix[i] = (int16_t)MAX(1, matrix[i]))>>1); /* store max(1, matrix[i]), then keep its half */
  ------------------
  |  |  258|  1.43M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 49.2k, False: 1.38M]
  |  |  ------------------
  ------------------
  146|  1.43M|		inter_matrix1[i] += ((inter_matrix[i] == 1) ? 1: 0); /* 1>>1 is 0, so bump the half back up to 1 in that case */
  ------------------
  |  Branch (146:24): [True: 51.0k, False: 1.38M]
  ------------------
  147|  1.43M|		inter_matrix_fix[i] = (uint16_t) FIX(inter_matrix[i]); /* 0xFFFF for a value of 1, otherwise 65536/value + 1, narrowed to 16 bits */
  ------------------
  |  |   30|  1.43M|#define FIX(X)   (((X)==1) ? 0xFFFF : ((1UL << 16) / (X) + 1))
  |  |  ------------------
  |  |  |  Branch (30:19): [True: 51.0k, False: 1.38M]
  |  |  ------------------
  ------------------
  148|  1.43M|		inter_matrix_fixl[i] = (uint16_t) FIXL(inter_matrix[i]); /* 65536/value - 1, narrowed to 16 bits */
  ------------------
  |  |   31|  1.43M|#define FIXL(X)    ((1UL << 16) / (X) - 1)
  ------------------
  149|  1.43M|	}
  150|  22.4k|}
init_mpeg_matrix:
  153|  10.5k|init_mpeg_matrix(uint16_t * mpeg_quant_matrices) {
  154|       |	/* Loads both built-in default matrices into mpeg_quant_matrices. */
  155|  10.5k|	set_intra_matrix(mpeg_quant_matrices, default_intra_matrix);
  156|  10.5k|	set_inter_matrix(mpeg_quant_matrices, default_inter_matrix);
  157|  10.5k|}

dequant_mpeg_intra_c:
  145|   101M|{ /* Dequantises the 64 entries of coeff into data using the intra matrix; always returns 0. */
  146|   101M|	const uint16_t *intra_matrix = get_intra_matrix(mpeg_quant_matrices);
  147|   101M|	int i;
  148|       |	/* Entry 0 is scaled by dcscalar alone and limited to [-2048, 2047]. */
  149|   101M|	data[0] = coeff[0] * dcscalar; /* NOTE(review): the product is stored before the range check; if data[] is a 16-bit type it is narrowed first - confirm */
  150|   101M|	if (data[0] < -2048) {
  ------------------
  |  Branch (150:6): [True: 80.5k, False: 101M]
  ------------------
  151|  80.5k|		data[0] = -2048;
  152|   101M|	} else if (data[0] > 2047) {
  ------------------
  |  Branch (152:13): [True: 198k, False: 100M]
  ------------------
  153|   198k|		data[0] = 2047;
  154|   198k|	}
  155|       |	/* Entries 1..63: zero stays zero, otherwise |coeff| * intra_matrix[i] * quant / 8 with the sign restored. */
  156|  6.47G|	for (i = 1; i < 64; i++) {
  ------------------
  |  Branch (156:14): [True: 6.37G, False: 101M]
  ------------------
  157|  6.37G|		if (coeff[i] == 0) {
  ------------------
  |  Branch (157:7): [True: 6.37G, False: 1.48M]
  ------------------
  158|  6.37G|			data[i] = 0;
  159|  6.37G|		} else if (coeff[i] < 0) {
  ------------------
  |  Branch (159:14): [True: 1.01M, False: 469k]
  ------------------
  160|  1.01M|			uint32_t level = -coeff[i]; /* work on the magnitude */
  161|       |
  162|  1.01M|			level = (level * intra_matrix[i] * quant) >> 3;
  163|  1.01M|			data[i] = (level <= 2048 ? -(int16_t) level : -2048); /* negative results saturate at -2048 */
  ------------------
  |  Branch (163:15): [True: 487k, False: 532k]
  ------------------
  164|  1.01M|		} else {
  165|   469k|			uint32_t level = coeff[i];
  166|       |
  167|   469k|			level = (level * intra_matrix[i] * quant) >> 3;
  168|   469k|			data[i] = (level <= 2047 ? level : 2047); /* positive results saturate at 2047 */
  ------------------
  |  Branch (168:15): [True: 325k, False: 144k]
  ------------------
  169|   469k|		}
  170|  6.37G|	}
  171|       |
  172|   101M|	return(0);
  173|   101M|}

emms_c:
   47|  82.5k|{ /* Intentionally empty: this C variant performs no work. */
   48|  82.5k|}

xvid_malloc:
   50|   517k|{ /* Allocates size bytes with malloc and returns a pointer adjusted for alignment, or NULL if malloc fails. */
   51|   517k|	uint8_t *mem_ptr;
   52|       |
   53|   517k|	if (!alignment) {
  ------------------
  |  Branch (53:6): [True: 0, False: 517k]
  ------------------
   54|       |
   55|       |		/* No alignment was requested; one extra byte is still reserved for the offset */
   56|      0|		if ((mem_ptr = (uint8_t *) malloc(size + 1)) != NULL) {
  ------------------
  |  Branch (56:7): [True: 0, False: 0]
  ------------------
   57|       |
   58|       |			/* The byte just before the returned pointer holds the offset back to the malloc block (1 here) */
   59|      0|			*mem_ptr = (uint8_t)1;
   60|       |
   61|       |			/* Return the pointer one byte past the start of the malloc block */
   62|      0|			return ((void *)(mem_ptr+1));
   63|      0|		}
   64|   517k|	} else {
   65|   517k|		uint8_t *tmp;
   66|       |
   67|       |		/* Allocate the requested size plus alignment so that
   68|       |		 * the pointer can be realigned if necessary */
   69|   517k|		if ((tmp = (uint8_t *) malloc(size + alignment)) != NULL) {
  ------------------
  |  Branch (69:7): [True: 517k, False: 0]
  ------------------
   70|       |
   71|       |			/* Round tmp up to a multiple of alignment (the mask assumes a power of two - TODO confirm) */
   72|   517k|			mem_ptr =
   73|   517k|				(uint8_t *) ((ptr_t) (tmp + alignment - 1) &
   74|   517k|							 (~(ptr_t) (alignment - 1)));
   75|       |
   76|       |			/* Special case where malloc has already satisfied the alignment.
   77|       |			 * alignment must be added to mem_ptr because
   78|       |			 * (mem_ptr - tmp) is stored in *(mem_ptr-1).
   79|       |			 * Without that adjustment *(mem_ptr-1) would point
   80|       |			 * to memory outside the allocated block */
   81|   517k|			if (mem_ptr == tmp)
  ------------------
  |  Branch (81:8): [True: 137k, False: 380k]
  ------------------
   82|   137k|				mem_ptr += alignment;
   83|       |
   84|       |			/* (mem_ptr - tmp) is stored in *(mem_ptr-1) so that xvid_free can
   85|       |			 * recover the real malloc block and free it */
   86|   517k|			*(mem_ptr - 1) = (uint8_t) (mem_ptr - tmp); /* NOTE(review): the offset is narrowed to one byte, so an offset above 255 would not round-trip - confirm callers keep alignment small */
   87|       |
   88|       |			/* Return the aligned pointer */
   89|   517k|			return ((void *)mem_ptr);
   90|   517k|		}
   91|   517k|	}
   92|       |	/* Reached only when malloc returned NULL in either branch */
   93|      0|	return(NULL);
   94|   517k|}
xvid_free:
  108|   548k|{ /* Releases a pointer by stepping back to the malloc block recorded in the byte before it; NULL is ignored. */
  109|       |
  110|   548k|	uint8_t *ptr;
  111|       |
  112|   548k|	if (mem_ptr == NULL)
  ------------------
  |  Branch (112:6): [True: 31.5k, False: 517k]
  ------------------
  113|  31.5k|		return;
  114|       |
  115|       |	/* Start from the pointer that was handed in */
  116|   517k|	ptr = mem_ptr;
  117|       |
  118|       |	/* *(ptr - 1) holds the offset to the real allocated block;
  119|       |	 * subtract that offset so the real pointer is freed */
  120|   517k|	ptr -= *(ptr - 1);
  121|       |
  122|       |	/* Free the memory */
  123|   517k|	free(ptr);
  124|   517k|}

transfer_16to8copy_c:
   91|   241M|{ /* Writes an 8x8 block of 16-bit src samples into dst as bytes; src rows are 8 apart, dst rows are stride apart. */
   92|   241M|	int i, j;
   93|       |
   94|  2.17G|	for (j = 0; j < 8; j++) {
  ------------------
  |  Branch (94:14): [True: 1.93G, False: 241M]
  ------------------
   95|  17.3G|		for (i = 0; i < 8; i++) {
  ------------------
  |  Branch (95:15): [True: 15.4G, False: 1.93G]
  ------------------
   96|  15.4G|#ifdef USE_REFERENCE_C
   97|  15.4G|			int16_t pixel = src[j * 8 + i];
   98|       |			/* Explicit clamp to [0, 255]; only this branch carries execution counts in this listing */
   99|  15.4G|			if (pixel < 0) {
  ------------------
  |  Branch (99:8): [True: 14.3M, False: 15.4G]
  ------------------
  100|  14.3M|				pixel = 0;
  101|  15.4G|			} else if (pixel > 255) {
  ------------------
  |  Branch (101:15): [True: 18.0M, False: 15.4G]
  ------------------
  102|  18.0M|				pixel = 255;
  103|  18.0M|			}
  104|  15.4G|			dst[j * stride + i] = (uint8_t) pixel;
  105|       |#else
  106|       |			const int16_t pixel = src[j * 8 + i];
  107|       |			const uint8_t value = (uint8_t)( (pixel&~255) ? (-pixel)>>(8*sizeof(pixel)-1) : pixel ); /* branch-free form: values with no bits outside 0..255 pass through, others are derived from the sign of -pixel */
  108|       |			dst[j*stride + i] = value;
  109|       |#endif
  110|  15.4G|    }
  111|  1.93G|	}
  112|   241M|}
transfer_16to8add_c:
  232|   631k|{ /* Adds an 8x8 block of src samples onto the bytes already in dst; src rows are 8 apart, dst rows are stride apart. */
  233|   631k|	int i, j;
  234|       |
  235|  5.68M|	for (j = 0; j < 8; j++) {
  ------------------
  |  Branch (235:14): [True: 5.05M, False: 631k]
  ------------------
  236|  45.4M|		for (i = 0; i < 8; i++) {
  ------------------
  |  Branch (236:15): [True: 40.4M, False: 5.05M]
  ------------------
  237|  40.4M|#ifdef USE_REFERENCE_C
  238|  40.4M|			int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i]; /* existing dst byte plus the src sample */
  239|       |			/* Explicit clamp to [0, 255]; only this branch carries execution counts in this listing */
  240|  40.4M|			if (pixel < 0) {
  ------------------
  |  Branch (240:8): [True: 2.93M, False: 37.4M]
  ------------------
  241|  2.93M|				pixel = 0;
  242|  37.4M|			} else if (pixel > 255) {
  ------------------
  |  Branch (242:15): [True: 128k, False: 37.3M]
  ------------------
  243|   128k|				pixel = 255;
  244|   128k|			}
  245|  40.4M|			dst[j * stride + i] = (uint8_t) pixel;
  246|       |#else
  247|       |      const int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i];
  248|       |			const uint8_t value = (uint8_t)( (pixel&~255) ? (-pixel)>>(8*sizeof(pixel)-1) : pixel ); /* branch-free form: values with no bits outside 0..255 pass through, others are derived from the sign of -pixel */
  249|       |			dst[j*stride + i] = value;
  250|       |#endif
  251|       |
  252|  40.4M|		}
  253|  5.05M|	}
  254|   631k|}
transfer8x8_copy_c:
  269|  5.84M|{ /* Copies 8 rows of 8 bytes from src to dst; both advance by stride per row. */
  270|  5.84M|	int j, i;
  271|       |
  272|  52.6M|	for (j = 0; j < 8; ++j) {
  ------------------
  |  Branch (272:14): [True: 46.7M, False: 5.84M]
  ------------------
  273|  46.7M|	    uint8_t *d = dst + j*stride; /* start of destination row j */
  274|  46.7M|		const uint8_t *s = src + j*stride; /* start of source row j */
  275|       |
  276|   420M|		for (i = 0; i < 8; ++i)
  ------------------
  |  Branch (276:15): [True: 374M, False: 46.7M]
  ------------------
  277|   374M|		{
  278|   374M|			*d++ = *s++;
  279|   374M|		}
  280|  46.7M|	}
  281|  5.84M|}
transfer8x4_copy_c:
  296|  40.0k|{ /* Copies 4 rows from src to dst, two 32-bit words per row; both advance by stride per row. */
  297|  40.0k|	uint32_t j;
  298|       |
  299|   200k|	for (j = 0; j < 4; j++) {
  ------------------
  |  Branch (299:14): [True: 160k, False: 40.0k]
  ------------------
  300|   160k|		uint32_t *d= (uint32_t*)(dst + j*stride); /* NOTE(review): row pointers are reinterpreted as uint32_t*, which presumes suitably aligned rows - confirm */
  301|   160k|		const uint32_t *s = (const uint32_t*)(src + j*stride);
  302|   160k|		*(d+0) = *(s+0); /* first word of the row */
  303|   160k|		*(d+1) = *(s+1); /* second word of the row */
  304|   160k|	}
  305|  40.0k|}

decoder.c:init_timer:
  106|  10.5k|{ /* init_timer: empty body, nothing is done in this build */
  107|  10.5k|}
decoder.c:write_timer:
  110|  10.5k|{ /* write_timer: empty body, nothing is done in this build */
  111|  10.5k|}
decoder.c:start_global_timer:
   62|  72.0k|{ /* start_global_timer: empty body, nothing is done in this build */
   63|  72.0k|}
decoder.c:stop_global_timer:
  126|  18.0k|{ /* stop_global_timer: empty body, nothing is done in this build */
  127|  18.0k|}
decoder.c:start_timer:
   58|  1.25G|{ /* start_timer: empty body, nothing is done in this build */
   59|  1.25G|}
decoder.c:stop_prediction_timer:
  122|   482M|{ /* stop_prediction_timer: empty body, nothing is done in this build */
  123|   482M|}
decoder.c:stop_coding_timer:
  114|   242M|{ /* stop_coding_timer: empty body, nothing is done in this build */
  115|   242M|}
decoder.c:stop_iquant_timer:
   94|   241M|{ /* stop_iquant_timer: empty body, nothing is done in this build */
   95|   241M|}
decoder.c:stop_idct_timer:
   70|   242M|{ /* stop_idct_timer: empty body, nothing is done in this build */
   71|   242M|}
decoder.c:stop_transfer_timer:
  102|  41.3M|{ /* stop_transfer_timer: empty body, nothing is done in this build */
  103|  41.3M|}
decoder.c:stop_edges_timer:
   82|  8.28k|{ /* stop_edges_timer: empty body, nothing is done in this build */
   83|  8.28k|}
decoder.c:stop_comp_timer:
   78|  1.24M|{ /* stop_comp_timer: empty body, nothing is done in this build */
   79|  1.24M|}

xvid_global:
  812|      2|{ /* Dispatches on opt to one of the xvid_gbl_* helpers; only opt and param1 are read in this body. */
  813|      2|	switch(opt)
  814|      2|	{
  815|      2|		case XVID_GBL_INIT :
  ------------------
  |  |  235|      2|#define XVID_GBL_INIT    0 /* initialize xvidcore; must be called before using xvid_decore, or xvid_encore) */
  ------------------
  |  Branch (815:3): [True: 2, False: 0]
  ------------------
  816|      2|			return xvid_gbl_init((xvid_gbl_init_t*)param1); /* param1 is treated as xvid_gbl_init_t* */
  817|       |
  818|      0|        case XVID_GBL_INFO :
  ------------------
  |  |  236|      0|#define XVID_GBL_INFO    1 /* return some info about xvidcore, and the host computer */
  ------------------
  |  Branch (818:9): [True: 0, False: 2]
  ------------------
  819|      0|            return xvid_gbl_info((xvid_gbl_info_t*)param1); /* param1 is treated as xvid_gbl_info_t* */
  820|       |
  821|      0|		case XVID_GBL_CONVERT :
  ------------------
  |  |  237|      0|#define XVID_GBL_CONVERT 2 /* colorspace conversion utility */
  ------------------
  |  Branch (821:3): [True: 0, False: 2]
  ------------------
  822|      0|			return xvid_gbl_convert((xvid_gbl_convert_t*)param1); /* param1 is treated as xvid_gbl_convert_t* */
  823|       |
  824|      0|		default :
  ------------------
  |  Branch (824:3): [True: 0, False: 2]
  ------------------
  825|      0|			return XVID_ERR_FAIL; /* any other opt value is rejected */
  ------------------
  |  |   95|      0|#define XVID_ERR_FAIL		-1		/* general fault */
  ------------------
  826|      2|	}
  827|      2|}
xvid_decore:
  844|  93.0k|{ /* Dispatches on opt to the decoder create, destroy or decode routine and returns that routine's result. */
  845|  93.0k|	switch (opt) {
  846|  10.5k|	case XVID_DEC_CREATE:
  ------------------
  |  |  246|  10.5k|#define XVID_DEC_CREATE  0 /* create decore instance; return 0 on success */
  ------------------
  |  Branch (846:2): [True: 10.5k, False: 82.5k]
  ------------------
  847|  10.5k|		return decoder_create((xvid_dec_create_t *) param1); /* handle is not read on this path */
  848|       |
  849|  10.5k|	case XVID_DEC_DESTROY:
  ------------------
  |  |  247|  10.5k|#define XVID_DEC_DESTROY 1 /* destroy decore instance: return 0 on success */
  ------------------
  |  Branch (849:2): [True: 10.5k, False: 82.5k]
  ------------------
  850|  10.5k|		return decoder_destroy((DECODER *) handle); /* handle is treated as DECODER* */
  851|       |
  852|  72.0k|	case XVID_DEC_DECODE:
  ------------------
  |  |  248|  72.0k|#define XVID_DEC_DECODE  2 /* decode a frame: returns number of bytes consumed >= 0 */
  ------------------
  |  Branch (852:2): [True: 72.0k, False: 21.0k]
  ------------------
  853|  72.0k|		return decoder_decode((DECODER *) handle, (xvid_dec_frame_t *) param1, (xvid_dec_stats_t*) param2); /* param1 is the frame, param2 the stats */
  854|       |
  855|      0|	default:
  ------------------
  |  Branch (855:2): [True: 0, False: 93.0k]
  ------------------
  856|      0|		return XVID_ERR_FAIL; /* any other opt value is rejected */
  ------------------
  |  |   95|      0|#define XVID_ERR_FAIL		-1		/* general fault */
  ------------------
  857|  93.0k|	}
  858|  93.0k|}
xvid.c:xvid_gbl_init:
  200|      2|{
  201|      2|	unsigned int cpu_flags;
  202|       |
  203|      2|	if (XVID_VERSION_MAJOR(init->version) != 1) /* v1.x.x */
  ------------------
  |  |   63|      2|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
  |  Branch (203:6): [True: 0, False: 2]
  ------------------
  204|      0|		return XVID_ERR_VERSION;
  ------------------
  |  |   98|      0|#define XVID_ERR_VERSION	-4		/* structure version not supported */
  ------------------
  205|       |
  206|      2|	cpu_flags = (init->cpu_flags & XVID_CPU_FORCE) ? init->cpu_flags : detect_cpu_flags();
  ------------------
  |  |  180|      2|#define XVID_CPU_FORCE    (1<<31) /* force passed cpu flags */
  ------------------
  |  Branch (206:14): [True: 0, False: 2]
  ------------------
  207|       |
  208|       |	/* Initialize the function pointers */
  209|      2|	init_vlc_tables();
  210|       |
  211|       |	/* Fixed Point Forward/Inverse DCT transformations */
  212|      2|	fdct = fdct_int32;
  213|      2|	idct = idct_int32;
  214|       |
  215|       |	/* Only needed on PPC Altivec archs */
  216|      2|	sadInit = NULL;
  217|       |
  218|       |	/* Restore FPU context : emms_c is a nop functions */
  219|      2|	emms = emms_c;
  220|       |
  221|       |	/* Qpel stuff */
  222|      2|	xvid_QP_Funcs = &xvid_QP_Funcs_C;
  223|      2|	xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_C;
  224|      2|	xvid_Init_QP();
  225|       |
  226|       |	/* Quantization functions */
  227|      2|	quant_h263_intra   = quant_h263_intra_c;
  228|      2|	quant_h263_inter   = quant_h263_inter_c;
  229|      2|	dequant_h263_intra = dequant_h263_intra_c;
  230|      2|	dequant_h263_inter = dequant_h263_inter_c;
  231|       |
  232|      2|	quant_mpeg_intra   = quant_mpeg_intra_c;
  233|      2|	quant_mpeg_inter   = quant_mpeg_inter_c;
  234|      2|	dequant_mpeg_intra = dequant_mpeg_intra_c;
  235|      2|	dequant_mpeg_inter = dequant_mpeg_inter_c;
  236|       |
  237|       |	/* Block transfer related functions */
  238|      2|	transfer_8to16copy = transfer_8to16copy_c;
  239|      2|	transfer_16to8copy = transfer_16to8copy_c;
  240|      2|	transfer_8to16sub  = transfer_8to16sub_c;
  241|      2|	transfer_8to16subro  = transfer_8to16subro_c;
  242|      2|	transfer_8to16sub2 = transfer_8to16sub2_c;
  243|      2|	transfer_8to16sub2ro = transfer_8to16sub2ro_c;
  244|      2|	transfer_16to8add  = transfer_16to8add_c;
  245|      2|	transfer8x8_copy   = transfer8x8_copy_c;
  246|      2|	transfer8x4_copy   = transfer8x4_copy_c;
  247|       |
  248|       |	/* Interlacing functions */
  249|      2|	MBFieldTest = MBFieldTest_c;
  250|       |
  251|       |	/* Image interpolation related functions */
  252|      2|	interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_c;
  253|      2|	interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_c;
  254|      2|	interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;
  255|       |
  256|      2|	interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_c;
  257|      2|	interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_c;
  258|      2|	interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_c;
  259|       |
  260|      2|	interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_c;
  261|      2|	interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_c;
  262|      2|	interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_c;
  263|      2|	interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_c;
  264|       |
  265|      2|	interpolate16x16_lowpass_h = interpolate16x16_lowpass_h_c;
  266|      2|	interpolate16x16_lowpass_v = interpolate16x16_lowpass_v_c;
  267|      2|	interpolate16x16_lowpass_hv = interpolate16x16_lowpass_hv_c;
  268|       |
  269|      2|	interpolate8x8_lowpass_h = interpolate8x8_lowpass_h_c;
  270|      2|	interpolate8x8_lowpass_v = interpolate8x8_lowpass_v_c;
  271|      2|	interpolate8x8_lowpass_hv = interpolate8x8_lowpass_hv_c;
  272|       |
  273|      2|	interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_c;
  274|      2|	interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_c;
  275|       |
  276|      2|	interpolate8x8_avg2 = interpolate8x8_avg2_c;
  277|      2|	interpolate8x8_avg4 = interpolate8x8_avg4_c;
  278|       |
  279|       |	/* postprocessing */
  280|      2|	image_brightness = image_brightness_c;
  281|       |
  282|       |	/* Initialize internal colorspace transformation tables */
  283|      2|	colorspace_init();
  284|       |
  285|       |	/* All colorspace transformation functions User Format->YV12 */
  286|      2|	yv12_to_yv12    = yv12_to_yv12_c;
  287|      2|	rgb555_to_yv12  = rgb555_to_yv12_c;
  288|      2|	rgb565_to_yv12  = rgb565_to_yv12_c;
  289|      2|	rgb_to_yv12     = rgb_to_yv12_c;
  290|      2|	bgr_to_yv12     = bgr_to_yv12_c;
  291|      2|	bgra_to_yv12    = bgra_to_yv12_c;
  292|      2|	abgr_to_yv12    = abgr_to_yv12_c;
  293|      2|	rgba_to_yv12    = rgba_to_yv12_c;
  294|      2|	argb_to_yv12    = argb_to_yv12_c;
  295|      2|	yuyv_to_yv12    = yuyv_to_yv12_c;
  296|      2|	uyvy_to_yv12    = uyvy_to_yv12_c;
  297|       |
  298|      2|	rgb555i_to_yv12 = rgb555i_to_yv12_c;
  299|      2|	rgb565i_to_yv12 = rgb565i_to_yv12_c;
  300|      2|	bgri_to_yv12    = bgri_to_yv12_c;
  301|      2|	bgrai_to_yv12   = bgrai_to_yv12_c;
  302|      2|	abgri_to_yv12   = abgri_to_yv12_c;
  303|      2|	rgbai_to_yv12   = rgbai_to_yv12_c;
  304|      2|	argbi_to_yv12   = argbi_to_yv12_c;
  305|      2|	yuyvi_to_yv12   = yuyvi_to_yv12_c;
  306|      2|	uyvyi_to_yv12   = uyvyi_to_yv12_c;
  307|       |
  308|       |	/* All colorspace transformation functions YV12->User format */
  309|      2|	yv12_to_rgb555  = yv12_to_rgb555_c;
  310|      2|	yv12_to_rgb565  = yv12_to_rgb565_c;
  311|      2|	yv12_to_rgb     = yv12_to_rgb_c;
  312|      2|	yv12_to_bgr     = yv12_to_bgr_c;
  313|      2|	yv12_to_bgra    = yv12_to_bgra_c;
  314|      2|	yv12_to_abgr    = yv12_to_abgr_c;
  315|      2|	yv12_to_rgba    = yv12_to_rgba_c;
  316|      2|	yv12_to_argb    = yv12_to_argb_c;
  317|      2|	yv12_to_yuyv    = yv12_to_yuyv_c;
  318|      2|	yv12_to_uyvy    = yv12_to_uyvy_c;
  319|       |
  320|      2|	yv12_to_rgb555i = yv12_to_rgb555i_c;
  321|      2|	yv12_to_rgb565i = yv12_to_rgb565i_c;
  322|      2|	yv12_to_bgri    = yv12_to_bgri_c;
  323|      2|	yv12_to_bgrai   = yv12_to_bgrai_c;
  324|      2|	yv12_to_abgri   = yv12_to_abgri_c;
  325|      2|	yv12_to_rgbai   = yv12_to_rgbai_c;
  326|      2|	yv12_to_argbi   = yv12_to_argbi_c;
  327|      2|	yv12_to_yuyvi   = yv12_to_yuyvi_c;
  328|      2|	yv12_to_uyvyi   = yv12_to_uyvyi_c;
  329|       |
  330|       |	/* Functions used in motion estimation algorithms */
  331|      2|	calc_cbp      = calc_cbp_c;
  332|      2|	sad16         = sad16_c;
  333|      2|	sad8          = sad8_c;
  334|      2|	sad16bi       = sad16bi_c;
  335|      2|	sad8bi        = sad8bi_c;
  336|      2|	dev16         = dev16_c;
  337|      2|	sad16v        = sad16v_c;
  338|      2|	sse8_16bit    = sse8_16bit_c;
  339|      2|	sse8_8bit     = sse8_8bit_c;
  340|       |
  341|      2|	sseh8_16bit   = sseh8_16bit_c;
  342|      2|	coeff8_energy = coeff8_energy_c;
  343|      2|	blocksum8     = blocksum8_c;
  344|       |
  345|      2|	init_GMC(cpu_flags);
  346|       |
  347|       |#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
  348|       |
  349|       |	if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
  350|       |		(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
  351|       |		(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) ||
  352|       |        (cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41))
  353|       |	{
  354|       |		/* Restore FPU context : emms_c is a nop functions */
  355|       |		emms = emms_mmx;
  356|       |	}
  357|       |
  358|       |	if ((cpu_flags & XVID_CPU_MMX)) {
  359|       |
  360|       |		/* Forward and Inverse Discrete Cosine Transformation functions */
  361|       |		fdct = fdct_mmx_skal;
  362|       |		idct = idct_mmx;
  363|       |
  364|       |		/* Qpel stuff */
  365|       |		xvid_QP_Funcs = &xvid_QP_Funcs_mmx;
  366|       |		xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_mmx;
  367|       |
  368|       |		/* Quantization related functions */
  369|       |		quant_h263_intra   = quant_h263_intra_mmx;
  370|       |		quant_h263_inter   = quant_h263_inter_mmx;
  371|       |		dequant_h263_intra = dequant_h263_intra_mmx;
  372|       |		dequant_h263_inter = dequant_h263_inter_mmx;
  373|       |		quant_mpeg_intra   = quant_mpeg_intra_mmx;
  374|       |		quant_mpeg_inter   = quant_mpeg_inter_mmx;
  375|       |		dequant_mpeg_intra = dequant_mpeg_intra_mmx;
  376|       |		dequant_mpeg_inter = dequant_mpeg_inter_mmx;
  377|       |
  378|       |
  379|       |		/* Block related functions */
  380|       |		transfer_8to16copy = transfer_8to16copy_mmx;
  381|       |		transfer_16to8copy = transfer_16to8copy_mmx;
  382|       |		transfer_8to16sub  = transfer_8to16sub_mmx;
  383|       |		transfer_8to16subro  = transfer_8to16subro_mmx;
  384|       |		transfer_8to16sub2 = transfer_8to16sub2_mmx;
  385|       |		transfer_16to8add  = transfer_16to8add_mmx;
  386|       |		transfer8x8_copy   = transfer8x8_copy_mmx;
  387|       |		transfer8x4_copy   = transfer8x4_copy_mmx;
  388|       |
  389|       |		/* Interlacing Functions */
  390|       |		MBFieldTest = MBFieldTest_mmx;
  391|       |
  392|       |		/* Image Interpolation related functions */
  393|       |		interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_mmx;
  394|       |		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_mmx;
  395|       |		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;
  396|       |
  397|       |		interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_mmx;
  398|       |		interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_mmx;
  399|       |		interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_mmx;
  400|       |
  401|       |		interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_mmx;
  402|       |		interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_mmx;
  403|       |		interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_mmx;
  404|       |		interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_mmx;
  405|       |
  406|       |		interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_mmx;
  407|       |		interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_mmx;
  408|       |
  409|       |		interpolate8x8_avg2 = interpolate8x8_avg2_mmx;
  410|       |		interpolate8x8_avg4 = interpolate8x8_avg4_mmx;
  411|       |
  412|       |		/* postprocessing */
  413|       |		image_brightness = image_brightness_mmx;
  414|       |
  415|       |		/* image input xxx_to_yv12 related functions */
  416|       |
  417|       |		yv12_to_yv12  = yv12_to_yv12_mmx;
  418|       |
  419|       |		bgr_to_yv12   = bgr_to_yv12_mmx;
  420|       |		rgb_to_yv12   = rgb_to_yv12_mmx;
  421|       |		bgra_to_yv12  = bgra_to_yv12_mmx;
  422|       |		rgba_to_yv12  = rgba_to_yv12_mmx;
  423|       |		yuyv_to_yv12  = yuyv_to_yv12_mmx;
  424|       |		uyvy_to_yv12  = uyvy_to_yv12_mmx;
  425|       |
  426|       |		/* image output yv12_to_xxx related functions */
  427|       |		yv12_to_bgr   = yv12_to_bgr_mmx;
  428|       |		yv12_to_bgra  = yv12_to_bgra_mmx;
  429|       |		yv12_to_yuyv  = yv12_to_yuyv_mmx;
  430|       |		yv12_to_uyvy  = yv12_to_uyvy_mmx;
  431|       |
  432|       |		yv12_to_yuyvi = yv12_to_yuyvi_mmx;
  433|       |		yv12_to_uyvyi = yv12_to_uyvyi_mmx;
  434|       |
  435|       |		/* Motion estimation related functions */
  436|       |		calc_cbp   = calc_cbp_mmx;
  437|       |		sad16      = sad16_mmx;
  438|       |		sad8       = sad8_mmx;
  439|       |		sad16bi    = sad16bi_mmx;
  440|       |		sad8bi     = sad8bi_mmx;
  441|       |		dev16      = dev16_mmx;
  442|       |		sad16v	   = sad16v_mmx;
  443|       |		sse8_16bit = sse8_16bit_mmx;
  444|       |		sse8_8bit  = sse8_8bit_mmx;
  445|       |	}
  446|       |
  447|       |	/* these 3dnow functions are faster than mmx, but slower than xmm. */
  448|       |	if ((cpu_flags & XVID_CPU_3DNOW)) {
  449|       |
  450|       |		emms = emms_3dn;
  451|       |
  452|       |		/* ME functions */
  453|       |		sad16bi = sad16bi_3dn;
  454|       |		sad8bi  = sad8bi_3dn;
  455|       |
  456|       |		yuyv_to_yv12  = yuyv_to_yv12_3dn;
  457|       |		uyvy_to_yv12  = uyvy_to_yv12_3dn;
  458|       |
  459|       |	}
  460|       |
  461|       |
  462|       |	if ((cpu_flags & XVID_CPU_MMXEXT)) {
  463|       |
  464|       |		/* DCT */
  465|       |		fdct = fdct_xmm_skal;
  466|       |		idct = idct_xmm;
  467|       |
  468|       |		/* Interpolation */
  469|       |		interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_xmm;
  470|       |		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_xmm;
  471|       |		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;
  472|       |		
  473|       |		interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_xmm;
  474|       |		interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_xmm;
  475|       |		interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_xmm;
  476|       |		
  477|       |		interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_xmm;
  478|       |		interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_xmm;
  479|       |		interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_xmm;
  480|       |		interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_xmm;
  481|       |
  482|       |        /* Quantization */
  483|       |		quant_mpeg_inter = quant_mpeg_inter_xmm;
  484|       |
  485|       |		dequant_h263_intra = dequant_h263_intra_xmm;
  486|       |		dequant_h263_inter = dequant_h263_inter_xmm;
  487|       |
  488|       |        /* Buffer transfer */
  489|       |		transfer_8to16sub2 = transfer_8to16sub2_xmm;
  490|       |		transfer_8to16sub2ro = transfer_8to16sub2ro_xmm;
  491|       |
  492|       |		/* Colorspace transformation */
  493|       |		/* yv12_to_yv12  = yv12_to_yv12_xmm; */ /* appears to be slow on many machines */
  494|       |		yuyv_to_yv12  = yuyv_to_yv12_xmm;
  495|       |		uyvy_to_yv12  = uyvy_to_yv12_xmm;
  496|       |
  497|       |		/* ME functions */
  498|       |		sad16 = sad16_xmm;
  499|       |		sad8  = sad8_xmm;
  500|       |		sad16bi = sad16bi_xmm;
  501|       |		sad8bi  = sad8bi_xmm;
  502|       |		dev16 = dev16_xmm;
  503|       |		sad16v	 = sad16v_xmm;
  504|       |	}
  505|       |
  506|       |	if ((cpu_flags & XVID_CPU_3DNOW)) {
  507|       |
  508|       |		/* Interpolation */
  509|       |		interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;
  510|       |		interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dn;
  511|       |		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;
  512|       |
  513|       |		interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dn;
  514|       |		interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dn;
  515|       |		interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dn;
  516|       |	}
  517|       |
  518|       |	if ((cpu_flags & XVID_CPU_3DNOWEXT)) {
  519|       |
  520|       |		/* Buffer transfer */
  521|       |		transfer_8to16copy =  transfer_8to16copy_3dne;
  522|       |		transfer_16to8copy = transfer_16to8copy_3dne;
  523|       |		transfer_8to16sub =  transfer_8to16sub_3dne;
  524|       |		transfer_8to16subro =  transfer_8to16subro_3dne;
  525|       |		transfer_16to8add = transfer_16to8add_3dne;
  526|       |		transfer8x8_copy = transfer8x8_copy_3dne;
  527|       |		transfer8x4_copy = transfer8x4_copy_3dne;
  528|       |
  529|       |		if ((cpu_flags & XVID_CPU_MMXEXT)) {
  530|       |			/* Inverse DCT */
  531|       |			idct =  idct_3dne;
  532|       |
  533|       |			/* Buffer transfer */
  534|       |			transfer_8to16sub2 =  transfer_8to16sub2_3dne;
  535|       |
  536|       |			/* Interpolation */
  537|       |			interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dne;
  538|       |			interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dne;
  539|       |			interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne;
  540|       |
  541|       |			interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dne;
  542|       |			interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dne;
  543|       |			interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dne;
  544|       |
  545|       |            /* Quantization */
  546|       |			quant_h263_intra = quant_h263_intra_3dne;		/* cmov only */
  547|       |			quant_h263_inter = quant_h263_inter_3dne;
  548|       |			dequant_mpeg_intra = dequant_mpeg_intra_3dne;	/* cmov only */
  549|       |			dequant_mpeg_inter = dequant_mpeg_inter_3dne;
  550|       |			dequant_h263_intra = dequant_h263_intra_3dne;
  551|       |			dequant_h263_inter = dequant_h263_inter_3dne;
  552|       |
  553|       |            /* ME functions */
  554|       |			sad16 = sad16_3dne;
  555|       |			sad8 = sad8_3dne;
  556|       |			sad16bi = sad16bi_3dne;
  557|       |			sad8bi = sad8bi_3dne;
  558|       |			dev16 = dev16_3dne;
  559|       |		}
  560|       |	}
  561|       | 
  562|       |	if ((cpu_flags & XVID_CPU_SSE2)) {
  563|       |
  564|       |		calc_cbp = calc_cbp_sse2;
  565|       |
  566|       |		/* Quantization */
  567|       |		quant_h263_intra   = quant_h263_intra_sse2;
  568|       |		quant_h263_inter   = quant_h263_inter_sse2;
  569|       |		dequant_h263_intra = dequant_h263_intra_sse2;
  570|       |		dequant_h263_inter = dequant_h263_inter_sse2;
  571|       |
  572|       |		/* SAD operators */
  573|       |		sad16       = sad16_sse2;
  574|       |		dev16       = dev16_sse2;
  575|       |
  576|       |		/* PSNR-HVS-M distortion metric */
  577|       |		sseh8_16bit   = sseh8_16bit_sse2;
  578|       |		coeff8_energy = coeff8_energy_sse2;
  579|       |		blocksum8     = blocksum8_sse2;
  580|       |
  581|       |		/* DCT operators */
  582|       |		fdct = fdct_sse2_skal;
  583|       |		idct = idct_sse2_skal;   /* Is now IEEE1180 and Walken compliant. */
  584|       |
  585|       |		/* postprocessing */
  586|       |		image_brightness = image_brightness_sse2;
  587|       |
  588|       |	}
  589|       |
  590|       |	if ((cpu_flags & XVID_CPU_SSE3)) {
  591|       |
  592|       |		/* SAD operators */
  593|       |		sad16    = sad16_sse3;
  594|       |		dev16    = dev16_sse3;
  595|       |	}
  596|       |
  597|       |#endif /* ARCH_IS_IA32 */
  598|       |
  599|       |#if defined(ARCH_IS_IA64)
  600|       |	if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */
  601|       |	  idct_ia64_init();
  602|       |	  fdct = fdct_ia64;
  603|       |	  idct = idct_ia64;   /*not yet working, crashes */
  604|       |	  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_ia64;
  605|       |	  interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_ia64;
  606|       |	  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_ia64;
  607|       |	  sad16 = sad16_ia64;
  608|       |	  sad16bi = sad16bi_ia64;
  609|       |	  sad8 = sad8_ia64;
  610|       |	  dev16 = dev16_ia64;
  611|       |/*	  Halfpel8_Refine = Halfpel8_Refine_ia64; */
  612|       |	  quant_h263_intra = quant_h263_intra_ia64;
  613|       |	  quant_h263_inter = quant_h263_inter_ia64;
  614|       |	  dequant_h263_intra = dequant_h263_intra_ia64;
  615|       |	  dequant_h263_inter = dequant_h263_inter_ia64;
  616|       |	  transfer_8to16copy = transfer_8to16copy_ia64;
  617|       |	  transfer_16to8copy = transfer_16to8copy_ia64;
  618|       |	  transfer_8to16sub = transfer_8to16sub_ia64;
  619|       |	  transfer_8to16sub2 = transfer_8to16sub2_ia64;
  620|       |	  transfer_16to8add = transfer_16to8add_ia64;
  621|       |	  transfer8x8_copy = transfer8x8_copy_ia64;
  622|       |	}
  623|       |#endif
  624|       |
  625|       |#if defined(ARCH_IS_PPC)
  626|       |	if ((cpu_flags & XVID_CPU_ALTIVEC)) {
  627|       |          /* sad operators */
  628|       |		  sad16 = sad16_altivec_c;
  629|       |		  sad16bi = sad16bi_altivec_c;
  630|       |		  sad8 = sad8_altivec_c;
  631|       |		  dev16 = dev16_altivec_c;
  632|       |          
  633|       |          sse8_16bit = sse8_16bit_altivec_c;
  634|       |          
  635|       |          /* mem transfer */
  636|       |          transfer_8to16copy = transfer_8to16copy_altivec_c;
  637|       |          transfer_16to8copy = transfer_16to8copy_altivec_c;
  638|       |          transfer_8to16sub = transfer_8to16sub_altivec_c;
  639|       |          transfer_8to16subro = transfer_8to16subro_altivec_c;
  640|       |          transfer_8to16sub2 = transfer_8to16sub2_altivec_c;
  641|       |          transfer_16to8add = transfer_16to8add_altivec_c;
  642|       |          transfer8x8_copy = transfer8x8_copy_altivec_c;
  643|       |           
  644|       |          /* Inverse DCT */
  645|       |          idct = idct_altivec_c;
  646|       |          
  647|       |          /* Interpolation */
  648|       |          interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_altivec_c;
  649|       |          interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_altivec_c;
  650|       |          interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_altivec_c;
  651|       |		  
  652|       |          interpolate8x8_avg2 = interpolate8x8_avg2_altivec_c;
  653|       |          interpolate8x8_avg4 = interpolate8x8_avg4_altivec_c;
  654|       |		  
  655|       |		  interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_altivec_c;
  656|       |		  interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_altivec_c;
  657|       |		  interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_altivec_c;
  658|       |		  interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_altivec_c;
  659|       |          
  660|       |          /* Colorspace conversion */
  661|       |          bgra_to_yv12 = bgra_to_yv12_altivec_c;
  662|       |          abgr_to_yv12 = abgr_to_yv12_altivec_c;
  663|       |          rgba_to_yv12 = rgba_to_yv12_altivec_c;
  664|       |          argb_to_yv12 = argb_to_yv12_altivec_c;
  665|       |          
  666|       |          yuyv_to_yv12 = yuyv_to_yv12_altivec_c;
  667|       |          uyvy_to_yv12 = uyvy_to_yv12_altivec_c;
  668|       |          
  669|       |          yv12_to_yuyv = yv12_to_yuyv_altivec_c;
  670|       |          yv12_to_uyvy = yv12_to_uyvy_altivec_c;
  671|       |          
  672|       |          /* Quantization */
  673|       |          quant_h263_intra = quant_h263_intra_altivec_c;
  674|       |          quant_h263_inter = quant_h263_inter_altivec_c;
  675|       |          dequant_h263_intra = dequant_h263_intra_altivec_c;
  676|       |          dequant_h263_inter = dequant_h263_inter_altivec_c;
  677|       |
  678|       |		  dequant_mpeg_intra = dequant_mpeg_intra_altivec_c;
  679|       |		  dequant_mpeg_inter = dequant_mpeg_inter_altivec_c;
  680|       |		  
  681|       |		  /* Qpel stuff */
  682|       |		  xvid_QP_Funcs = &xvid_QP_Funcs_Altivec_C;
  683|       |		  xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_Altivec_C;
  684|       |        }
  685|       |#endif
  686|       |
  687|       |#if defined(_DEBUG)
  688|       |    xvid_debug = init->debug;
  689|       |#endif
  690|       |
  691|      2|    return(0);
  692|      2|}
xvid.c:detect_cpu_flags:
  151|      2|{
  152|       |	/* enable native assembly optimizations by default */
  153|      2|	unsigned int cpu_flags = XVID_CPU_ASM;
  ------------------
  |  |  181|      2|#define XVID_CPU_ASM      (1<< 7) /* native assembly */
  ------------------
  154|       |
  155|       |#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
  156|       |	cpu_flags |= check_cpu_features();
  157|       |	if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
  158|       |		cpu_flags &= ~XVID_CPU_SSE;
  159|       |
  160|       |	if ((cpu_flags & (XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41)) && sigill_check(sse2_os_trigger))
  161|       |		cpu_flags &= ~(XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41);
  162|       |#endif
  163|       |
  164|       |#if defined(ARCH_IS_PPC)
  165|       |#if defined(__amigaos4__)
  166|       |        {
  167|       |                uint32_t vector_unit = VECTORTYPE_NONE;
  168|       |                IExec->GetCPUInfoTags(GCIT_VectorUnit, &vector_unit, TAG_END);
  169|       |                if (vector_unit == VECTORTYPE_ALTIVEC) {
  170|       |                        cpu_flags |= XVID_CPU_ALTIVEC;
  171|       |                }
  172|       |        }
  173|       |#else
  174|       |	if (!sigill_check(altivec_trigger))
  175|       |		cpu_flags |= XVID_CPU_ALTIVEC;
  176|       |#endif
  177|       |#endif
  178|       |
  179|      2|	return cpu_flags;
  180|      2|}

