LLVMFuzzerInitialize:
   12|      2|extern "C" int LLVMFuzzerInitialize(int *argc, char ***argv) {
   13|      2|    memset(&glb, 0, sizeof(glb));
   14|      2|    glb.version = XVID_VERSION;
  ------------------
  |  |   71|      2|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|      2|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   15|      2|    if ( xvid_global(nullptr, XVID_GBL_INIT, &glb, nullptr) ) {
  ------------------
  |  |  235|      2|#define XVID_GBL_INIT    0 /* initialize xvidcore; must be called before using xvid_decore, or xvid_encore) */
  ------------------
  |  Branch (15:10): [True: 0, False: 2]
  ------------------
   16|      0|        abort();
   17|      0|    }
   18|       |
   19|      2|    return 0;
   20|      2|}
LLVMFuzzerTestOneInput:
   22|  10.2k|extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   23|  10.2k|	xvid_dec_stats_t stats;
   24|  10.2k|    xvid_dec_create_t ctx;
   25|  10.2k|    xvid_dec_frame_t frame;
   26|       |
   27|  10.2k|    uint32_t width = 0;
   28|  10.2k|    uint32_t height = 0;
   29|  10.2k|    int remaining = size;
   30|       |
   31|  10.2k|    uint8_t* out = nullptr;
   32|  10.2k|    uint8_t* dataCopy = (uint8_t*)calloc(1, size + 10240);
   33|  10.2k|    memcpy(dataCopy, data, size);
   34|       |
   35|  10.2k|	uint8_t* inptr = dataCopy;
   36|       |
   37|  10.2k|    {
   38|  10.2k|        memset(&ctx, 0, sizeof(ctx));
   39|       |
   40|  10.2k|        ctx.version = XVID_VERSION;
  ------------------
  |  |   71|  10.2k|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|  10.2k|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   41|  10.2k|        ctx.width = width;
   42|  10.2k|        ctx.height = height;
   43|  10.2k|    }
   44|       |
   45|  10.2k|    if ( xvid_decore(nullptr, XVID_DEC_CREATE, &ctx, nullptr) ) {
  ------------------
  |  |  246|  10.2k|#define XVID_DEC_CREATE  0 /* create decore instance; return 0 on success */
  ------------------
  |  Branch (45:10): [True: 0, False: 10.2k]
  ------------------
   46|      0|        abort();
   47|      0|    }
   48|       |
   49|       |
   50|  10.2k|    int loops = 0;
   51|  69.9k|    do {
   52|  69.9k|        {
   53|  69.9k|            memset(&stats, 0, sizeof(xvid_dec_stats_t));
   54|  69.9k|            stats.version = XVID_VERSION;
  ------------------
  |  |   71|  69.9k|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|  69.9k|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   55|  69.9k|        }
   56|       |
   57|  69.9k|        {
   58|  69.9k|            memset(&frame, 0, sizeof(xvid_dec_frame_t));
   59|       |
   60|  69.9k|            frame.version = XVID_VERSION;
  ------------------
  |  |   71|  69.9k|#define XVID_VERSION             XVID_MAKE_VERSION(1,4,-127)
  |  |  ------------------
  |  |  |  |   62|  69.9k|#define XVID_MAKE_VERSION(a,b,c) ((((a)&0xff)<<16) | (((b)&0xff)<<8) | ((c)&0xff))
  |  |  ------------------
  ------------------
   61|  69.9k|            frame.general = 0;
   62|       |
   63|  69.9k|            frame.bitstream = inptr;
   64|  69.9k|            frame.length = remaining;
   65|       |
   66|  69.9k|            frame.output.plane[0]  = out;
   67|  69.9k|            frame.output.stride[0] = width * 3;
   68|       |
   69|  69.9k|            frame.output.csp = XVID_CSP_BGR;
  ------------------
  |  |  121|  69.9k|#define XVID_CSP_BGR      (1<< 9) /* 24-bit bgr packed */
  ------------------
   70|  69.9k|        }
   71|       |
   72|  69.9k|        const int used_bytes = xvid_decore(ctx.handle, XVID_DEC_DECODE, &frame, &stats);
  ------------------
  |  |  248|  69.9k|#define XVID_DEC_DECODE  2 /* decode a frame: returns number of bytes consumed >= 0 */
  ------------------
   73|  69.9k|        if ( stats.type == XVID_TYPE_VOL) {
  ------------------
  |  |  165|  69.9k|#define XVID_TYPE_VOL     -1 /* decoder only: vol was decoded */
  ------------------
  |  Branch (73:14): [True: 34.5k, False: 35.4k]
  ------------------
   74|       |            /* Resize buffer */
   75|       |
   76|  34.5k|            if ( (width != stats.data.vol.width) || (height != stats.data.vol.height) ) {
  ------------------
  |  Branch (76:18): [True: 12.4k, False: 22.1k]
  |  Branch (76:53): [True: 528, False: 21.6k]
  ------------------
   77|  12.9k|                if ( width * height < stats.data.vol.width * stats.data.vol.height ) {
  ------------------
  |  Branch (77:22): [True: 10.6k, False: 2.29k]
  ------------------
   78|  10.6k|                    if (out) {
  ------------------
  |  Branch (78:25): [True: 2.66k, False: 7.98k]
  ------------------
   79|  2.66k|                        free(out);
   80|  2.66k|                    }
   81|  10.6k|                    out = (uint8_t*)malloc(stats.data.vol.width * stats.data.vol.height * 4);
   82|  10.6k|                }
   83|  12.9k|                width = stats.data.vol.width;
   84|  12.9k|                height = stats.data.vol.height;
   85|  12.9k|            }
   86|  34.5k|        }
   87|       |
   88|  69.9k|        if ( used_bytes > 0 ) {
  ------------------
  |  Branch (88:14): [True: 69.2k, False: 717]
  ------------------
   89|  69.2k|            inptr += used_bytes;
   90|  69.2k|            remaining -= used_bytes;
   91|  69.2k|        } else {
   92|    717|            break;
   93|    717|        }
   94|       |
   95|  69.2k|        loops++;
   96|  69.2k|    } while (stats.type <= 0 && remaining > 1);
  ------------------
  |  Branch (96:14): [True: 63.1k, False: 6.04k]
  |  Branch (96:33): [True: 59.6k, False: 3.53k]
  ------------------
   97|       |
   98|  10.2k|end:
   99|  10.2k|    free(dataCopy);
  100|  10.2k|    free(out);
  101|       |
  102|  10.2k|    xvid_decore(ctx.handle, XVID_DEC_DESTROY, nullptr, nullptr);
  ------------------
  |  |  247|  10.2k|#define XVID_DEC_DESTROY 1 /* destroy decore instance: return 0 on success */
  ------------------
  103|  10.2k|    return 0;
  104|  10.2k|}

read_video_packet_header:
  105|  28.2k|{
  106|  28.2k|	int startcode_bits = NUMBITS_VP_RESYNC_MARKER + addbits;
  ------------------
  |  |  111|  28.2k|#define NUMBITS_VP_RESYNC_MARKER  17
  ------------------
  107|  28.2k|	int mbnum_bits = log2bin(dec->mb_width *  dec->mb_height - 1);
  108|  28.2k|	int mbnum;
  109|  28.2k|	int hec = 0;
  110|       |
  111|  28.2k|	BitstreamSkip(bs, BitstreamNumBitsToByteAlign(bs));
  112|  28.2k|	BitstreamSkip(bs, startcode_bits);
  113|       |
  114|  28.2k|	DPRINTF(XVID_DEBUG_STARTCODE, "<video_packet_header>\n");
  ------------------
  |  |  197|  28.2k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  115|       |
  116|  28.2k|	if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  28.2k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (116:6): [True: 14.7k, False: 13.5k]
  ------------------
  117|  14.7k|	{
  118|  14.7k|		hec = BitstreamGetBit(bs);		/* header_extension_code */
  119|  14.7k|		if (hec && !(dec->sprite_enable == SPRITE_STATIC /* && current_coding_type = I_VOP */))
  ------------------
  |  |   94|  11.7k|#define SPRITE_STATIC	1
  ------------------
  |  Branch (119:7): [True: 11.7k, False: 2.95k]
  |  Branch (119:14): [True: 8.69k, False: 3.07k]
  ------------------
  120|  8.69k|		{
  121|  8.69k|			BitstreamSkip(bs, 13);			/* vop_width */
  122|  8.69k|			READ_MARKER();
  ------------------
  |  |   99|  8.69k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  123|  8.69k|			BitstreamSkip(bs, 13);			/* vop_height */
  124|  8.69k|			READ_MARKER();
  ------------------
  |  |   99|  8.69k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  125|  8.69k|			BitstreamSkip(bs, 13);			/* vop_horizontal_mc_spatial_ref */
  126|  8.69k|			READ_MARKER();
  ------------------
  |  |   99|  8.69k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  127|  8.69k|			BitstreamSkip(bs, 13);			/* vop_vertical_mc_spatial_ref */
  128|  8.69k|			READ_MARKER();
  ------------------
  |  |   99|  8.69k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  129|  8.69k|		}
  130|  14.7k|	}
  131|       |
  132|  28.2k|	mbnum = (mbnum_bits == 0) ? 0 : BitstreamGetBits(bs, mbnum_bits);		/* macroblock_number */
  ------------------
  |  Branch (132:10): [True: 9, False: 28.2k]
  ------------------
  133|  28.2k|	DPRINTF(XVID_DEBUG_HEADER, "mbnum %i\n", mbnum);
  ------------------
  |  |  198|  28.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  134|       |
  135|  28.2k|	if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY)
  ------------------
  |  |   89|  28.2k|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (135:6): [True: 14.6k, False: 13.5k]
  ------------------
  136|  14.6k|	{
  137|  14.6k|		*quant = BitstreamGetBits(bs, dec->quant_bits);	/* quant_scale */
  138|  14.6k|		DPRINTF(XVID_DEBUG_HEADER, "quant %i\n", *quant);
  ------------------
  |  |  198|  14.6k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  139|  14.6k|	}
  140|       |
  141|  28.2k|	if (dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  28.2k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (141:6): [True: 13.5k, False: 14.7k]
  ------------------
  142|  13.5k|		hec = BitstreamGetBit(bs);		/* header_extension_code */
  143|       |
  144|       |
  145|  28.2k|	DPRINTF(XVID_DEBUG_HEADER, "header_extension_code %i\n", hec);
  ------------------
  |  |  198|  28.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  146|  28.2k|	if (hec)
  ------------------
  |  Branch (146:6): [True: 18.9k, False: 9.30k]
  ------------------
  147|  18.9k|	{
  148|  18.9k|		int time_base;
  149|  18.9k|		int time_increment;
  150|  18.9k|		int coding_type;
  151|       |
  152|  39.4k|		for (time_base=0; BitstreamGetBit(bs)!=0; time_base++);		/* modulo_time_base */
  ------------------
  |  Branch (152:21): [True: 20.4k, False: 18.9k]
  ------------------
  153|  18.9k|		READ_MARKER();
  ------------------
  |  |   99|  18.9k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  154|  18.9k|		if (dec->time_inc_bits)
  ------------------
  |  Branch (154:7): [True: 18.9k, False: 0]
  ------------------
  155|  18.9k|			time_increment = (BitstreamGetBits(bs, dec->time_inc_bits));	/* vop_time_increment */
  156|      0|		else
  157|      0|			time_increment = 0;
  158|  18.9k|		READ_MARKER();
  ------------------
  |  |   99|  18.9k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  159|  18.9k|		DPRINTF(XVID_DEBUG_HEADER,"time %i:%i\n", time_base, time_increment);
  ------------------
  |  |  198|  18.9k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  160|       |
  161|  18.9k|		coding_type = BitstreamGetBits(bs, 2);
  162|  18.9k|		DPRINTF(XVID_DEBUG_HEADER,"coding_type %i\n", coding_type);
  ------------------
  |  |  198|  18.9k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  163|       |
  164|  18.9k|		if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  18.9k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (164:7): [True: 11.7k, False: 7.18k]
  ------------------
  165|  11.7k|		{
  166|  11.7k|			BitstreamSkip(bs, 1);	/* change_conv_ratio_disable */
  167|  11.7k|			if (coding_type != I_VOP)
  ------------------
  |  |  104|  11.7k|#define I_VOP	0
  ------------------
  |  Branch (167:8): [True: 3.68k, False: 8.08k]
  ------------------
  168|  3.68k|				BitstreamSkip(bs, 1);	/* vop_shape_coding_type */
  169|  11.7k|		}
  170|       |
  171|  18.9k|		if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY)
  ------------------
  |  |   89|  18.9k|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (171:7): [True: 8.13k, False: 10.8k]
  ------------------
  172|  8.13k|		{
  173|  8.13k|			*intra_dc_threshold = intra_dc_threshold_table[BitstreamGetBits(bs, 3)];
  174|       |
  175|  8.13k|			if (dec->sprite_enable == SPRITE_GMC && coding_type == S_VOP &&
  ------------------
  |  |   95|  16.2k|#define SPRITE_GMC		2
  ------------------
              			if (dec->sprite_enable == SPRITE_GMC && coding_type == S_VOP &&
  ------------------
  |  |  107|  9.21k|#define S_VOP	3
  ------------------
  |  Branch (175:8): [True: 1.08k, False: 7.04k]
  |  Branch (175:44): [True: 143, False: 941]
  ------------------
  176|  8.13k|				dec->sprite_warping_points > 0)
  ------------------
  |  Branch (176:5): [True: 138, False: 5]
  ------------------
  177|    138|			{
  178|       |				/* TODO: sprite trajectory */
  179|    138|			}
  180|  8.13k|			if (dec->reduced_resolution_enable &&
  ------------------
  |  Branch (180:8): [True: 4.35k, False: 3.77k]
  ------------------
  181|  8.13k|				dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR &&
  ------------------
  |  |   87|  12.4k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (181:5): [True: 3.54k, False: 807]
  ------------------
  182|  8.13k|				(coding_type == P_VOP || coding_type == I_VOP))
  ------------------
  |  |  105|  7.09k|#define P_VOP	1
  ------------------
              				(coding_type == P_VOP || coding_type == I_VOP))
  ------------------
  |  |  104|  2.49k|#define I_VOP	0
  ------------------
  |  Branch (182:6): [True: 1.05k, False: 2.49k]
  |  Branch (182:30): [True: 1.04k, False: 1.44k]
  ------------------
  183|  2.09k|			{
  184|  2.09k|				BitstreamSkip(bs, 1); /* XXX: vop_reduced_resolution */
  185|  2.09k|			}
  186|       |
  187|  8.13k|			if (coding_type != I_VOP && fcode_forward)
  ------------------
  |  |  104|  16.2k|#define I_VOP	0
  ------------------
  |  Branch (187:8): [True: 5.04k, False: 3.08k]
  |  Branch (187:32): [True: 2.67k, False: 2.36k]
  ------------------
  188|  2.67k|			{
  189|  2.67k|				*fcode_forward = BitstreamGetBits(bs, 3);
  190|  2.67k|				DPRINTF(XVID_DEBUG_HEADER,"fcode_forward %i\n", *fcode_forward);
  ------------------
  |  |  198|  2.67k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  191|  2.67k|			}
  192|       |
  193|  8.13k|			if (coding_type == B_VOP && fcode_backward)
  ------------------
  |  |  106|  16.2k|#define B_VOP	2
  ------------------
  |  Branch (193:8): [True: 2.88k, False: 5.25k]
  |  Branch (193:32): [True: 1.60k, False: 1.27k]
  ------------------
  194|  1.60k|			{
  195|  1.60k|				*fcode_backward = BitstreamGetBits(bs, 3);
  196|  1.60k|				DPRINTF(XVID_DEBUG_HEADER,"fcode_backward %i\n", *fcode_backward);
  ------------------
  |  |  198|  1.60k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  197|  1.60k|			}
  198|  8.13k|		}
  199|  18.9k|	}
  200|       |
  201|  28.2k|	if (dec->newpred_enable)
  ------------------
  |  Branch (201:6): [True: 11.8k, False: 16.3k]
  ------------------
  202|  11.8k|	{
  203|  11.8k|		int vop_id;
  204|  11.8k|		int vop_id_for_prediction;
  205|       |
  206|  11.8k|		vop_id = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|  11.8k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 11.2k, False: 680]
  |  |  ------------------
  ------------------
  207|  11.8k|		DPRINTF(XVID_DEBUG_HEADER, "vop_id %i\n", vop_id);
  ------------------
  |  |  198|  11.8k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  208|  11.8k|		if (BitstreamGetBit(bs))	/* vop_id_for_prediction_indication */
  ------------------
  |  Branch (208:7): [True: 5.85k, False: 6.02k]
  ------------------
  209|  5.85k|		{
  210|  5.85k|			vop_id_for_prediction = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|  5.85k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 5.25k, False: 598]
  |  |  ------------------
  ------------------
  211|  5.85k|			DPRINTF(XVID_DEBUG_HEADER, "vop_id_for_prediction %i\n", vop_id_for_prediction);
  ------------------
  |  |  198|  5.85k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  212|  5.85k|		}
  213|  11.8k|		READ_MARKER();
  ------------------
  |  |   99|  11.8k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  214|  11.8k|	}
  215|       |
  216|  28.2k|	return mbnum;
  217|  28.2k|}
BitstreamReadHeaders:
  397|  1.86M|{
  398|  1.86M|	uint32_t vol_ver_id;
  399|  1.86M|	uint32_t coding_type;
  400|  1.86M|	uint32_t start_code;
  401|  1.86M|	uint32_t time_incr = 0;
  402|  1.86M|	int32_t time_increment = 0;
  403|       |
  404|  22.8M|	while ((BitstreamPos(bs) >> 3) + 4 <= bs->length) {
  ------------------
  |  Branch (404:9): [True: 22.8M, False: 3.06k]
  ------------------
  405|       |
  406|  22.8M|		BitstreamByteAlign(bs);
  407|  22.8M|		start_code = BitstreamShowBits(bs, 32);
  408|       |
  409|  22.8M|		if (start_code == VISOBJSEQ_START_CODE) {
  ------------------
  |  |   42|  22.8M|#define VISOBJSEQ_START_CODE	0x000001b0
  ------------------
  |  Branch (409:7): [True: 2.98k, False: 22.8M]
  ------------------
  410|       |
  411|  2.98k|			int profile;
  412|       |
  413|  2.98k|			DPRINTF(XVID_DEBUG_STARTCODE, "<visual_object_sequence>\n");
  ------------------
  |  |  197|  2.98k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  414|       |
  415|  2.98k|			BitstreamSkip(bs, 32);	/* visual_object_sequence_start_code */
  416|  2.98k|			profile = BitstreamGetBits(bs, 8);	/* profile_and_level_indication */
  417|       |
  418|  2.98k|			DPRINTF(XVID_DEBUG_HEADER, "profile_and_level_indication %i\n", profile);
  ------------------
  |  |  198|  2.98k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  419|       |
  420|  22.8M|		} else if (start_code == VISOBJSEQ_STOP_CODE) {
  ------------------
  |  |   43|  22.8M|#define VISOBJSEQ_STOP_CODE		0x000001b1	/* ??? */
  ------------------
  |  Branch (420:14): [True: 1.01k, False: 22.8M]
  ------------------
  421|       |
  422|  1.01k|			BitstreamSkip(bs, 32);	/* visual_object_sequence_stop_code */
  423|       |
  424|  1.01k|			DPRINTF(XVID_DEBUG_STARTCODE, "</visual_object_sequence>\n");
  ------------------
  |  |  197|  1.01k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  425|       |
  426|  22.8M|		} else if (start_code == VISOBJ_START_CODE) {
  ------------------
  |  |   47|  22.8M|#define VISOBJ_START_CODE		0x000001b5
  ------------------
  |  Branch (426:14): [True: 8.49k, False: 22.8M]
  ------------------
  427|       |
  428|  8.49k|			DPRINTF(XVID_DEBUG_STARTCODE, "<visual_object>\n");
  ------------------
  |  |  197|  8.49k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  429|       |
  430|  8.49k|			BitstreamSkip(bs, 32);	/* visual_object_start_code */
  431|  8.49k|			if (BitstreamGetBit(bs))	/* is_visual_object_identified */
  ------------------
  |  Branch (431:8): [True: 2.83k, False: 5.65k]
  ------------------
  432|  2.83k|			{
  433|  2.83k|				dec->ver_id = BitstreamGetBits(bs, 4);	/* visual_object_ver_id */
  434|  2.83k|				DPRINTF(XVID_DEBUG_HEADER,"visobj_ver_id %i\n", dec->ver_id);
  ------------------
  |  |  198|  2.83k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  435|  2.83k|				BitstreamSkip(bs, 3);	/* visual_object_priority */
  436|  5.65k|			} else {
  437|  5.65k|				dec->ver_id = 1;
  438|  5.65k|			}
  439|       |
  440|  8.49k|			if (BitstreamShowBits(bs, 4) != VISOBJ_TYPE_VIDEO)	/* visual_object_type */
  ------------------
  |  |   52|  8.49k|#define VISOBJ_TYPE_VIDEO				1
  ------------------
  |  Branch (440:8): [True: 4.39k, False: 4.10k]
  ------------------
  441|  4.39k|			{
  442|  4.39k|				DPRINTF(XVID_DEBUG_ERROR, "visual_object_type != video\n");
  ------------------
  |  |  196|  4.39k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  443|  4.39k|				return -1;
  444|  4.39k|			}
  445|  4.10k|			BitstreamSkip(bs, 4);
  446|       |
  447|       |			/* video_signal_type */
  448|       |
  449|  4.10k|			if (BitstreamGetBit(bs))	/* video_signal_type */
  ------------------
  |  Branch (449:8): [True: 3.62k, False: 476]
  ------------------
  450|  3.62k|			{
  451|  3.62k|				DPRINTF(XVID_DEBUG_HEADER,"+ video_signal_type\n");
  ------------------
  |  |  198|  3.62k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  452|  3.62k|				BitstreamSkip(bs, 3);	/* video_format */
  453|  3.62k|				BitstreamSkip(bs, 1);	/* video_range */
  454|  3.62k|				if (BitstreamGetBit(bs))	/* color_description */
  ------------------
  |  Branch (454:9): [True: 2.77k, False: 850]
  ------------------
  455|  2.77k|				{
  456|  2.77k|					DPRINTF(XVID_DEBUG_HEADER,"+ color_description");
  ------------------
  |  |  198|  2.77k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  457|  2.77k|					BitstreamSkip(bs, 8);	/* color_primaries */
  458|  2.77k|					BitstreamSkip(bs, 8);	/* transfer_characteristics */
  459|  2.77k|					BitstreamSkip(bs, 8);	/* matrix_coefficients */
  460|  2.77k|				}
  461|  3.62k|			}
  462|  22.8M|		} else if ((start_code & ~VIDOBJ_START_CODE_MASK) == VIDOBJ_START_CODE) {
  ------------------
  |  |  385|  22.8M|#define VIDOBJ_START_CODE_MASK		0x0000001f
  ------------------
              		} else if ((start_code & ~VIDOBJ_START_CODE_MASK) == VIDOBJ_START_CODE) {
  ------------------
  |  |   40|  22.8M|#define VIDOBJ_START_CODE		0x00000100	/* ..0x0000011f  */
  ------------------
  |  Branch (462:14): [True: 9.04k, False: 22.8M]
  ------------------
  463|       |
  464|  9.04k|			DPRINTF(XVID_DEBUG_STARTCODE, "<video_object>\n");
  ------------------
  |  |  197|  9.04k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  465|  9.04k|			DPRINTF(XVID_DEBUG_HEADER, "vo id %i\n", start_code & VIDOBJ_START_CODE_MASK);
  ------------------
  |  |  198|  9.04k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
              			DPRINTF(XVID_DEBUG_HEADER, "vo id %i\n", start_code & VIDOBJ_START_CODE_MASK);
  ------------------
  |  |  385|  9.04k|#define VIDOBJ_START_CODE_MASK		0x0000001f
  ------------------
  466|       |
  467|  9.04k|			BitstreamSkip(bs, 32);	/* video_object_start_code */
  468|       |
  469|  22.8M|		} else if ((start_code & ~VIDOBJLAY_START_CODE_MASK) == VIDOBJLAY_START_CODE) {
  ------------------
  |  |  386|  22.8M|#define VIDOBJLAY_START_CODE_MASK	0x0000000f
  ------------------
              		} else if ((start_code & ~VIDOBJLAY_START_CODE_MASK) == VIDOBJLAY_START_CODE) {
  ------------------
  |  |   41|  22.8M|#define VIDOBJLAY_START_CODE	0x00000120	/* ..0x0000012f */
  ------------------
  |  Branch (469:14): [True: 45.3k, False: 22.7M]
  ------------------
  470|  45.3k|			uint32_t width = 0, height = 0;
  471|       |
  472|  45.3k|			DPRINTF(XVID_DEBUG_STARTCODE, "<video_object_layer>\n");
  ------------------
  |  |  197|  45.3k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  473|  45.3k|			DPRINTF(XVID_DEBUG_HEADER, "vol id %i\n", start_code & VIDOBJLAY_START_CODE_MASK);
  ------------------
  |  |  198|  45.3k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
              			DPRINTF(XVID_DEBUG_HEADER, "vol id %i\n", start_code & VIDOBJLAY_START_CODE_MASK);
  ------------------
  |  |  386|  45.3k|#define VIDOBJLAY_START_CODE_MASK	0x0000000f
  ------------------
  474|       |
  475|  45.3k|			BitstreamSkip(bs, 32);	/* video_object_layer_start_code */
  476|  45.3k|			BitstreamSkip(bs, 1);	/* random_accessible_vol */
  477|       |
  478|  45.3k|            BitstreamSkip(bs, 8);   /* video_object_type_indication */
  479|       |
  480|  45.3k|			if (BitstreamGetBit(bs))	/* is_object_layer_identifier */
  ------------------
  |  Branch (480:8): [True: 14.9k, False: 30.4k]
  ------------------
  481|  14.9k|			{
  482|  14.9k|				DPRINTF(XVID_DEBUG_HEADER, "+ is_object_layer_identifier\n");
  ------------------
  |  |  198|  14.9k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  483|  14.9k|				vol_ver_id = BitstreamGetBits(bs, 4);	/* video_object_layer_verid */
  484|  14.9k|				DPRINTF(XVID_DEBUG_HEADER,"ver_id %i\n", vol_ver_id);
  ------------------
  |  |  198|  14.9k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  485|  14.9k|				BitstreamSkip(bs, 3);	/* video_object_layer_priority */
  486|  30.4k|			} else {
  487|  30.4k|				vol_ver_id = dec->ver_id;
  488|  30.4k|			}
  489|       |
  490|  45.3k|			dec->aspect_ratio = BitstreamGetBits(bs, 4);
  491|       |
  492|  45.3k|			if (dec->aspect_ratio == VIDOBJLAY_AR_EXTPAR)	/* aspect_ratio_info */
  ------------------
  |  |   84|  45.3k|#define VIDOBJLAY_AR_EXTPAR				15
  ------------------
  |  Branch (492:8): [True: 1.92k, False: 43.4k]
  ------------------
  493|  1.92k|			{
  494|  1.92k|				DPRINTF(XVID_DEBUG_HEADER, "+ aspect_ratio_info\n");
  ------------------
  |  |  198|  1.92k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  495|  1.92k|				dec->par_width = BitstreamGetBits(bs, 8);	/* par_width */
  496|  1.92k|				dec->par_height = BitstreamGetBits(bs, 8);	/* par_height */
  497|  1.92k|			}
  498|       |
  499|  45.3k|			if (BitstreamGetBit(bs))	/* vol_control_parameters */
  ------------------
  |  Branch (499:8): [True: 12.6k, False: 32.7k]
  ------------------
  500|  12.6k|			{
  501|  12.6k|				DPRINTF(XVID_DEBUG_HEADER, "+ vol_control_parameters\n");
  ------------------
  |  |  198|  12.6k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  502|  12.6k|				BitstreamSkip(bs, 2);	/* chroma_format */
  503|  12.6k|				dec->low_delay = BitstreamGetBit(bs);	/* low_delay */
  504|  12.6k|				DPRINTF(XVID_DEBUG_HEADER, "low_delay %i\n", dec->low_delay);
  ------------------
  |  |  198|  12.6k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  505|  12.6k|				if (BitstreamGetBit(bs))	/* vbv_parameters */
  ------------------
  |  Branch (505:9): [True: 1.11k, False: 11.5k]
  ------------------
  506|  1.11k|				{
  507|  1.11k|					unsigned int bitrate;
  508|  1.11k|					unsigned int buffer_size;
  509|  1.11k|					unsigned int occupancy;
  510|       |
  511|  1.11k|					DPRINTF(XVID_DEBUG_HEADER,"+ vbv_parameters\n");
  ------------------
  |  |  198|  1.11k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  512|       |
  513|  1.11k|					bitrate = BitstreamGetBits(bs,15) << 15;	/* first_half_bit_rate */
  514|  1.11k|					READ_MARKER();
  ------------------
  |  |   99|  1.11k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  515|  1.11k|					bitrate |= BitstreamGetBits(bs,15);		/* latter_half_bit_rate */
  516|  1.11k|					READ_MARKER();
  ------------------
  |  |   99|  1.11k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  517|       |
  518|  1.11k|					buffer_size = BitstreamGetBits(bs, 15) << 3;	/* first_half_vbv_buffer_size */
  519|  1.11k|					READ_MARKER();
  ------------------
  |  |   99|  1.11k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  520|  1.11k|					buffer_size |= BitstreamGetBits(bs, 3);		/* latter_half_vbv_buffer_size */
  521|       |
  522|  1.11k|					occupancy = BitstreamGetBits(bs, 11) << 15;	/* first_half_vbv_occupancy */
  523|  1.11k|					READ_MARKER();
  ------------------
  |  |   99|  1.11k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  524|  1.11k|					occupancy |= BitstreamGetBits(bs, 15);	/* latter_half_vbv_occupancy */
  525|  1.11k|					READ_MARKER();
  ------------------
  |  |   99|  1.11k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  526|       |
  527|  1.11k|					DPRINTF(XVID_DEBUG_HEADER,"bitrate %d (unit=400 bps)\n", bitrate);
  ------------------
  |  |  198|  1.11k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  528|  1.11k|					DPRINTF(XVID_DEBUG_HEADER,"buffer_size %d (unit=16384 bits)\n", buffer_size);
  ------------------
  |  |  198|  1.11k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  529|  1.11k|					DPRINTF(XVID_DEBUG_HEADER,"occupancy %d (unit=64 bits)\n", occupancy);
  ------------------
  |  |  198|  1.11k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  530|  1.11k|				}
  531|  32.7k|			}else{
  532|  32.7k|				dec->low_delay = dec->low_delay_default;
  533|  32.7k|			}
  534|       |
  535|  45.3k|			dec->shape = BitstreamGetBits(bs, 2);	/* video_object_layer_shape */
  536|       |
  537|  45.3k|			DPRINTF(XVID_DEBUG_HEADER, "shape %i\n", dec->shape);
  ------------------
  |  |  198|  45.3k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  538|  45.3k|			if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR)
  ------------------
  |  |   87|  45.3k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (538:8): [True: 17.5k, False: 27.8k]
  ------------------
  539|  17.5k|			{
  540|  17.5k|				DPRINTF(XVID_DEBUG_ERROR,"non-rectangular shapes are not supported\n");
  ------------------
  |  |  196|  17.5k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  541|  17.5k|			}
  542|       |
  543|  45.3k|			if (dec->shape == VIDOBJLAY_SHAPE_GRAYSCALE && vol_ver_id != 1) {
  ------------------
  |  |   90|  90.7k|#define VIDOBJLAY_SHAPE_GRAYSCALE		3
  ------------------
  |  Branch (543:8): [True: 5.87k, False: 39.5k]
  |  Branch (543:51): [True: 1.29k, False: 4.57k]
  ------------------
  544|  1.29k|				BitstreamSkip(bs, 4);	/* video_object_layer_shape_extension */
  545|  1.29k|			}
  546|       |
  547|  45.3k|			READ_MARKER();
  ------------------
  |  |   99|  45.3k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  548|       |
  549|       |			/********************** for decode B-frame time ***********************/
  550|  45.3k|			dec->time_inc_resolution = BitstreamGetBits(bs, 16);	/* vop_time_increment_resolution */
  551|  45.3k|			DPRINTF(XVID_DEBUG_HEADER,"vop_time_increment_resolution %i\n", dec->time_inc_resolution);
  ------------------
  |  |  198|  45.3k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  552|       |
  553|  45.3k|			if (dec->time_inc_resolution > 0) {
  ------------------
  |  Branch (553:8): [True: 32.1k, False: 13.2k]
  ------------------
  554|  32.1k|				dec->time_inc_bits = MAX(log2bin(dec->time_inc_resolution-1), 1);
  ------------------
  |  |  258|  32.1k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 25.2k, False: 6.90k]
  |  |  ------------------
  ------------------
  555|  32.1k|			} else {
  556|       |				/* for "old" xvid compatibility, set time_inc_bits = 1 */
  557|  13.2k|				dec->time_inc_bits = 1;
  558|  13.2k|			}
  559|       |
  560|  45.3k|			READ_MARKER();
  ------------------
  |  |   99|  45.3k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  561|       |
  562|  45.3k|			if (BitstreamGetBit(bs))	/* fixed_vop_rate */
  ------------------
  |  Branch (562:8): [True: 10.2k, False: 35.1k]
  ------------------
  563|  10.2k|			{
  564|  10.2k|				DPRINTF(XVID_DEBUG_HEADER, "+ fixed_vop_rate\n");
  ------------------
  |  |  198|  10.2k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  565|  10.2k|				BitstreamSkip(bs, dec->time_inc_bits);	/* fixed_vop_time_increment */
  566|  10.2k|			}
  567|       |
  568|  45.3k|			if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY) {
  ------------------
  |  |   89|  45.3k|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (568:8): [True: 41.4k, False: 3.95k]
  ------------------
  569|       |
  570|  41.4k|				if (dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR) {
  ------------------
  |  |   87|  41.4k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (570:9): [True: 27.8k, False: 13.5k]
  ------------------
  571|  27.8k|					READ_MARKER();
  ------------------
  |  |   99|  27.8k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  572|  27.8k|					width = BitstreamGetBits(bs, 13);	/* video_object_layer_width */
  573|  27.8k|					READ_MARKER();
  ------------------
  |  |   99|  27.8k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  574|  27.8k|					height = BitstreamGetBits(bs, 13);	/* video_object_layer_height */
  575|  27.8k|					READ_MARKER();
  ------------------
  |  |   99|  27.8k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  576|       |
  577|  27.8k|					DPRINTF(XVID_DEBUG_HEADER, "width %i\n", width);
  ------------------
  |  |  198|  27.8k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  578|  27.8k|					DPRINTF(XVID_DEBUG_HEADER, "height %i\n", height);
  ------------------
  |  |  198|  27.8k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  579|  27.8k|				}
  580|       |
  581|  41.4k|				dec->interlacing = BitstreamGetBit(bs);
  582|  41.4k|				DPRINTF(XVID_DEBUG_HEADER, "interlacing %i\n", dec->interlacing);
  ------------------
  |  |  198|  41.4k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  583|       |
  584|  41.4k|				if (!BitstreamGetBit(bs))	/* obmc_disable */
  ------------------
  |  Branch (584:9): [True: 28.1k, False: 13.2k]
  ------------------
  585|  28.1k|				{
  586|  28.1k|					DPRINTF(XVID_DEBUG_ERROR, "obmc_disabled==false not supported\n");
  ------------------
  |  |  196|  28.1k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  587|       |					/* TODO */
  588|       |					/* fucking divx4.02 has this enabled */
  589|  28.1k|				}
  590|       |
  591|  41.4k|				dec->sprite_enable = BitstreamGetBits(bs, (vol_ver_id == 1 ? 1 : 2));	/* sprite_enable */
  ------------------
  |  Branch (591:48): [True: 27.2k, False: 14.1k]
  ------------------
  592|       |
  593|  41.4k|				if (dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable == SPRITE_GMC)
  ------------------
  |  |   94|  82.8k|#define SPRITE_STATIC	1
  ------------------
              				if (dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable == SPRITE_GMC)
  ------------------
  |  |   95|  35.4k|#define SPRITE_GMC		2
  ------------------
  |  Branch (593:9): [True: 6.00k, False: 35.4k]
  |  Branch (593:48): [True: 4.76k, False: 30.6k]
  ------------------
  594|  10.7k|				{
  595|  10.7k|					int low_latency_sprite_enable;
  596|       |
  597|  10.7k|					if (dec->sprite_enable != SPRITE_GMC)
  ------------------
  |  |   95|  10.7k|#define SPRITE_GMC		2
  ------------------
  |  Branch (597:10): [True: 6.00k, False: 4.76k]
  ------------------
  598|  6.00k|					{
  599|  6.00k|						int sprite_width;
  600|  6.00k|						int sprite_height;
  601|  6.00k|						int sprite_left_coord;
  602|  6.00k|						int sprite_top_coord;
  603|  6.00k|						sprite_width = BitstreamGetBits(bs, 13);		/* sprite_width */
  604|  6.00k|						READ_MARKER();
  ------------------
  |  |   99|  6.00k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  605|  6.00k|						sprite_height = BitstreamGetBits(bs, 13);	/* sprite_height */
  606|  6.00k|						READ_MARKER();
  ------------------
  |  |   99|  6.00k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  607|  6.00k|						sprite_left_coord = BitstreamGetBits(bs, 13);	/* sprite_left_coordinate */
  608|  6.00k|						READ_MARKER();
  ------------------
  |  |   99|  6.00k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  609|  6.00k|						sprite_top_coord = BitstreamGetBits(bs, 13);	/* sprite_top_coordinate */
  610|  6.00k|						READ_MARKER();
  ------------------
  |  |   99|  6.00k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  611|  6.00k|					}
  612|  10.7k|					dec->sprite_warping_points = BitstreamGetBits(bs, 6);		/* no_of_sprite_warping_points */
  613|  10.7k|					dec->sprite_warping_accuracy = BitstreamGetBits(bs, 2);		/* sprite_warping_accuracy */
  614|  10.7k|					dec->sprite_brightness_change = BitstreamGetBits(bs, 1);		/* brightness_change */
  615|  10.7k|					if (dec->sprite_enable != SPRITE_GMC)
  ------------------
  |  |   95|  10.7k|#define SPRITE_GMC		2
  ------------------
  |  Branch (615:10): [True: 6.00k, False: 4.76k]
  ------------------
  616|  6.00k|					{
  617|  6.00k|						low_latency_sprite_enable = BitstreamGetBits(bs, 1);		/* low_latency_sprite_enable */
  618|  6.00k|					}
  619|  10.7k|				}
  620|       |
  621|  41.4k|				if (vol_ver_id != 1 &&
  ------------------
  |  Branch (621:9): [True: 14.1k, False: 27.2k]
  ------------------
  622|  41.4k|					dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR) {
  ------------------
  |  |   87|  14.1k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (622:6): [True: 2.83k, False: 11.3k]
  ------------------
  623|  2.83k|					BitstreamSkip(bs, 1);	/* sadct_disable */
  624|  2.83k|				}
  625|       |
  626|  41.4k|				if (BitstreamGetBit(bs))	/* not_8_bit */
  ------------------
  |  Branch (626:9): [True: 11.9k, False: 29.4k]
  ------------------
  627|  11.9k|				{
  628|  11.9k|					DPRINTF(XVID_DEBUG_HEADER, "not_8_bit==true (ignored)\n");
  ------------------
  |  |  198|  11.9k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  629|  11.9k|					dec->quant_bits = BitstreamGetBits(bs, 4);	/* quant_precision */
  630|  11.9k|					BitstreamSkip(bs, 4);	/* bits_per_pixel */
  631|  29.4k|				} else {
  632|  29.4k|					dec->quant_bits = 5;
  633|  29.4k|				}
  634|       |
  635|  41.4k|				if (dec->shape == VIDOBJLAY_SHAPE_GRAYSCALE) {
  ------------------
  |  |   90|  41.4k|#define VIDOBJLAY_SHAPE_GRAYSCALE		3
  ------------------
  |  Branch (635:9): [True: 5.87k, False: 35.5k]
  ------------------
  636|  5.87k|					BitstreamSkip(bs, 1);	/* no_gray_quant_update */
  637|  5.87k|					BitstreamSkip(bs, 1);	/* composition_method */
  638|  5.87k|					BitstreamSkip(bs, 1);	/* linear_composition */
  639|  5.87k|				}
  640|       |
  641|  41.4k|				dec->quant_type = BitstreamGetBit(bs);	/* quant_type */
  642|  41.4k|				DPRINTF(XVID_DEBUG_HEADER, "quant_type %i\n", dec->quant_type);
  ------------------
  |  |  198|  41.4k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  643|       |
  644|  41.4k|				if (dec->quant_type) {
  ------------------
  |  Branch (644:9): [True: 12.0k, False: 29.3k]
  ------------------
  645|  12.0k|					if (BitstreamGetBit(bs))	/* load_intra_quant_mat */
  ------------------
  |  Branch (645:10): [True: 3.11k, False: 8.96k]
  ------------------
  646|  3.11k|					{
  647|  3.11k|						uint8_t matrix[64];
  648|       |
  649|  3.11k|						DPRINTF(XVID_DEBUG_HEADER, "load_intra_quant_mat\n");
  ------------------
  |  |  198|  3.11k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  650|       |
  651|  3.11k|						bs_get_matrix(bs, matrix);
  652|  3.11k|						set_intra_matrix(dec->mpeg_quant_matrices, matrix);
  653|  3.11k|					} else
  654|  8.96k|						set_intra_matrix(dec->mpeg_quant_matrices, get_default_intra_matrix());
  655|       |
  656|  12.0k|					if (BitstreamGetBit(bs))	/* load_inter_quant_mat */
  ------------------
  |  Branch (656:10): [True: 1.30k, False: 10.7k]
  ------------------
  657|  1.30k|					{
  658|  1.30k|						uint8_t matrix[64];
  659|       |
  660|  1.30k|						DPRINTF(XVID_DEBUG_HEADER, "load_inter_quant_mat\n");
  ------------------
  |  |  198|  1.30k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  661|       |
  662|  1.30k|						bs_get_matrix(bs, matrix);
  663|  1.30k|						set_inter_matrix(dec->mpeg_quant_matrices, matrix);
  664|  1.30k|					} else
  665|  10.7k|						set_inter_matrix(dec->mpeg_quant_matrices, get_default_inter_matrix());
  666|       |
  667|  12.0k|					if (dec->shape == VIDOBJLAY_SHAPE_GRAYSCALE) {
  ------------------
  |  |   90|  12.0k|#define VIDOBJLAY_SHAPE_GRAYSCALE		3
  ------------------
  |  Branch (667:10): [True: 2.37k, False: 9.70k]
  ------------------
  668|  2.37k|						DPRINTF(XVID_DEBUG_ERROR, "greyscale matrix not supported\n");
  ------------------
  |  |  196|  2.37k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  669|  2.37k|						return -1;
  670|  2.37k|					}
  671|       |
  672|  12.0k|				}
  673|       |
  674|       |
  675|  39.0k|				if (vol_ver_id != 1) {
  ------------------
  |  Branch (675:9): [True: 13.9k, False: 25.1k]
  ------------------
  676|  13.9k|					dec->quarterpel = BitstreamGetBit(bs);	/* quarter_sample */
  677|  13.9k|					DPRINTF(XVID_DEBUG_HEADER,"quarterpel %i\n", dec->quarterpel);
  ------------------
  |  |  198|  13.9k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  678|  13.9k|				}
  679|  25.1k|				else
  680|  25.1k|					dec->quarterpel = 0;
  681|       |
  682|       |
  683|  39.0k|				dec->complexity_estimation_disable = BitstreamGetBit(bs);	/* complexity estimation disable */
  684|  39.0k|				if (!dec->complexity_estimation_disable)
  ------------------
  |  Branch (684:9): [True: 25.6k, False: 13.3k]
  ------------------
  685|  25.6k|				{
  686|  25.6k|					read_vol_complexity_estimation_header(bs, dec);
  687|  25.6k|				}
  688|       |
  689|  39.0k|				BitstreamSkip(bs, 1);	/* resync_marker_disable */
  690|       |
  691|  39.0k|				if (BitstreamGetBit(bs))	/* data_partitioned */
  ------------------
  |  Branch (691:9): [True: 11.7k, False: 27.2k]
  ------------------
  692|  11.7k|				{
  693|  11.7k|					DPRINTF(XVID_DEBUG_ERROR, "data_partitioned not supported\n");
  ------------------
  |  |  196|  11.7k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  694|  11.7k|					BitstreamSkip(bs, 1);	/* reversible_vlc */
  695|  11.7k|				}
  696|       |
  697|  39.0k|				if (vol_ver_id != 1) {
  ------------------
  |  Branch (697:9): [True: 13.9k, False: 25.1k]
  ------------------
  698|  13.9k|					dec->newpred_enable = BitstreamGetBit(bs);
  699|  13.9k|					if (dec->newpred_enable)	/* newpred_enable */
  ------------------
  |  Branch (699:10): [True: 4.02k, False: 9.91k]
  ------------------
  700|  4.02k|					{
  701|  4.02k|						DPRINTF(XVID_DEBUG_HEADER, "+ newpred_enable\n");
  ------------------
  |  |  198|  4.02k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  702|  4.02k|						BitstreamSkip(bs, 2);	/* requested_upstream_message_type */
  703|  4.02k|						BitstreamSkip(bs, 1);	/* newpred_segment_type */
  704|  4.02k|					}
  705|  13.9k|					dec->reduced_resolution_enable = BitstreamGetBit(bs);	/* reduced_resolution_vop_enable */
  706|  13.9k|					DPRINTF(XVID_DEBUG_HEADER, "reduced_resolution_enable %i\n", dec->reduced_resolution_enable);
  ------------------
  |  |  198|  13.9k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  707|  13.9k|				}
  708|  25.1k|				else
  709|  25.1k|				{
  710|  25.1k|					dec->newpred_enable = 0;
  711|  25.1k|					dec->reduced_resolution_enable = 0;
  712|  25.1k|				}
  713|       |
  714|  39.0k|				dec->scalability = BitstreamGetBit(bs);	/* scalability */
  715|  39.0k|				if (dec->scalability)
  ------------------
  |  Branch (715:9): [True: 8.14k, False: 30.9k]
  ------------------
  716|  8.14k|				{
  717|  8.14k|					DPRINTF(XVID_DEBUG_ERROR, "scalability not supported\n");
  ------------------
  |  |  196|  8.14k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  718|  8.14k|					BitstreamSkip(bs, 1);	/* hierarchy_type */
  719|  8.14k|					BitstreamSkip(bs, 4);	/* ref_layer_id */
  720|  8.14k|					BitstreamSkip(bs, 1);	/* ref_layer_sampling_direc */
  721|  8.14k|					BitstreamSkip(bs, 5);	/* hor_sampling_factor_n */
  722|  8.14k|					BitstreamSkip(bs, 5);	/* hor_sampling_factor_m */
  723|  8.14k|					BitstreamSkip(bs, 5);	/* vert_sampling_factor_n */
  724|  8.14k|					BitstreamSkip(bs, 5);	/* vert_sampling_factor_m */
  725|  8.14k|					BitstreamSkip(bs, 1);	/* enhancement_type */
  726|  8.14k|					if(dec->shape == VIDOBJLAY_SHAPE_BINARY /* && hierarchy_type==0 */) {
  ------------------
  |  |   88|  8.14k|#define VIDOBJLAY_SHAPE_BINARY			1
  ------------------
  |  Branch (726:9): [True: 1.66k, False: 6.48k]
  ------------------
  727|  1.66k|						BitstreamSkip(bs, 1);	/* use_ref_shape */
  728|  1.66k|						BitstreamSkip(bs, 1);	/* use_ref_texture */
  729|  1.66k|						BitstreamSkip(bs, 5);	/* shape_hor_sampling_factor_n */
  730|  1.66k|						BitstreamSkip(bs, 5);	/* shape_hor_sampling_factor_m */
  731|  1.66k|						BitstreamSkip(bs, 5);	/* shape_vert_sampling_factor_n */
  732|  1.66k|						BitstreamSkip(bs, 5);	/* shape_vert_sampling_factor_m */
  733|  1.66k|					}
  734|  8.14k|					return -1;
  735|  8.14k|				}
  736|  39.0k|			} else				/* dec->shape == BINARY_ONLY */
  737|  3.95k|			{
  738|  3.95k|				if (vol_ver_id != 1) {
  ------------------
  |  Branch (738:9): [True: 2.02k, False: 1.93k]
  ------------------
  739|  2.02k|					dec->scalability = BitstreamGetBit(bs); /* scalability */
  740|  2.02k|					if (dec->scalability)
  ------------------
  |  Branch (740:10): [True: 333, False: 1.69k]
  ------------------
  741|    333|					{
  742|    333|						DPRINTF(XVID_DEBUG_ERROR, "scalability not supported\n");
  ------------------
  |  |  196|    333|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  743|    333|						BitstreamSkip(bs, 4);	/* ref_layer_id */
  744|    333|						BitstreamSkip(bs, 5);	/* hor_sampling_factor_n */
  745|    333|						BitstreamSkip(bs, 5);	/* hor_sampling_factor_m */
  746|    333|						BitstreamSkip(bs, 5);	/* vert_sampling_factor_n */
  747|    333|						BitstreamSkip(bs, 5);	/* vert_sampling_factor_m */
  748|    333|						return -1;
  749|    333|					}
  750|  2.02k|				}
  751|  3.62k|				BitstreamSkip(bs, 1);	/* resync_marker_disable */
  752|       |
  753|  3.62k|			}
  754|       |
  755|  34.5k|			if (((width > 0) && (height > 0)) && (dec->width != width || dec->height != height))
  ------------------
  |  Branch (755:9): [True: 16.4k, False: 18.1k]
  |  Branch (755:24): [True: 14.8k, False: 1.58k]
  |  Branch (755:42): [True: 12.4k, False: 2.44k]
  |  Branch (755:65): [True: 528, False: 1.91k]
  ------------------
  756|  12.9k|			{
  757|  12.9k|				if (dec->fixed_dimensions)
  ------------------
  |  Branch (757:9): [True: 0, False: 12.9k]
  ------------------
  758|      0|				{
  759|      0|					DPRINTF(XVID_DEBUG_ERROR, "decoder width/height does not match bitstream\n");
  ------------------
  |  |  196|      0|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  760|      0|					return -1;
  761|      0|				}
  762|  12.9k|				dec->width = width;
  763|  12.9k|				dec->height = height;
  764|  12.9k|				return -3;
  765|  12.9k|			}
  766|       |
  767|  21.6k|			return -2;	/* VOL */
  768|       |
  769|  22.7M|		} else if (start_code == GRPOFVOP_START_CODE) {
  ------------------
  |  |   45|  22.7M|#define GRPOFVOP_START_CODE		0x000001b3
  ------------------
  |  Branch (769:14): [True: 12.7k, False: 22.7M]
  ------------------
  770|       |
  771|  12.7k|			DPRINTF(XVID_DEBUG_STARTCODE, "<group_of_vop>\n");
  ------------------
  |  |  197|  12.7k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  772|       |
  773|  12.7k|			BitstreamSkip(bs, 32);
  774|  12.7k|			{
  775|  12.7k|				int hours, minutes, seconds;
  776|       |
  777|  12.7k|				hours = BitstreamGetBits(bs, 5);
  778|  12.7k|				minutes = BitstreamGetBits(bs, 6);
  779|  12.7k|				READ_MARKER();
  ------------------
  |  |   99|  12.7k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  780|  12.7k|				seconds = BitstreamGetBits(bs, 6);
  781|       |
  782|  12.7k|				DPRINTF(XVID_DEBUG_HEADER, "time %ih%im%is\n", hours,minutes,seconds);
  ------------------
  |  |  198|  12.7k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  783|  12.7k|			}
  784|  12.7k|			BitstreamSkip(bs, 1);	/* closed_gov */
  785|  12.7k|			BitstreamSkip(bs, 1);	/* broken_link */
  786|       |
  787|  22.7M|		} else if (start_code == VOP_START_CODE) {
  ------------------
  |  |   48|  22.7M|#define VOP_START_CODE			0x000001b6
  ------------------
  |  Branch (787:14): [True: 1.81M, False: 20.9M]
  ------------------
  788|       |
  789|  1.81M|			DPRINTF(XVID_DEBUG_STARTCODE, "<vop>\n");
  ------------------
  |  |  197|  1.81M|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
  790|       |
  791|  1.81M|			BitstreamSkip(bs, 32);	/* vop_start_code */
  792|       |
  793|  1.81M|			coding_type = BitstreamGetBits(bs, 2);	/* vop_coding_type */
  794|  1.81M|			DPRINTF(XVID_DEBUG_HEADER, "coding_type %i\n", coding_type);
  ------------------
  |  |  198|  1.81M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  795|       |
  796|       |			/*********************** for decode B-frame time ***********************/
  797|  11.8M|			while (BitstreamGetBit(bs) != 0)	/* time_base */
  ------------------
  |  Branch (797:11): [True: 10.0M, False: 1.81M]
  ------------------
  798|  10.0M|				time_incr++;
  799|       |
  800|  1.81M|			READ_MARKER();
  ------------------
  |  |   99|  1.81M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  801|       |
  802|  1.81M|			if (dec->time_inc_bits) {
  ------------------
  |  Branch (802:8): [True: 1.81M, False: 1.56k]
  ------------------
  803|  1.81M|				time_increment = (BitstreamGetBits(bs, dec->time_inc_bits));	/* vop_time_increment */
  804|  1.81M|			}
  805|       |
  806|  1.81M|			DPRINTF(XVID_DEBUG_HEADER, "time_base %i\n", time_incr);
  ------------------
  |  |  198|  1.81M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  807|  1.81M|			DPRINTF(XVID_DEBUG_HEADER, "time_increment %i\n", time_increment);
  ------------------
  |  |  198|  1.81M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  808|       |
  809|  1.81M|			DPRINTF(XVID_DEBUG_TIMECODE, "%c %i:%i\n",
  ------------------
  |  |  199|  1.81M|#define XVID_DEBUG_TIMECODE  (1<< 3)
  ------------------
  810|  1.81M|				coding_type == I_VOP ? 'I' : coding_type == P_VOP ? 'P' : coding_type == B_VOP ? 'B' : 'S',
  ------------------
  |  |  104|  1.81M|#define I_VOP	0
  ------------------
              				coding_type == I_VOP ? 'I' : coding_type == P_VOP ? 'P' : coding_type == B_VOP ? 'B' : 'S',
  ------------------
  |  |  105|  1.75M|#define P_VOP	1
  ------------------
              				coding_type == I_VOP ? 'I' : coding_type == P_VOP ? 'P' : coding_type == B_VOP ? 'B' : 'S',
  ------------------
  |  |  106|  1.68M|#define B_VOP	2
  ------------------
  |  Branch (810:5): [True: 63.6k, False: 1.75M]
  |  Branch (810:34): [True: 69.3k, False: 1.68M]
  |  Branch (810:63): [True: 172k, False: 1.51M]
  ------------------
  811|  1.81M|				time_incr, time_increment);
  812|       |
  813|  1.81M|			if (coding_type != B_VOP) {
  ------------------
  |  |  106|  1.81M|#define B_VOP	2
  ------------------
  |  Branch (813:8): [True: 1.64M, False: 172k]
  ------------------
  814|  1.64M|				dec->last_time_base = dec->time_base;
  815|  1.64M|				dec->time_base += time_incr;
  816|  1.64M|				dec->time = dec->time_base*dec->time_inc_resolution + time_increment;
  817|  1.64M|				dec->time_pp = (int32_t)(dec->time - dec->last_non_b_time);
  818|  1.64M|                dec->last_non_b_time = dec->time;
  819|  1.64M|			} else {
  820|   172k|                dec->time = (dec->last_time_base + time_incr)*dec->time_inc_resolution + time_increment;
  821|   172k|				dec->time_bp = dec->time_pp - (int32_t)(dec->last_non_b_time - dec->time);
  822|   172k|			}
  823|  1.81M|            if (dec->time_pp <= 0) dec->time_pp = 1;
  ------------------
  |  Branch (823:17): [True: 614k, False: 1.20M]
  ------------------
  824|  1.81M|			DPRINTF(XVID_DEBUG_HEADER,"time_pp=%i\n", dec->time_pp);
  ------------------
  |  |  198|  1.81M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  825|  1.81M|			DPRINTF(XVID_DEBUG_HEADER,"time_bp=%i\n", dec->time_bp);
  ------------------
  |  |  198|  1.81M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  826|       |
  827|  1.81M|			READ_MARKER();
  ------------------
  |  |   99|  1.81M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  828|       |
  829|  1.81M|			if (!BitstreamGetBit(bs))	/* vop_coded */
  ------------------
  |  Branch (829:8): [True: 218k, False: 1.59M]
  ------------------
  830|   218k|			{
  831|   218k|				DPRINTF(XVID_DEBUG_HEADER, "vop_coded==false\n");
  ------------------
  |  |  198|   218k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  832|   218k|				return N_VOP;
  ------------------
  |  |  108|   218k|#define N_VOP	4
  ------------------
  833|   218k|			}
  834|       |
  835|  1.59M|			if (dec->newpred_enable)
  ------------------
  |  Branch (835:8): [True: 616k, False: 980k]
  ------------------
  836|   616k|			{
  837|   616k|				int vop_id;
  838|   616k|				int vop_id_for_prediction;
  839|       |
  840|   616k|				vop_id = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|   616k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 488k, False: 127k]
  |  |  ------------------
  ------------------
  841|   616k|				DPRINTF(XVID_DEBUG_HEADER, "vop_id %i\n", vop_id);
  ------------------
  |  |  198|   616k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  842|   616k|				if (BitstreamGetBit(bs))	/* vop_id_for_prediction_indication */
  ------------------
  |  Branch (842:9): [True: 67.5k, False: 549k]
  ------------------
  843|  67.5k|				{
  844|  67.5k|					vop_id_for_prediction = BitstreamGetBits(bs, MIN(dec->time_inc_bits + 3, 15));
  ------------------
  |  |  255|  67.5k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 65.5k, False: 1.95k]
  |  |  ------------------
  ------------------
  845|  67.5k|					DPRINTF(XVID_DEBUG_HEADER, "vop_id_for_prediction %i\n", vop_id_for_prediction);
  ------------------
  |  |  198|  67.5k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  846|  67.5k|				}
  847|   616k|				READ_MARKER();
  ------------------
  |  |   99|   616k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  848|   616k|			}
  849|       |
  850|       |
  851|       |
  852|       |			/* fix a little bug by MinChen <chenm002@163.com> */
  853|  1.59M|			if ((dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY) &&
  ------------------
  |  |   89|  1.59M|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (853:8): [True: 1.54M, False: 49.5k]
  ------------------
  854|  1.59M|				( (coding_type == P_VOP) || (coding_type == S_VOP && dec->sprite_enable == SPRITE_GMC) ) ) {
  ------------------
  |  |  105|  1.54M|#define P_VOP	1
  ------------------
              				( (coding_type == P_VOP) || (coding_type == S_VOP && dec->sprite_enable == SPRITE_GMC) ) ) {
  ------------------
  |  |  107|  3.00M|#define S_VOP	3
  ------------------
              				( (coding_type == P_VOP) || (coding_type == S_VOP && dec->sprite_enable == SPRITE_GMC) ) ) {
  ------------------
  |  |   95|  1.36M|#define SPRITE_GMC		2
  ------------------
  |  Branch (854:7): [True: 45.1k, False: 1.50M]
  |  Branch (854:34): [True: 1.36M, False: 137k]
  |  Branch (854:58): [True: 416k, False: 948k]
  ------------------
  855|   461k|				*rounding = BitstreamGetBit(bs);	/* rounding_type */
  856|   461k|				DPRINTF(XVID_DEBUG_HEADER, "rounding %i\n", *rounding);
  ------------------
  |  |  198|   461k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  857|   461k|			}
  858|       |
  859|  1.59M|			if (dec->reduced_resolution_enable &&
  ------------------
  |  Branch (859:8): [True: 691k, False: 906k]
  ------------------
  860|  1.59M|				dec->shape == VIDOBJLAY_SHAPE_RECTANGULAR &&
  ------------------
  |  |   87|  2.28M|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (860:5): [True: 134k, False: 557k]
  ------------------
  861|  1.59M|				(coding_type == P_VOP || coding_type == I_VOP)) {
  ------------------
  |  |  105|   268k|#define P_VOP	1
  ------------------
              				(coding_type == P_VOP || coding_type == I_VOP)) {
  ------------------
  |  |  104|   128k|#define I_VOP	0
  ------------------
  |  Branch (861:6): [True: 5.05k, False: 128k]
  |  Branch (861:30): [True: 2.08k, False: 126k]
  ------------------
  862|       |
  863|  7.14k|				if (BitstreamGetBit(bs)) {
  ------------------
  |  Branch (863:9): [True: 1.55k, False: 5.58k]
  ------------------
  864|  1.55k|					DPRINTF(XVID_DEBUG_ERROR, "RRV not supported (anymore)\n");
  ------------------
  |  |  196|  1.55k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
  865|  1.55k|				}
  866|  7.14k|			}
  867|       |
  868|  1.59M|			if (dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR) {
  ------------------
  |  |   87|  1.59M|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (868:8): [True: 869k, False: 728k]
  ------------------
  869|   869k|				if(!(dec->sprite_enable == SPRITE_STATIC && coding_type == I_VOP)) {
  ------------------
  |  |   94|  1.73M|#define SPRITE_STATIC	1
  ------------------
              				if(!(dec->sprite_enable == SPRITE_STATIC && coding_type == I_VOP)) {
  ------------------
  |  |  104|   262k|#define I_VOP	0
  ------------------
  |  Branch (869:10): [True: 262k, False: 607k]
  |  Branch (869:49): [True: 5.17k, False: 256k]
  ------------------
  870|       |
  871|   864k|					uint32_t width, height;
  872|   864k|					uint32_t horiz_mc_ref, vert_mc_ref;
  873|       |
  874|   864k|					width = BitstreamGetBits(bs, 13);
  875|   864k|					READ_MARKER();
  ------------------
  |  |   99|   864k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  876|   864k|					height = BitstreamGetBits(bs, 13);
  877|   864k|					READ_MARKER();
  ------------------
  |  |   99|   864k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  878|   864k|					horiz_mc_ref = BitstreamGetBits(bs, 13);
  879|   864k|					READ_MARKER();
  ------------------
  |  |   99|   864k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  880|   864k|					vert_mc_ref = BitstreamGetBits(bs, 13);
  881|   864k|					READ_MARKER();
  ------------------
  |  |   99|   864k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  882|       |
  883|   864k|					DPRINTF(XVID_DEBUG_HEADER, "width %i\n", width);
  ------------------
  |  |  198|   864k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  884|   864k|					DPRINTF(XVID_DEBUG_HEADER, "height %i\n", height);
  ------------------
  |  |  198|   864k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  885|   864k|					DPRINTF(XVID_DEBUG_HEADER, "horiz_mc_ref %i\n", horiz_mc_ref);
  ------------------
  |  |  198|   864k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  886|   864k|					DPRINTF(XVID_DEBUG_HEADER, "vert_mc_ref %i\n", vert_mc_ref);
  ------------------
  |  |  198|   864k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  887|   864k|				}
  888|       |
  889|   869k|				BitstreamSkip(bs, 1);	/* change_conv_ratio_disable */
  890|   869k|				if (BitstreamGetBit(bs))	/* vop_constant_alpha */
  ------------------
  |  Branch (890:9): [True: 550k, False: 319k]
  ------------------
  891|   550k|				{
  892|   550k|					BitstreamSkip(bs, 8);	/* vop_constant_alpha_value */
  893|   550k|				}
  894|   869k|			}
  895|       |
  896|  1.59M|			if (dec->shape != VIDOBJLAY_SHAPE_BINARY_ONLY) {
  ------------------
  |  |   89|  1.59M|#define VIDOBJLAY_SHAPE_BINARY_ONLY		2
  ------------------
  |  Branch (896:8): [True: 1.54M, False: 49.5k]
  ------------------
  897|       |
  898|  1.54M|				if (!dec->complexity_estimation_disable)
  ------------------
  |  Branch (898:9): [True: 1.36M, False: 184k]
  ------------------
  899|  1.36M|				{
  900|  1.36M|					read_vop_complexity_estimation_header(bs, dec, coding_type);
  901|  1.36M|				}
  902|       |
  903|       |				/* intra_dc_vlc_threshold */
  904|  1.54M|				*intra_dc_threshold =
  905|  1.54M|					intra_dc_threshold_table[BitstreamGetBits(bs, 3)];
  906|       |
  907|  1.54M|				dec->top_field_first = 0;
  908|  1.54M|				dec->alternate_vertical_scan = 0;
  909|       |
  910|  1.54M|				if (dec->interlacing) {
  ------------------
  |  Branch (910:9): [True: 769k, False: 778k]
  ------------------
  911|   769k|					dec->top_field_first = BitstreamGetBit(bs);
  912|   769k|					DPRINTF(XVID_DEBUG_HEADER, "interlace top_field_first %i\n", dec->top_field_first);
  ------------------
  |  |  198|   769k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  913|   769k|					dec->alternate_vertical_scan = BitstreamGetBit(bs);
  914|   769k|					DPRINTF(XVID_DEBUG_HEADER, "interlace alternate_vertical_scan %i\n", dec->alternate_vertical_scan);
  ------------------
  |  |  198|   769k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  915|       |
  916|   769k|				}
  917|  1.54M|			}
  918|       |
  919|  1.59M|			if ((dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable== SPRITE_GMC) && coding_type == S_VOP) {
  ------------------
  |  |   94|  3.19M|#define SPRITE_STATIC	1
  ------------------
              			if ((dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable== SPRITE_GMC) && coding_type == S_VOP) {
  ------------------
  |  |   95|  1.00M|#define SPRITE_GMC		2
  ------------------
              			if ((dec->sprite_enable == SPRITE_STATIC || dec->sprite_enable== SPRITE_GMC) && coding_type == S_VOP) {
  ------------------
  |  |  107|  1.03M|#define S_VOP	3
  ------------------
  |  Branch (919:9): [True: 597k, False: 1.00M]
  |  Branch (919:48): [True: 438k, False: 561k]
  |  Branch (919:84): [True: 932k, False: 102k]
  ------------------
  920|       |
  921|   932k|				int i;
  922|       |
  923|  2.74M|				for (i = 0 ; i < MIN(4, dec->sprite_warping_points); i++)
  ------------------
  |  |  255|  2.74M|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 2.19M, False: 542k]
  |  |  ------------------
  ------------------
  |  Branch (923:18): [True: 1.80M, False: 932k]
  ------------------
  924|  1.80M|				{
  925|  1.80M|					int length;
  926|  1.80M|					int x = 0, y = 0;
  927|       |
  928|       |					/* sprite code borowed from ffmpeg; thx Michael Niedermayer <michaelni@gmx.at> */
  929|  1.80M|					length = bs_get_spritetrajectory(bs);
  930|  1.80M|					if(length){
  ------------------
  |  Branch (930:9): [True: 1.17M, False: 628k]
  ------------------
  931|  1.17M|						x= BitstreamGetBits(bs, length);
  932|  1.17M|						if ((x >> (length - 1)) == 0) /* if MSB not set it is negative*/
  ------------------
  |  Branch (932:11): [True: 878k, False: 299k]
  ------------------
  933|   878k|							x = - (x ^ ((1 << length) - 1));
  934|  1.17M|					}
  935|  1.80M|					READ_MARKER();
  ------------------
  |  |   99|  1.80M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  936|       |
  937|  1.80M|					length = bs_get_spritetrajectory(bs);
  938|  1.80M|					if(length){
  ------------------
  |  Branch (938:9): [True: 1.15M, False: 653k]
  ------------------
  939|  1.15M|						y = BitstreamGetBits(bs, length);
  940|  1.15M|						if ((y >> (length - 1)) == 0) /* if MSB not set it is negative*/
  ------------------
  |  Branch (940:11): [True: 876k, False: 277k]
  ------------------
  941|   876k|							y = - (y ^ ((1 << length) - 1));
  942|  1.15M|					}
  943|  1.80M|					READ_MARKER();
  ------------------
  |  |   99|  1.80M|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  944|       |
  945|  1.80M|					gmc_warp->duv[i].x = x;
  946|  1.80M|					gmc_warp->duv[i].y = y;
  947|       |
  948|  1.80M|					DPRINTF(XVID_DEBUG_HEADER,"sprite_warping_point[%i] xy=(%i,%i)\n", i, x, y);
  ------------------
  |  |  198|  1.80M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  949|  1.80M|				}
  950|       |
  951|   932k|				if (dec->sprite_brightness_change)
  ------------------
  |  Branch (951:9): [True: 155k, False: 777k]
  ------------------
  952|   155k|				{
  953|       |					/* XXX: brightness_change_factor() */
  954|   155k|				}
  955|   932k|				if (dec->sprite_enable == SPRITE_STATIC)
  ------------------
  |  |   94|   932k|#define SPRITE_STATIC	1
  ------------------
  |  Branch (955:9): [True: 515k, False: 417k]
  ------------------
  956|   515k|				{
  957|       |					/* XXX: todo */
  958|   515k|				}
  959|       |
  960|   932k|			}
  961|       |
  962|  1.59M|			if ((*quant = BitstreamGetBits(bs, dec->quant_bits)) < 1)	/* vop_quant */
  ------------------
  |  Branch (962:8): [True: 694k, False: 903k]
  ------------------
  963|   694k|				*quant = 1;
  964|  1.59M|			DPRINTF(XVID_DEBUG_HEADER, "quant %i\n", *quant);
  ------------------
  |  |  198|  1.59M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  965|       |
  966|  1.59M|			if (coding_type != I_VOP) {
  ------------------
  |  |  104|  1.59M|#define I_VOP	0
  ------------------
  |  Branch (966:8): [True: 1.56M, False: 27.7k]
  ------------------
  967|  1.56M|				*fcode_forward = BitstreamGetBits(bs, 3);	/* fcode_forward */
  968|  1.56M|				DPRINTF(XVID_DEBUG_HEADER, "fcode_forward %i\n", *fcode_forward);
  ------------------
  |  |  198|  1.56M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  969|  1.56M|			}
  970|       |
  971|  1.59M|			if (coding_type == B_VOP) {
  ------------------
  |  |  106|  1.59M|#define B_VOP	2
  ------------------
  |  Branch (971:8): [True: 112k, False: 1.48M]
  ------------------
  972|   112k|				*fcode_backward = BitstreamGetBits(bs, 3);	/* fcode_backward */
  973|   112k|				DPRINTF(XVID_DEBUG_HEADER, "fcode_backward %i\n", *fcode_backward);
  ------------------
  |  |  198|   112k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  974|   112k|			}
  975|  1.59M|			if (!dec->scalability) {
  ------------------
  |  Branch (975:8): [True: 509k, False: 1.08M]
  ------------------
  976|   509k|				if ((dec->shape != VIDOBJLAY_SHAPE_RECTANGULAR) &&
  ------------------
  |  |   87|   509k|#define VIDOBJLAY_SHAPE_RECTANGULAR		0
  ------------------
  |  Branch (976:9): [True: 62.2k, False: 446k]
  ------------------
  977|   509k|					(coding_type != I_VOP)) {
  ------------------
  |  |  104|  62.2k|#define I_VOP	0
  ------------------
  |  Branch (977:6): [True: 61.1k, False: 1.08k]
  ------------------
  978|  61.1k|					BitstreamSkip(bs, 1);	/* vop_shape_coding_type */
  979|  61.1k|				}
  980|   509k|			}
  981|  1.59M|			return coding_type;
  982|       |
  983|  20.9M|		} else if (start_code == USERDATA_START_CODE) {
  ------------------
  |  |   44|  20.9M|#define USERDATA_START_CODE		0x000001b2
  ------------------
  |  Branch (983:14): [True: 7.75k, False: 20.9M]
  ------------------
  984|  7.75k|			char tmp[256];
  985|  7.75k|			int i, version = 0, build = 0;
  986|  7.75k|			char packed;
  987|       |
  988|  7.75k|			BitstreamSkip(bs, 32);	/* user_data_start_code */
  989|       |
  990|  7.75k|			memset(tmp, 0, 256);
  991|  7.75k|			tmp[0] = BitstreamShowBits(bs, 8);
  992|       |
  993|  71.5k|			for(i = 1; i < 255; i++){
  ------------------
  |  Branch (993:15): [True: 71.4k, False: 71]
  ------------------
  994|  71.4k|				tmp[i] = (BitstreamShowBits(bs, 16) & 0xFF);
  995|       |
  996|  71.4k|				if(tmp[i] == 0)
  ------------------
  |  Branch (996:8): [True: 7.68k, False: 63.7k]
  ------------------
  997|  7.68k|					break;
  998|       |
  999|  63.7k|				BitstreamSkip(bs, 8);
 1000|  63.7k|			}
 1001|       |
 1002|  7.75k|			DPRINTF(XVID_DEBUG_STARTCODE, "<user_data>: %s\n", tmp);
  ------------------
  |  |  197|  7.75k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
 1003|       |
 1004|       |			/* read xvid bitstream version */
 1005|  7.75k|			if(strncmp(tmp, "XviD", 4) == 0) {
  ------------------
  |  Branch (1005:7): [True: 2.98k, False: 4.76k]
  ------------------
 1006|  2.98k|				if (tmp[strlen(tmp)-1] == 'C') {				
  ------------------
  |  Branch (1006:9): [True: 286, False: 2.70k]
  ------------------
 1007|    286|					sscanf(tmp, "XviD%dC", &dec->bs_version);
 1008|    286|					dec->cartoon_mode = 1;
 1009|    286|				}
 1010|  2.70k|				else
 1011|  2.70k|					sscanf(tmp, "XviD%d", &dec->bs_version);
 1012|       |
 1013|  2.98k|				DPRINTF(XVID_DEBUG_HEADER, "xvid bitstream version=%i\n", dec->bs_version);
  ------------------
  |  |  198|  2.98k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
 1014|  2.98k|			}
 1015|       |
 1016|       |		    /* divx detection */
 1017|  7.75k|			i = sscanf(tmp, "DivX%dBuild%d%c", &version, &build, &packed);
 1018|  7.75k|			if (i < 2)
  ------------------
  |  Branch (1018:8): [True: 7.75k, False: 0]
  ------------------
 1019|  7.75k|				i = sscanf(tmp, "DivX%db%d%c", &version, &build, &packed);
 1020|       |
 1021|  7.75k|			if (i >= 2)
  ------------------
  |  Branch (1021:8): [True: 2.53k, False: 5.21k]
  ------------------
 1022|  2.53k|			{
 1023|  2.53k|				dec->packed_mode = (i == 3 && packed == 'p');
  ------------------
  |  Branch (1023:25): [True: 613, False: 1.92k]
  |  Branch (1023:35): [True: 35, False: 578]
  ------------------
 1024|  2.53k|				DPRINTF(XVID_DEBUG_HEADER, "divx version=%i, build=%i packed=%i\n",
  ------------------
  |  |  198|  2.53k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
 1025|  2.53k|						version, build, dec->packed_mode);
 1026|  2.53k|			}
 1027|       |
 1028|  7.75k|			if ((dec->bs_version == 0) && (build > 0) &&
  ------------------
  |  Branch (1028:8): [True: 4.46k, False: 3.29k]
  |  Branch (1028:34): [True: 1.80k, False: 2.65k]
  ------------------
 1029|  7.75k|				(build != 1393)) { /* non-xvid stream with xvid fourcc */
  ------------------
  |  Branch (1029:5): [True: 1.72k, False: 81]
  ------------------
 1030|  1.72k|				dec->bs_version = 0xffff;
 1031|  1.72k|			}
 1032|       |
 1033|  7.75k|		} else					/* start_code == ? */
 1034|  20.9M|		{
 1035|  20.9M|			if (BitstreamShowBits(bs, 24) == 0x000001) {
  ------------------
  |  Branch (1035:8): [True: 367k, False: 20.5M]
  ------------------
 1036|   367k|				DPRINTF(XVID_DEBUG_STARTCODE, "<unknown: %x>\n", BitstreamShowBits(bs, 32));
  ------------------
  |  |  197|   367k|#define XVID_DEBUG_STARTCODE (1<< 1)
  ------------------
 1037|   367k|			}
 1038|  20.9M|			BitstreamSkip(bs, 8);
 1039|  20.9M|		}
 1040|  22.8M|	}
 1041|       |
 1042|       |#if 0
 1043|       |	DPRINTF("*** WARNING: no vop_start_code found");
 1044|       |#endif
 1045|  3.06k|	return -1;					/* ignore it */
 1046|  1.86M|}
bitstream.c:log2bin:
   39|  85.6k|{
   40|  85.6k|  int n = 0;
   41|  85.6k|  if (value & 0xffff0000) {
  ------------------
  |  Branch (41:7): [True: 436, False: 85.2k]
  ------------------
   42|    436|    value >>= 16;
   43|    436|    n += 16;
   44|    436|  }
   45|  85.6k|  if (value & 0xff00) {
  ------------------
  |  Branch (45:7): [True: 56.5k, False: 29.1k]
  ------------------
   46|  56.5k|    value >>= 8;
   47|  56.5k|    n += 8;
   48|  56.5k|  }
   49|  85.6k|  if (value & 0xf0) {
  ------------------
  |  Branch (49:7): [True: 48.7k, False: 36.9k]
  ------------------
   50|  48.7k|    value >>= 4;
   51|  48.7k|    n += 4;
   52|  48.7k|  }
   53|  85.6k| return n + log2_tab_16[value];
   54|  85.6k|}
bitstream.c:bs_get_matrix:
   71|  4.41k|{
   72|  4.41k|	int i = 0;
   73|  4.41k|	int last, value = 0;
   74|       |
   75|  42.5k|	do {
   76|  42.5k|		last = value;
   77|  42.5k|		value = BitstreamGetBits(bs, 8);
   78|  42.5k|		matrix[scan_tables[0][i++]] = value;
   79|  42.5k|	}
   80|  42.5k|	while (value != 0 && i < 64);
  ------------------
  |  Branch (80:9): [True: 38.3k, False: 4.14k]
  |  Branch (80:23): [True: 38.1k, False: 267]
  ------------------
   81|       |
   82|  4.41k|	if (value != 0) return;
  ------------------
  |  Branch (82:6): [True: 267, False: 4.14k]
  ------------------
   83|       |
   84|  4.14k|	i--;
   85|   248k|	while (i < 64) {
  ------------------
  |  Branch (85:9): [True: 244k, False: 4.14k]
  ------------------
   86|   244k|		matrix[scan_tables[0][i++]] = last;
   87|   244k|	}
   88|  4.14k|}
bitstream.c:read_vol_complexity_estimation_header:
  225|  25.6k|{
  226|  25.6k|	ESTIMATION * e = &dec->estimation;
  227|       |
  228|  25.6k|	e->method = BitstreamGetBits(bs, 2);	/* estimation_method */
  229|  25.6k|	DPRINTF(XVID_DEBUG_HEADER,"+ complexity_estimation_header; method=%i\n", e->method);
  ------------------
  |  |  198|  25.6k|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
  230|       |
  231|  25.6k|	if (e->method == 0 || e->method == 1)
  ------------------
  |  Branch (231:6): [True: 17.0k, False: 8.67k]
  |  Branch (231:24): [True: 5.27k, False: 3.39k]
  ------------------
  232|  22.2k|	{
  233|  22.2k|		if (!BitstreamGetBit(bs))		/* shape_complexity_estimation_disable */
  ------------------
  |  Branch (233:7): [True: 19.2k, False: 3.01k]
  ------------------
  234|  19.2k|		{
  235|  19.2k|			e->opaque = BitstreamGetBit(bs);		/* opaque */
  236|  19.2k|			e->transparent = BitstreamGetBit(bs);		/* transparent */
  237|  19.2k|			e->intra_cae = BitstreamGetBit(bs);		/* intra_cae */
  238|  19.2k|			e->inter_cae = BitstreamGetBit(bs);		/* inter_cae */
  239|  19.2k|			e->no_update = BitstreamGetBit(bs);		/* no_update */
  240|  19.2k|			e->upsampling = BitstreamGetBit(bs);		/* upsampling */
  241|  19.2k|		}
  242|       |
  243|  22.2k|		if (!BitstreamGetBit(bs))	/* texture_complexity_estimation_set_1_disable */
  ------------------
  |  Branch (243:7): [True: 15.1k, False: 7.17k]
  ------------------
  244|  15.1k|		{
  245|  15.1k|			e->intra_blocks = BitstreamGetBit(bs);		/* intra_blocks */
  246|  15.1k|			e->inter_blocks = BitstreamGetBit(bs);		/* inter_blocks */
  247|  15.1k|			e->inter4v_blocks = BitstreamGetBit(bs);		/* inter4v_blocks */
  248|  15.1k|			e->not_coded_blocks = BitstreamGetBit(bs);		/* not_coded_blocks */
  249|  15.1k|		}
  250|  22.2k|	}
  251|       |
  252|  25.6k|	READ_MARKER();
  ------------------
  |  |   99|  25.6k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  253|       |
  254|  25.6k|	if (!BitstreamGetBit(bs))		/* texture_complexity_estimation_set_2_disable */
  ------------------
  |  Branch (254:6): [True: 15.1k, False: 10.4k]
  ------------------
  255|  15.1k|	{
  256|  15.1k|		e->dct_coefs = BitstreamGetBit(bs);		/* dct_coefs */
  257|  15.1k|		e->dct_lines = BitstreamGetBit(bs);		/* dct_lines */
  258|  15.1k|		e->vlc_symbols = BitstreamGetBit(bs);		/* vlc_symbols */
  259|  15.1k|		e->vlc_bits = BitstreamGetBit(bs);		/* vlc_bits */
  260|  15.1k|	}
  261|       |
  262|  25.6k|	if (!BitstreamGetBit(bs))		/* motion_compensation_complexity_disable */
  ------------------
  |  Branch (262:6): [True: 17.2k, False: 8.38k]
  ------------------
  263|  17.2k|	{
  264|  17.2k|		e->apm = BitstreamGetBit(bs);		/* apm */
  265|  17.2k|		e->npm = BitstreamGetBit(bs);		/* npm */
  266|  17.2k|		e->interpolate_mc_q = BitstreamGetBit(bs);		/* interpolate_mc_q */
  267|  17.2k|		e->forw_back_mc_q = BitstreamGetBit(bs);		/* forw_back_mc_q */
  268|  17.2k|		e->halfpel2 = BitstreamGetBit(bs);		/* halfpel2 */
  269|  17.2k|		e->halfpel4 = BitstreamGetBit(bs);		/* halfpel4 */
  270|  17.2k|	}
  271|       |
  272|  25.6k|	READ_MARKER();
  ------------------
  |  |   99|  25.6k|#define READ_MARKER()	BitstreamSkip(bs, 1)
  ------------------
  273|       |
  274|  25.6k|	if (e->method == 1)
  ------------------
  |  Branch (274:6): [True: 5.27k, False: 20.4k]
  ------------------
  275|  5.27k|	{
  276|  5.27k|		if (!BitstreamGetBit(bs))	/* version2_complexity_estimation_disable */
  ------------------
  |  Branch (276:7): [True: 4.17k, False: 1.10k]
  ------------------
  277|  4.17k|		{
  278|  4.17k|			e->sadct = BitstreamGetBit(bs);		/* sadct */
  279|  4.17k|			e->quarterpel = BitstreamGetBit(bs);		/* quarterpel */
  280|  4.17k|		}
  281|  5.27k|	}
  282|  25.6k|}
bitstream.c:read_vop_complexity_estimation_header:
  288|  1.36M|{
  289|  1.36M|	ESTIMATION * e = &dec->estimation;
  290|       |
  291|  1.36M|	if (e->method == 0 || e->method == 1)
  ------------------
  |  Branch (291:6): [True: 1.02M, False: 335k]
  |  Branch (291:24): [True: 263k, False: 72.1k]
  ------------------
  292|  1.29M|	{
  293|  1.29M|		if (coding_type == I_VOP) {
  ------------------
  |  |  104|  1.29M|#define I_VOP	0
  ------------------
  |  Branch (293:7): [True: 18.6k, False: 1.27M]
  ------------------
  294|  18.6k|			if (e->opaque)		BitstreamSkip(bs, 8);	/* dcecs_opaque */
  ------------------
  |  Branch (294:8): [True: 4.09k, False: 14.5k]
  ------------------
  295|  18.6k|			if (e->transparent) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (295:8): [True: 8.66k, False: 9.98k]
  ------------------
  296|  18.6k|			if (e->intra_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (296:8): [True: 4.43k, False: 14.2k]
  ------------------
  297|  18.6k|			if (e->inter_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (297:8): [True: 7.62k, False: 11.0k]
  ------------------
  298|  18.6k|			if (e->no_update)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (298:8): [True: 5.22k, False: 13.4k]
  ------------------
  299|  18.6k|			if (e->upsampling)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (299:8): [True: 3.65k, False: 14.9k]
  ------------------
  300|  18.6k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (300:8): [True: 4.65k, False: 13.9k]
  ------------------
  301|  18.6k|			if (e->not_coded_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (301:8): [True: 6.20k, False: 12.4k]
  ------------------
  302|  18.6k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (302:8): [True: 4.17k, False: 14.4k]
  ------------------
  303|  18.6k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (303:8): [True: 4.97k, False: 13.6k]
  ------------------
  304|  18.6k|			if (e->vlc_symbols) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (304:8): [True: 6.79k, False: 11.8k]
  ------------------
  305|  18.6k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (305:8): [True: 3.99k, False: 14.6k]
  ------------------
  306|  18.6k|			if (e->sadct)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (306:8): [True: 4.55k, False: 14.1k]
  ------------------
  307|  18.6k|		}
  308|       |
  309|  1.29M|		if (coding_type == P_VOP) {
  ------------------
  |  |  105|  1.29M|#define P_VOP	1
  ------------------
  |  Branch (309:7): [True: 35.1k, False: 1.25M]
  ------------------
  310|  35.1k|			if (e->opaque) BitstreamSkip(bs, 8);		/* */
  ------------------
  |  Branch (310:8): [True: 10.3k, False: 24.7k]
  ------------------
  311|  35.1k|			if (e->transparent) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (311:8): [True: 7.63k, False: 27.5k]
  ------------------
  312|  35.1k|			if (e->intra_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (312:8): [True: 11.5k, False: 23.6k]
  ------------------
  313|  35.1k|			if (e->inter_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (313:8): [True: 9.23k, False: 25.9k]
  ------------------
  314|  35.1k|			if (e->no_update)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (314:8): [True: 8.14k, False: 27.0k]
  ------------------
  315|  35.1k|			if (e->upsampling) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (315:8): [True: 9.21k, False: 25.9k]
  ------------------
  316|  35.1k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (316:8): [True: 5.55k, False: 29.6k]
  ------------------
  317|  35.1k|			if (e->not_coded_blocks)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (317:8): [True: 6.88k, False: 28.2k]
  ------------------
  318|  35.1k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (318:8): [True: 15.0k, False: 20.1k]
  ------------------
  319|  35.1k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (319:8): [True: 8.63k, False: 26.5k]
  ------------------
  320|  35.1k|			if (e->vlc_symbols) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (320:8): [True: 11.8k, False: 23.2k]
  ------------------
  321|  35.1k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (321:8): [True: 11.4k, False: 23.6k]
  ------------------
  322|  35.1k|			if (e->inter_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (322:8): [True: 6.22k, False: 28.9k]
  ------------------
  323|  35.1k|			if (e->inter4v_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (323:8): [True: 8.80k, False: 26.3k]
  ------------------
  324|  35.1k|			if (e->apm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (324:8): [True: 15.6k, False: 19.5k]
  ------------------
  325|  35.1k|			if (e->npm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (325:8): [True: 14.8k, False: 20.3k]
  ------------------
  326|  35.1k|			if (e->forw_back_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (326:8): [True: 19.3k, False: 15.8k]
  ------------------
  327|  35.1k|			if (e->halfpel2)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (327:8): [True: 14.1k, False: 21.0k]
  ------------------
  328|  35.1k|			if (e->halfpel4)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (328:8): [True: 10.5k, False: 24.5k]
  ------------------
  329|  35.1k|			if (e->sadct)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (329:8): [True: 6.68k, False: 28.4k]
  ------------------
  330|  35.1k|			if (e->quarterpel)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (330:8): [True: 6.20k, False: 28.9k]
  ------------------
  331|  35.1k|		}
  332|  1.29M|		if (coding_type == B_VOP) {
  ------------------
  |  |  106|  1.29M|#define B_VOP	2
  ------------------
  |  Branch (332:7): [True: 93.3k, False: 1.19M]
  ------------------
  333|  93.3k|			if (e->opaque)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (333:8): [True: 42.2k, False: 51.0k]
  ------------------
  334|  93.3k|			if (e->transparent)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (334:8): [True: 47.3k, False: 45.9k]
  ------------------
  335|  93.3k|			if (e->intra_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (335:8): [True: 15.1k, False: 78.1k]
  ------------------
  336|  93.3k|			if (e->inter_cae)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (336:8): [True: 49.3k, False: 44.0k]
  ------------------
  337|  93.3k|			if (e->no_update)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (337:8): [True: 24.0k, False: 69.2k]
  ------------------
  338|  93.3k|			if (e->upsampling)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (338:8): [True: 12.0k, False: 81.2k]
  ------------------
  339|  93.3k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (339:8): [True: 20.1k, False: 73.2k]
  ------------------
  340|  93.3k|			if (e->not_coded_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (340:8): [True: 10.1k, False: 83.2k]
  ------------------
  341|  93.3k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (341:8): [True: 13.4k, False: 79.8k]
  ------------------
  342|  93.3k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (342:8): [True: 18.1k, False: 75.2k]
  ------------------
  343|  93.3k|			if (e->vlc_symbols)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (343:8): [True: 33.8k, False: 59.5k]
  ------------------
  344|  93.3k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (344:8): [True: 14.1k, False: 79.1k]
  ------------------
  345|  93.3k|			if (e->inter_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (345:8): [True: 16.3k, False: 77.0k]
  ------------------
  346|  93.3k|			if (e->inter4v_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (346:8): [True: 20.0k, False: 73.3k]
  ------------------
  347|  93.3k|			if (e->apm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (347:8): [True: 54.7k, False: 38.5k]
  ------------------
  348|  93.3k|			if (e->npm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (348:8): [True: 60.1k, False: 33.1k]
  ------------------
  349|  93.3k|			if (e->forw_back_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (349:8): [True: 23.6k, False: 69.7k]
  ------------------
  350|  93.3k|			if (e->halfpel2)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (350:8): [True: 26.7k, False: 66.6k]
  ------------------
  351|  93.3k|			if (e->halfpel4)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (351:8): [True: 38.6k, False: 54.7k]
  ------------------
  352|  93.3k|			if (e->interpolate_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (352:8): [True: 55.6k, False: 37.7k]
  ------------------
  353|  93.3k|			if (e->sadct)		BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (353:8): [True: 37.5k, False: 55.7k]
  ------------------
  354|  93.3k|			if (e->quarterpel)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (354:8): [True: 29.6k, False: 63.6k]
  ------------------
  355|  93.3k|		}
  356|       |
  357|  1.29M|		if (coding_type == S_VOP && dec->sprite_enable == SPRITE_STATIC) {
  ------------------
  |  |  107|  2.58M|#define S_VOP	3
  ------------------
              		if (coding_type == S_VOP && dec->sprite_enable == SPRITE_STATIC) {
  ------------------
  |  |   94|  1.14M|#define SPRITE_STATIC	1
  ------------------
  |  Branch (357:7): [True: 1.14M, False: 147k]
  |  Branch (357:31): [True: 421k, False: 721k]
  ------------------
  358|   421k|			if (e->intra_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (358:8): [True: 49.6k, False: 372k]
  ------------------
  359|   421k|			if (e->not_coded_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (359:8): [True: 251k, False: 170k]
  ------------------
  360|   421k|			if (e->dct_coefs)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (360:8): [True: 33.4k, False: 388k]
  ------------------
  361|   421k|			if (e->dct_lines)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (361:8): [True: 34.3k, False: 387k]
  ------------------
  362|   421k|			if (e->vlc_symbols)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (362:8): [True: 108k, False: 313k]
  ------------------
  363|   421k|			if (e->vlc_bits)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (363:8): [True: 20.1k, False: 401k]
  ------------------
  364|   421k|			if (e->inter_blocks) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (364:8): [True: 50.8k, False: 371k]
  ------------------
  365|   421k|			if (e->inter4v_blocks)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (365:8): [True: 265k, False: 156k]
  ------------------
  366|   421k|			if (e->apm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (366:8): [True: 119k, False: 302k]
  ------------------
  367|   421k|			if (e->npm)			BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (367:8): [True: 347k, False: 74.9k]
  ------------------
  368|   421k|			if (e->forw_back_mc_q)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (368:8): [True: 263k, False: 158k]
  ------------------
  369|   421k|			if (e->halfpel2)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (369:8): [True: 289k, False: 132k]
  ------------------
  370|   421k|			if (e->halfpel4)	BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (370:8): [True: 269k, False: 151k]
  ------------------
  371|   421k|			if (e->interpolate_mc_q) BitstreamSkip(bs, 8);	/* */
  ------------------
  |  Branch (371:8): [True: 355k, False: 66.8k]
  ------------------
  372|   421k|		}
  373|  1.29M|	}
  374|  1.36M|}

mbcoding.c:BitstreamShowBits:
  254|   685M|{
  255|   685M|	int nbit = (bits + bs->pos) - 32;
  256|       |
  257|   685M|	if (nbit > 0) {
  ------------------
  |  Branch (257:6): [True: 301M, False: 383M]
  ------------------
  258|   301M|		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
  259|   301M|																 bufb >> (32 -
  260|   301M|																		  nbit));
  261|   383M|	} else {
  262|   383M|		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
  263|   383M|	}
  264|   685M|}
mbcoding.c:BitstreamSkip:
  272|   277M|{
  273|   277M|	bs->pos += bits;
  274|       |
  275|   277M|	if (bs->pos >= 32) {
  ------------------
  |  Branch (275:6): [True: 42.7M, False: 234M]
  ------------------
  276|  42.7M|		uint32_t tmp;
  277|       |
  278|  42.7M|		bs->bufa = bs->bufb;
  279|  42.7M|#if defined(XVID_SAFE_BS_TAIL)
  280|  42.7M|		if (bs->tail<(bs->start+((bs->length+3)>>2)))
  ------------------
  |  Branch (280:7): [True: 5.85M, False: 36.9M]
  ------------------
  281|  5.85M|#endif
  282|  5.85M|		{
  283|  5.85M|			tmp = *((uint32_t *) bs->tail + 2);
  284|  5.85M|#ifndef ARCH_IS_BIG_ENDIAN
  285|  5.85M|			BSWAP(tmp);
  ------------------
  |  |  365|  5.85M|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  5.85M|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  286|  5.85M|#endif
  287|  5.85M|			bs->bufb = tmp;
  288|  5.85M|			bs->tail++;
  289|  5.85M|		}
  290|  36.9M|#if defined(XVID_SAFE_BS_TAIL)
  291|  36.9M|		else {
  292|  36.9M|			bs->bufb = 0;
  293|  36.9M|		}
  294|  42.7M|#endif
  295|  42.7M|		bs->pos -= 32;
  296|  42.7M|	}
  297|   277M|}
mbcoding.c:BitstreamNumBitsToByteAlign:
  303|  56.1M|{
  304|  56.1M|	uint32_t n = (32 - bs->pos) % 8;
  305|  56.1M|	return n == 0 ? 8 : n;
  ------------------
  |  Branch (305:9): [True: 6.85M, False: 49.2M]
  ------------------
  306|  56.1M|}
mbcoding.c:BitstreamShowBitsFromByteAlign:
  312|  5.33M|{
  313|  5.33M|	int bspos = bs->pos + BitstreamNumBitsToByteAlign(bs);
  314|  5.33M|	int nbit = (bits + bspos) - 32;
  315|       |
  316|  5.33M|	if (bspos >= 32) {
  ------------------
  |  Branch (316:6): [True: 1.10M, False: 4.22M]
  ------------------
  317|  1.10M|		return bs->bufb >> (32 - nbit);
  318|  4.22M|	} else	if (nbit > 0) {
  ------------------
  |  Branch (318:13): [True: 2.13M, False: 2.09M]
  ------------------
  319|  2.13M|		return ((bs->bufa & (0xffffffff >> bspos)) << nbit) | (bs->
  320|  2.13M|																 bufb >> (32 -
  321|  2.13M|																		  nbit));
  322|  2.13M|	} else {
  323|  2.09M|		return (bs->bufa & (0xffffffff >> bspos)) >> (32 - bspos - bits);
  324|  2.09M|	}
  325|       |
  326|  5.33M|}
mbcoding.c:BitstreamGetBit:
  418|  1.45M|{
  419|  1.45M|	return BitstreamGetBits(bs, 1);
  420|  1.45M|}
mbcoding.c:BitstreamGetBits:
  406|  91.0M|{
  407|  91.0M|	uint32_t ret = BitstreamShowBits(bs, n);
  408|       |
  409|  91.0M|	BitstreamSkip(bs, n);
  410|  91.0M|	return ret;
  411|  91.0M|}
decoder.c:BitstreamInit:
  184|  69.9k|{
  185|  69.9k|	uint32_t tmp;
  186|  69.9k|	size_t bitpos;
  187|  69.9k|	ptr_t adjbitstream = (ptr_t)bitstream;
  ------------------
  |  |  132|  69.9k|#    define ptr_t uint64_t
  ------------------
  188|       |
  189|       |	/*
  190|       |	 * Start the stream on a uint32_t boundary, by rounding down to the
  191|       |	 * previous uint32_t and skipping the intervening bytes.
  192|       |	 */
  193|  69.9k|	bitpos = ((sizeof(uint32_t)-1) & (size_t)bitstream);
  194|  69.9k|	adjbitstream = adjbitstream - bitpos;
  195|  69.9k|	bs->start = bs->tail = (uint32_t *) adjbitstream;
  196|       |
  197|  69.9k|	tmp = *bs->start;
  198|  69.9k|#ifndef ARCH_IS_BIG_ENDIAN
  199|  69.9k|	BSWAP(tmp);
  ------------------
  |  |  365|  69.9k|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  69.9k|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  200|  69.9k|#endif
  201|  69.9k|	bs->bufa = tmp;
  202|       |
  203|  69.9k|	tmp = *(bs->start + 1);
  204|  69.9k|#ifndef ARCH_IS_BIG_ENDIAN
  205|  69.9k|	BSWAP(tmp);
  ------------------
  |  |  365|  69.9k|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  69.9k|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  206|  69.9k|#endif
  207|  69.9k|	bs->bufb = tmp;
  208|       |
  209|  69.9k|	bs->pos = bs->initpos = (uint32_t) bitpos*8;
  210|       |	/* preserve the intervening bytes */
  211|  69.9k|	if (bs->initpos > 0)
  ------------------
  |  Branch (211:6): [True: 43.7k, False: 26.2k]
  ------------------
  212|  43.7k|		bs->buf = bs->bufa & (0xffffffff << (32 - bs->initpos));
  213|  26.2k|	else
  214|  26.2k|		bs->buf = 0;
  215|  69.9k|	bs->length = length;
  216|  69.9k|}
decoder.c:BitstreamShowBits:
  254|   205M|{
  255|   205M|	int nbit = (bits + bs->pos) - 32;
  256|       |
  257|   205M|	if (nbit > 0) {
  ------------------
  |  Branch (257:6): [True: 51.0M, False: 154M]
  ------------------
  258|  51.0M|		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
  259|  51.0M|																 bufb >> (32 -
  260|  51.0M|																		  nbit));
  261|   154M|	} else {
  262|   154M|		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
  263|   154M|	}
  264|   205M|}
decoder.c:BitstreamPos:
  347|  69.9k|{
  348|  69.9k|	return((uint32_t)(8*((ptr_t)bs->tail - (ptr_t)bs->start) + bs->pos - bs->initpos));
  349|  69.9k|}
decoder.c:BitstreamSkip:
  272|   112M|{
  273|   112M|	bs->pos += bits;
  274|       |
  275|   112M|	if (bs->pos >= 32) {
  ------------------
  |  Branch (275:6): [True: 27.2M, False: 85.6M]
  ------------------
  276|  27.2M|		uint32_t tmp;
  277|       |
  278|  27.2M|		bs->bufa = bs->bufb;
  279|  27.2M|#if defined(XVID_SAFE_BS_TAIL)
  280|  27.2M|		if (bs->tail<(bs->start+((bs->length+3)>>2)))
  ------------------
  |  Branch (280:7): [True: 3.03M, False: 24.1M]
  ------------------
  281|  3.03M|#endif
  282|  3.03M|		{
  283|  3.03M|			tmp = *((uint32_t *) bs->tail + 2);
  284|  3.03M|#ifndef ARCH_IS_BIG_ENDIAN
  285|  3.03M|			BSWAP(tmp);
  ------------------
  |  |  365|  3.03M|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  3.03M|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  286|  3.03M|#endif
  287|  3.03M|			bs->bufb = tmp;
  288|  3.03M|			bs->tail++;
  289|  3.03M|		}
  290|  24.1M|#if defined(XVID_SAFE_BS_TAIL)
  291|  24.1M|		else {
  292|  24.1M|			bs->bufb = 0;
  293|  24.1M|		}
  294|  27.2M|#endif
  295|  27.2M|		bs->pos -= 32;
  296|  27.2M|	}
  297|   112M|}
decoder.c:BitstreamGetBit:
  418|   108M|{
  419|   108M|	return BitstreamGetBits(bs, 1);
  420|   108M|}
decoder.c:BitstreamGetBits:
  406|   112M|{
  407|   112M|	uint32_t ret = BitstreamShowBits(bs, n);
  408|       |
  409|   112M|	BitstreamSkip(bs, n);
  410|   112M|	return ret;
  411|   112M|}
bitstream.c:BitstreamSkip:
  272|  75.2M|{
  273|  75.2M|	bs->pos += bits;
  274|       |
  275|  75.2M|	if (bs->pos >= 32) {
  ------------------
  |  Branch (275:6): [True: 36.2M, False: 39.0M]
  ------------------
  276|  36.2M|		uint32_t tmp;
  277|       |
  278|  36.2M|		bs->bufa = bs->bufb;
  279|  36.2M|#if defined(XVID_SAFE_BS_TAIL)
  280|  36.2M|		if (bs->tail<(bs->start+((bs->length+3)>>2)))
  ------------------
  |  Branch (280:7): [True: 34.2M, False: 1.92M]
  ------------------
  281|  34.2M|#endif
  282|  34.2M|		{
  283|  34.2M|			tmp = *((uint32_t *) bs->tail + 2);
  284|  34.2M|#ifndef ARCH_IS_BIG_ENDIAN
  285|  34.2M|			BSWAP(tmp);
  ------------------
  |  |  365|  34.2M|	((a) = (((a) & 0xff) << 24)  | (((a) & 0xff00) << 8) | \
  |  |  366|  34.2M|	 (((a) >> 8) & 0xff00) | (((a) >> 24) & 0xff))
  ------------------
  286|  34.2M|#endif
  287|  34.2M|			bs->bufb = tmp;
  288|  34.2M|			bs->tail++;
  289|  34.2M|		}
  290|  1.92M|#if defined(XVID_SAFE_BS_TAIL)
  291|  1.92M|		else {
  292|  1.92M|			bs->bufb = 0;
  293|  1.92M|		}
  294|  36.2M|#endif
  295|  36.2M|		bs->pos -= 32;
  296|  36.2M|	}
  297|  75.2M|}
bitstream.c:BitstreamNumBitsToByteAlign:
  303|  28.2k|{
  304|  28.2k|	uint32_t n = (32 - bs->pos) % 8;
  305|  28.2k|	return n == 0 ? 8 : n;
  ------------------
  |  Branch (305:9): [True: 364, False: 27.8k]
  ------------------
  306|  28.2k|}
bitstream.c:BitstreamGetBit:
  418|  18.1M|{
  419|  18.1M|	return BitstreamGetBits(bs, 1);
  420|  18.1M|}
bitstream.c:BitstreamGetBits:
  406|  33.6M|{
  407|  33.6M|	uint32_t ret = BitstreamShowBits(bs, n);
  408|       |
  409|  33.6M|	BitstreamSkip(bs, n);
  410|  33.6M|	return ret;
  411|  33.6M|}
bitstream.c:BitstreamPos:
  347|  22.8M|{
  348|  22.8M|	return((uint32_t)(8*((ptr_t)bs->tail - (ptr_t)bs->start) + bs->pos - bs->initpos));
  349|  22.8M|}
bitstream.c:BitstreamByteAlign:
  334|  22.8M|{
  335|  22.8M|	uint32_t remainder = bs->pos % 8;
  336|       |
  337|  22.8M|	if (remainder) {
  ------------------
  |  Branch (337:6): [True: 1.66M, False: 21.1M]
  ------------------
  338|  1.66M|		BitstreamSkip(bs, 8 - remainder);
  339|  1.66M|	}
  340|  22.8M|}
bitstream.c:BitstreamShowBits:
  254|  77.8M|{
  255|  77.8M|	int nbit = (bits + bs->pos) - 32;
  256|       |
  257|  77.8M|	if (nbit > 0) {
  ------------------
  |  Branch (257:6): [True: 13.5M, False: 64.3M]
  ------------------
  258|  13.5M|		return ((bs->bufa & (0xffffffff >> bs->pos)) << nbit) | (bs->
  259|  13.5M|																 bufb >> (32 -
  260|  13.5M|																		  nbit));
  261|  64.3M|	} else {
  262|  64.3M|		return (bs->bufa & (0xffffffff >> bs->pos)) >> (32 - bs->pos - bits);
  263|  64.3M|	}
  264|  77.8M|}

bs_get_spritetrajectory:
   71|  3.61M|{
   72|  3.61M|	int i;
   73|  25.3M|	for (i = 0; i < 12; i++)
  ------------------
  |  Branch (73:14): [True: 23.9M, False: 1.34M]
  ------------------
   74|  23.9M|	{
   75|  23.9M|		if (BitstreamShowBits(bs, sprite_trajectory_len[i].len) == sprite_trajectory_len[i].code)
  ------------------
  |  Branch (75:7): [True: 2.26M, False: 21.7M]
  ------------------
   76|  2.26M|		{
   77|  2.26M|			BitstreamSkip(bs, sprite_trajectory_len[i].len);
   78|  2.26M|			return i;
   79|  2.26M|		}
   80|  23.9M|	}
   81|  1.34M|	return -1;
   82|  3.61M|}
init_vlc_tables:
   86|      2|{
   87|      2|	uint32_t i, j, k, intra, last, run,  run_esc, level, level_esc, escape, escape_len, offset;
   88|      2|	int32_t l;
   89|       |
   90|      6|	for (intra = 0; intra < 2; intra++)
  ------------------
  |  Branch (90:18): [True: 4, False: 2]
  ------------------
   91|  16.3k|		for (i = 0; i < 4096; i++)
  ------------------
  |  Branch (91:15): [True: 16.3k, False: 4]
  ------------------
   92|  16.3k|			DCT3D[intra][i].event.level = 0;
   93|       |
   94|      6|	for (intra = 0; intra < 2; intra++) {
  ------------------
  |  Branch (94:18): [True: 4, False: 2]
  ------------------
   95|     12|		for (last = 0; last < 2; last++) {
  ------------------
  |  Branch (95:18): [True: 8, False: 4]
  ------------------
   96|    516|			for (run = 0; run < 63 + last; run++) {
  ------------------
  |  Branch (96:18): [True: 508, False: 8]
  ------------------
   97|  24.8k|				for (level = 0; level < (uint32_t)(32 << intra); level++) {
  ------------------
  |  Branch (97:21): [True: 24.3k, False: 508]
  ------------------
   98|  24.3k|					offset = !intra * LEVELOFFSET;
  ------------------
  |  |   46|  24.3k|#define LEVELOFFSET 32
  ------------------
   99|  24.3k|					coeff_VLC[intra][last][level + offset][run].len = 128;
  100|  24.3k|				}
  101|    508|			}
  102|      8|		}
  103|      4|	}
  104|       |
  105|      6|	for (intra = 0; intra < 2; intra++) {
  ------------------
  |  Branch (105:18): [True: 4, False: 2]
  ------------------
  106|    412|		for (i = 0; i < 102; i++) {
  ------------------
  |  Branch (106:15): [True: 408, False: 4]
  ------------------
  107|    408|			offset = !intra * LEVELOFFSET;
  ------------------
  |  |   46|    408|#define LEVELOFFSET 32
  ------------------
  108|       |
  109|  16.6k|			for (j = 0; j < (uint32_t)(1 << (12 - coeff_tab[intra][i].vlc.len)); j++) {
  ------------------
  |  Branch (109:16): [True: 16.2k, False: 408]
  ------------------
  110|  16.2k|				DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].len	 = coeff_tab[intra][i].vlc.len;
  111|  16.2k|				DCT3D[intra][(coeff_tab[intra][i].vlc.code << (12 - coeff_tab[intra][i].vlc.len)) | j].event = coeff_tab[intra][i].event;
  112|  16.2k|			}
  113|       |
  114|    408|			coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].code
  115|    408|				= coeff_tab[intra][i].vlc.code << 1;
  116|    408|			coeff_VLC[intra][coeff_tab[intra][i].event.last][coeff_tab[intra][i].event.level + offset][coeff_tab[intra][i].event.run].len
  117|    408|				= coeff_tab[intra][i].vlc.len + 1;
  118|       |
  119|    408|			if (!intra) {
  ------------------
  |  Branch (119:8): [True: 204, False: 204]
  ------------------
  120|    204|				coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].code
  121|    204|					= (coeff_tab[intra][i].vlc.code << 1) | 1;
  122|    204|				coeff_VLC[intra][coeff_tab[intra][i].event.last][offset - coeff_tab[intra][i].event.level][coeff_tab[intra][i].event.run].len
  123|    204|					= coeff_tab[intra][i].vlc.len + 1;
  124|    204|			}
  125|    408|		}
  126|      4|	}
  127|       |
  128|      6|	for (intra = 0; intra < 2; intra++) {
  ------------------
  |  Branch (128:18): [True: 4, False: 2]
  ------------------
  129|     12|		for (last = 0; last < 2; last++) {
  ------------------
  |  Branch (129:18): [True: 8, False: 4]
  ------------------
  130|    516|			for (run = 0; run < 63 + last; run++) {
  ------------------
  |  Branch (130:18): [True: 508, False: 8]
  ------------------
  131|  24.3k|				for (level = 1; level < (uint32_t)(32 << intra); level++) {
  ------------------
  |  Branch (131:21): [True: 23.8k, False: 508]
  ------------------
  132|       |
  133|  23.8k|					if (level <= max_level[intra][last][run] && run <= max_run[intra][last][level])
  ------------------
  |  Branch (133:10): [True: 408, False: 23.4k]
  |  Branch (133:50): [True: 408, False: 0]
  ------------------
  134|    408|					    continue;
  135|       |
  136|  23.4k|					offset = !intra * LEVELOFFSET;
  ------------------
  |  |   46|  23.4k|#define LEVELOFFSET 32
  ------------------
  137|  23.4k|                    level_esc = level - max_level[intra][last][run];
  138|  23.4k|					run_esc = run - 1 - max_run[intra][last][level];
  139|       |
  140|  23.4k|					if (level_esc <= max_level[intra][last][run] && run <= max_run[intra][last][level_esc]) {
  ------------------
  |  Branch (140:10): [True: 408, False: 23.0k]
  |  Branch (140:54): [True: 408, False: 0]
  ------------------
  141|    408|						escape     = ESCAPE1;
  ------------------
  |  |   33|    408|#define ESCAPE1 6
  ------------------
  142|    408|						escape_len = 7 + 1;
  143|    408|						run_esc    = run;
  144|  23.0k|					} else {
  145|  23.0k|						if (run_esc <= max_run[intra][last][level] && level <= max_level[intra][last][run_esc]) {
  ------------------
  |  Branch (145:11): [True: 496, False: 22.5k]
  |  Branch (145:53): [True: 222, False: 274]
  ------------------
  146|    222|							escape     = ESCAPE2;
  ------------------
  |  |   34|    222|#define ESCAPE2 14
  ------------------
  147|    222|							escape_len = 7 + 2;
  148|    222|							level_esc  = level;
  149|  22.8k|						} else {
  150|  22.8k|							if (!intra) {
  ------------------
  |  Branch (150:12): [True: 7.36k, False: 15.4k]
  ------------------
  151|  7.36k|								coeff_VLC[intra][last][level + offset][run].code
  152|  7.36k|									= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((level & 0xfff) << 1) | 1;
  ------------------
  |  |   35|  7.36k|#define ESCAPE3 15
  ------------------
  153|  7.36k|								coeff_VLC[intra][last][level + offset][run].len = 30;
  154|  7.36k|									coeff_VLC[intra][last][offset - level][run].code
  155|  7.36k|									= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-(int32_t)level & 0xfff) << 1) | 1;
  ------------------
  |  |   35|  7.36k|#define ESCAPE3 15
  ------------------
  156|  7.36k|								coeff_VLC[intra][last][offset - level][run].len = 30;
  157|  7.36k|							}
  158|  22.8k|							continue;
  159|  22.8k|						}
  160|  23.0k|					}
  161|       |
  162|    630|					coeff_VLC[intra][last][level + offset][run].code
  163|    630|						= (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len)
  164|    630|						|  coeff_VLC[intra][last][level_esc + offset][run_esc].code;
  165|    630|					coeff_VLC[intra][last][level + offset][run].len
  166|    630|						= coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len;
  167|       |
  168|    630|					if (!intra) {
  ------------------
  |  Branch (168:10): [True: 310, False: 320]
  ------------------
  169|    310|						coeff_VLC[intra][last][offset - level][run].code
  170|    310|							= (escape << coeff_VLC[intra][last][level_esc + offset][run_esc].len)
  171|    310|							|  coeff_VLC[intra][last][level_esc + offset][run_esc].code | 1;
  172|    310|						coeff_VLC[intra][last][offset - level][run].len
  173|    310|							= coeff_VLC[intra][last][level_esc + offset][run_esc].len + escape_len;
  174|    310|					}
  175|    630|				}
  176|       |
  177|    508|				if (!intra) {
  ------------------
  |  Branch (177:9): [True: 254, False: 254]
  ------------------
  178|    254|					coeff_VLC[intra][last][0][run].code
  179|    254|						= (ESCAPE3 << 21) | (last << 20) | (run << 14) | (1 << 13) | ((-32 & 0xfff) << 1) | 1;
  ------------------
  |  |   35|    254|#define ESCAPE3 15
  ------------------
  180|    254|					coeff_VLC[intra][last][0][run].len = 30;
  181|    254|				}
  182|    508|			}
  183|      8|		}
  184|      4|	}
  185|       |
  186|       |	/* init sprite_trajectory tables
  187|       |	 * even if GMC is not specified (it might be used later...) */
  188|       |
  189|      2|	sprite_trajectory_code[0+16384].code = 0;
  190|      2|	sprite_trajectory_code[0+16384].len = 0;
  191|     30|	for (k=0;k<14;k++) {
  ------------------
  |  Branch (191:11): [True: 28, False: 2]
  ------------------
  192|     28|		int limit = (1<<k);
  193|       |
  194|  32.7k|		for (l=-(2*limit-1); l <= -limit; l++) {
  ------------------
  |  Branch (194:24): [True: 32.7k, False: 28]
  ------------------
  195|  32.7k|			sprite_trajectory_code[l+16384].code = (2*limit-1)+l;
  196|  32.7k|			sprite_trajectory_code[l+16384].len = k+1;
  197|  32.7k|		}
  198|       |
  199|  32.7k|		for (l=limit; l<= 2*limit-1; l++) {
  ------------------
  |  Branch (199:17): [True: 32.7k, False: 28]
  ------------------
  200|  32.7k|			sprite_trajectory_code[l+16384].code = l;
  201|  32.7k|			sprite_trajectory_code[l+16384].len = k+1;
  202|  32.7k|		}
  203|     28|	}
  204|      2|}
check_resync_marker:
  814|  50.8M|{
  815|  50.8M|	uint32_t nbits;
  816|  50.8M|	uint32_t code;
  817|  50.8M|	uint32_t nbitsresyncmarker = NUMBITS_VP_RESYNC_MARKER + addbits;
  ------------------
  |  |  111|  50.8M|#define NUMBITS_VP_RESYNC_MARKER  17
  ------------------
  818|       |
  819|  50.8M|	nbits = BitstreamNumBitsToByteAlign(bs);
  820|  50.8M|	code = BitstreamShowBits(bs, nbits);
  821|       |
  822|  50.8M|	if (code == (((uint32_t)1 << (nbits - 1)) - 1))
  ------------------
  |  Branch (822:6): [True: 5.33M, False: 45.4M]
  ------------------
  823|  5.33M|	{
  824|  5.33M|		return BitstreamShowBitsFromByteAlign(bs, nbitsresyncmarker) == RESYNC_MARKER;
  ------------------
  |  |  112|  5.33M|#define RESYNC_MARKER 1
  ------------------
  825|  5.33M|	}
  826|       |
  827|  45.4M|	return 0;
  828|  50.8M|}
get_mcbpc_intra:
  834|  27.4M|{
  835|       |
  836|  27.4M|	uint32_t index;
  837|       |
  838|  27.4M|	index = BitstreamShowBits(bs, 9);
  839|  27.4M|	index >>= 3;
  840|       |
  841|  27.4M|	BitstreamSkip(bs, mcbpc_intra_table[index].len);
  842|       |
  843|  27.4M|	return mcbpc_intra_table[index].code;
  844|       |
  845|  27.4M|}
get_mcbpc_inter:
  849|  18.1M|{
  850|       |
  851|  18.1M|	uint32_t index;
  852|       |
  853|  18.1M|	index = MIN(BitstreamShowBits(bs, 9), 256);
  ------------------
  |  |  255|  18.1M|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 18.0M, False: 144k]
  |  |  ------------------
  ------------------
  854|       |
  855|  18.1M|	BitstreamSkip(bs, mcbpc_inter_table[index].len);
  856|       |
  857|  18.1M|	return mcbpc_inter_table[index].code;
  858|       |
  859|  18.1M|}
get_cbpy:
  864|  45.6M|{
  865|       |
  866|  45.6M|	int cbpy;
  867|  45.6M|	uint32_t index = BitstreamShowBits(bs, 6);
  868|       |
  869|  45.6M|	BitstreamSkip(bs, cbpy_table[index].len);
  870|  45.6M|	cbpy = cbpy_table[index].code;
  871|       |
  872|  45.6M|	if (!intra)
  ------------------
  |  Branch (872:6): [True: 18.1M, False: 27.5M]
  ------------------
  873|  18.1M|		cbpy = 15 - cbpy;
  874|       |
  875|  45.6M|	return cbpy;
  876|       |
  877|  45.6M|}
get_mv:
  912|  1.45M|{
  913|       |
  914|  1.45M|	int data;
  915|  1.45M|	int res;
  916|  1.45M|	int mv;
  917|  1.45M|	int scale_fac = 1 << (fcode - 1);
  918|       |
  919|  1.45M|	data = get_mv_data(bs);
  920|       |
  921|  1.45M|	if (scale_fac == 1 || data == 0)
  ------------------
  |  Branch (921:6): [True: 327k, False: 1.12M]
  |  Branch (921:24): [True: 803k, False: 326k]
  ------------------
  922|  1.13M|		return data;
  923|       |
  924|   326k|	res = BitstreamGetBits(bs, fcode - 1);
  925|   326k|	mv = ((abs(data) - 1) * scale_fac) + res + 1;
  926|       |
  927|   326k|	return data < 0 ? -mv : mv;
  ------------------
  |  Branch (927:9): [True: 157k, False: 169k]
  ------------------
  928|       |
  929|  1.45M|}
get_dc_dif:
  934|  44.7M|{
  935|       |
  936|  44.7M|	int code = BitstreamGetBits(bs, dc_size);
  937|  44.7M|	int msb = code >> (dc_size - 1);
  938|       |
  939|  44.7M|	if (msb == 0)
  ------------------
  |  Branch (939:6): [True: 44.4M, False: 276k]
  ------------------
  940|  44.4M|		return (-1 * (code ^ ((1 << dc_size) - 1)));
  941|       |
  942|   276k|	return code;
  943|       |
  944|  44.7M|}
get_dc_size_lum:
  948|  89.0M|{
  949|       |
  950|  89.0M|	int code, i;
  951|       |
  952|  89.0M|	code = BitstreamShowBits(bs, 11);
  953|       |
  954|   801M|	for (i = 11; i > 3; i--) {
  ------------------
  |  Branch (954:15): [True: 712M, False: 89.0M]
  ------------------
  955|   712M|		if (code == 1) {
  ------------------
  |  Branch (955:7): [True: 93.0k, False: 712M]
  ------------------
  956|  93.0k|			BitstreamSkip(bs, i);
  957|  93.0k|			return i + 1;
  958|  93.0k|		}
  959|   712M|		code >>= 1;
  960|   712M|	}
  961|       |
  962|  89.0M|	BitstreamSkip(bs, dc_lum_tab[code].len);
  963|  89.0M|	return dc_lum_tab[code].code;
  964|       |
  965|  89.0M|}
get_dc_size_chrom:
  970|  44.5M|{
  971|       |
  972|  44.5M|	uint32_t code, i;
  973|       |
  974|  44.5M|	code = BitstreamShowBits(bs, 12);
  975|       |
  976|   489M|	for (i = 12; i > 2; i--) {
  ------------------
  |  Branch (976:15): [True: 445M, False: 44.4M]
  ------------------
  977|   445M|		if (code == 1) {
  ------------------
  |  Branch (977:7): [True: 60.7k, False: 445M]
  ------------------
  978|  60.7k|			BitstreamSkip(bs, i);
  979|  60.7k|			return i;
  980|  60.7k|		}
  981|   445M|		code >>= 1;
  982|   445M|	}
  983|       |
  984|  44.4M|	return 3 - BitstreamGetBits(bs, 2);
  985|       |
  986|  44.5M|}
get_intra_block:
 1087|   272M|{
 1088|       |
 1089|   272M|	const uint16_t *scan = scan_tables[direction];
 1090|   272M|	int level, run, last = 0;
 1091|       |
 1092|   274M|	do {
 1093|   274M|		level = get_coeff(bs, &run, &last, 1, 0);
 1094|   274M|		coeff += run;
 1095|   274M|		if (coeff & ~63) {
  ------------------
  |  Branch (1095:7): [True: 272M, False: 2.53M]
  ------------------
 1096|   272M|			DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index");
  ------------------
  |  |  196|   272M|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1097|   272M|			break;
 1098|   272M|		}
 1099|       |
 1100|  2.53M|		block[scan[coeff]] = level;
 1101|       |
 1102|  2.53M|		DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i\n", scan[coeff], level);
  ------------------
  |  |  201|  2.53M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
 1103|       |#if 0
 1104|       |		DPRINTF(XVID_DEBUG_COEFF,"block[%i] %i %08x\n", scan[coeff], level, BitstreamShowBits(bs, 32));
 1105|       |#endif
 1106|       |
 1107|  2.53M|		if (level < -2047 || level > 2047) {
  ------------------
  |  Branch (1107:7): [True: 13, False: 2.53M]
  |  Branch (1107:24): [True: 0, False: 2.53M]
  ------------------
 1108|     13|			DPRINTF(XVID_DEBUG_ERROR,"warning: intra_overflow %i\n", level);
  ------------------
  |  |  196|     13|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1109|     13|		}
 1110|  2.53M|		coeff++;
 1111|  2.53M|	} while (!last);
  ------------------
  |  Branch (1111:11): [True: 2.21M, False: 319k]
  ------------------
 1112|       |
 1113|   272M|}
get_inter_block_h263:
 1122|   216k|{
 1123|       |
 1124|   216k|	const uint16_t *scan = scan_tables[direction];
 1125|   216k|	const uint16_t quant_m_2 = quant << 1;
 1126|   216k|	const uint16_t quant_add = (quant & 1 ? quant : quant - 1);
  ------------------
  |  Branch (1126:30): [True: 155k, False: 60.2k]
  ------------------
 1127|   216k|	int p;
 1128|   216k|	int level;
 1129|   216k|	int run;
 1130|   216k|	int last = 0;
 1131|       |
 1132|   216k|	p = 0;
 1133|   346k|	do {
 1134|   346k|		level = get_coeff(bs, &run, &last, 0, 0);
 1135|   346k|		p += run;
 1136|   346k|		if (p & ~63) {
  ------------------
  |  Branch (1136:7): [True: 179k, False: 167k]
  ------------------
 1137|   179k|			DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index");
  ------------------
  |  |  196|   179k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1138|   179k|			break;
 1139|   179k|		}
 1140|       |
 1141|   167k|		if (level < 0) {
  ------------------
  |  Branch (1141:7): [True: 89.6k, False: 77.4k]
  ------------------
 1142|  89.6k|			level = level*quant_m_2 - quant_add;
 1143|  89.6k|			block[scan[p]] = (level >= -2048 ? level : -2048);
  ------------------
  |  Branch (1143:22): [True: 88.6k, False: 960]
  ------------------
 1144|  89.6k|		} else {
 1145|  77.4k|			level = level * quant_m_2 + quant_add;
 1146|  77.4k|			block[scan[p]] = (level <= 2047 ? level : 2047);
  ------------------
  |  Branch (1146:22): [True: 76.7k, False: 731]
  ------------------
 1147|  77.4k|		}		
 1148|   167k|		p++;
 1149|   167k|	} while (!last);
  ------------------
  |  Branch (1149:11): [True: 129k, False: 37.1k]
  ------------------
 1150|   216k|}
get_inter_block_mpeg:
 1159|   310k|{
 1160|   310k|	const uint16_t *scan = scan_tables[direction];
 1161|   310k|	uint32_t sum = 0;
 1162|   310k|	int p;
 1163|   310k|	int level;
 1164|   310k|	int run;
 1165|   310k|	int last = 0;
 1166|       |
 1167|   310k|	p = 0;
 1168|   533k|	do {
 1169|   533k|		level = get_coeff(bs, &run, &last, 0, 0);
 1170|   533k|		p += run;
 1171|   533k|		if (p & ~63) {
  ------------------
  |  Branch (1171:7): [True: 263k, False: 269k]
  ------------------
 1172|   263k|			DPRINTF(XVID_DEBUG_ERROR,"fatal: invalid run or index");
  ------------------
  |  |  196|   263k|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1173|   263k|			break;
 1174|   263k|		}
 1175|       |
 1176|   269k|		if (level < 0) {
  ------------------
  |  Branch (1176:7): [True: 151k, False: 118k]
  ------------------
 1177|   151k|			level = ((2 * -level + 1) * matrix[scan[p]] * quant) >> 4;
 1178|   151k|			block[scan[p]] = (level <= 2048 ? -level : -2048);
  ------------------
  |  Branch (1178:22): [True: 146k, False: 4.15k]
  ------------------
 1179|   151k|		} else {
 1180|   118k|			level = ((2 *  level + 1) * matrix[scan[p]] * quant) >> 4;
 1181|   118k|			block[scan[p]] = (level <= 2047 ? level : 2047);
  ------------------
  |  Branch (1181:22): [True: 116k, False: 2.01k]
  ------------------
 1182|   118k|		}
 1183|       |
 1184|   269k|		sum ^= block[scan[p]];
 1185|       |		
 1186|   269k|		p++;
 1187|   269k|	} while (!last);
  ------------------
  |  Branch (1187:11): [True: 222k, False: 46.6k]
  ------------------
 1188|       |
 1189|       |	/*	mismatch control */
 1190|   310k|	if ((sum & 1) == 0) {
  ------------------
  |  Branch (1190:6): [True: 275k, False: 35.3k]
  ------------------
 1191|   275k|		block[63] ^= 1;
 1192|   275k|	}
 1193|   310k|}
mbcoding.c:get_mv_data:
  881|  1.45M|{
  882|       |
  883|  1.45M|	uint32_t index;
  884|       |
  885|  1.45M|	if (BitstreamGetBit(bs))
  ------------------
  |  Branch (885:6): [True: 655k, False: 801k]
  ------------------
  886|   655k|		return 0;
  887|       |
  888|   801k|	index = BitstreamShowBits(bs, 12);
  889|       |
  890|   801k|	if (index >= 512) {
  ------------------
  |  Branch (890:6): [True: 355k, False: 445k]
  ------------------
  891|   355k|		index = (index >> 8) - 2;
  892|   355k|		BitstreamSkip(bs, TMNMVtab0[index].len);
  893|   355k|		return TMNMVtab0[index].code;
  894|   355k|	}
  895|       |
  896|   445k|	if (index >= 128) {
  ------------------
  |  Branch (896:6): [True: 50.0k, False: 395k]
  ------------------
  897|  50.0k|		index = (index >> 2) - 32;
  898|  50.0k|		BitstreamSkip(bs, TMNMVtab1[index].len);
  899|  50.0k|		return TMNMVtab1[index].code;
  900|  50.0k|	}
  901|       |
  902|   395k|	index -= 4;
  903|       |
  904|   395k|	BitstreamSkip(bs, TMNMVtab2[index&0x7f].len);
  905|   395k|	return TMNMVtab2[index&0x7f].code;
  906|       |
  907|   445k|}
mbcoding.c:get_coeff:
  996|   275M|{
  997|       |
  998|   275M|	uint32_t mode;
  999|   275M|	int32_t level;
 1000|   275M|	REVERSE_EVENT *reverse_event;
 1001|       |
 1002|   275M|	uint32_t cache = BitstreamShowBits(bs, 32);
 1003|       |	
 1004|   275M|	if (short_video_header)		/* inter-VLCs will be used for both intra and inter blocks */
  ------------------
  |  Branch (1004:6): [True: 0, False: 275M]
  ------------------
 1005|      0|		intra = 0;
 1006|       |
 1007|   275M|	if (GET_BITS(cache, 7) != ESCAPE) {
  ------------------
  |  |  988|   275M|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
              	if (GET_BITS(cache, 7) != ESCAPE) {
  ------------------
  |  |   32|   275M|#define ESCAPE  3
  ------------------
  |  Branch (1007:6): [True: 275M, False: 17.2k]
  ------------------
 1008|   275M|		reverse_event = &DCT3D[intra][GET_BITS(cache, 12)];
  ------------------
  |  |  988|   275M|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1009|       |
 1010|   275M|		if ((level = reverse_event->event.level) == 0)
  ------------------
  |  Branch (1010:7): [True: 272M, False: 2.96M]
  ------------------
 1011|   272M|			goto error;
 1012|       |
 1013|  2.96M|		*last = reverse_event->event.last;
 1014|  2.96M|		*run  = reverse_event->event.run;
 1015|       |
 1016|       |		/* Don't forget to update the bitstream position */
 1017|  2.96M|		BitstreamSkip(bs, reverse_event->len+1);
 1018|       |
 1019|  2.96M|		return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level;
  ------------------
  |  |  988|  2.96M|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
  |  Branch (1019:10): [True: 2.04M, False: 923k]
  ------------------
 1020|   275M|	}
 1021|       |
 1022|       |	/* flush 7bits of cache */
 1023|  17.2k|	cache <<= 7;
 1024|       |
 1025|  17.2k|	if (short_video_header) {
  ------------------
  |  Branch (1025:6): [True: 0, False: 17.2k]
  ------------------
 1026|       |		/* escape mode 4 - H.263 type, only used if short_video_header = 1  */
 1027|      0|		*last =  GET_BITS(cache, 1);
  ------------------
  |  |  988|      0|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1028|      0|		*run  = (GET_BITS(cache, 7) &0x3f);
  ------------------
  |  |  988|      0|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1029|      0|		level = (GET_BITS(cache, 15)&0xff);
  ------------------
  |  |  988|      0|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1030|       |
 1031|      0|		if (level == 0 || level == 128)
  ------------------
  |  Branch (1031:7): [True: 0, False: 0]
  |  Branch (1031:21): [True: 0, False: 0]
  ------------------
 1032|      0|			DPRINTF(XVID_DEBUG_ERROR, "Illegal LEVEL for ESCAPE mode 4: %d\n", level);
  ------------------
  |  |  196|      0|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1033|       |
 1034|       |		/* We've "eaten" 22 bits */
 1035|      0|		BitstreamSkip(bs, 22);
 1036|       |
 1037|      0|		return (level << 24) >> 24;
 1038|      0|	}
 1039|       |
 1040|  17.2k|	if ((mode = GET_BITS(cache, 2)) < 3) {
  ------------------
  |  |  988|  17.2k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
  |  Branch (1040:6): [True: 15.3k, False: 1.91k]
  ------------------
 1041|  15.3k|		const int skip[3] = {1, 1, 2};
 1042|  15.3k|		cache <<= skip[mode];
 1043|       |
 1044|  15.3k|		reverse_event = &DCT3D[intra][GET_BITS(cache, 12)];
  ------------------
  |  |  988|  15.3k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1045|       |
 1046|  15.3k|		if ((level = reverse_event->event.level) == 0)
  ------------------
  |  Branch (1046:7): [True: 6.83k, False: 8.51k]
  ------------------
 1047|  6.83k|			goto error;
 1048|       |
 1049|  8.51k|		*last = reverse_event->event.last;
 1050|  8.51k|		*run  = reverse_event->event.run;
 1051|       |
 1052|  8.51k|		if (mode < 2) {
  ------------------
  |  Branch (1052:7): [True: 6.60k, False: 1.91k]
  ------------------
 1053|       |			/* first escape mode, level is offset */
 1054|  6.60k|			level += max_level[intra][*last][*run];
 1055|  6.60k|		} else {
 1056|       |			/* second escape mode, run is offset */
 1057|  1.91k|			*run += max_run[intra][*last][level] + 1;
 1058|  1.91k|		}
 1059|       |		
 1060|       |		/* Update bitstream position */
 1061|  8.51k|		BitstreamSkip(bs, 7 + skip[mode] + reverse_event->len + 1);
 1062|       |
 1063|  8.51k|		return (GET_BITS(cache, reverse_event->len+1)&0x01) ? -level : level;
  ------------------
  |  |  988|  8.51k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
  |  Branch (1063:10): [True: 3.24k, False: 5.26k]
  ------------------
 1064|  15.3k|	}
 1065|       |
 1066|       |	/* third escape mode - fixed length codes */
 1067|  1.91k|	cache <<= 2;
 1068|  1.91k|	*last =  GET_BITS(cache, 1);
  ------------------
  |  |  988|  1.91k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1069|  1.91k|	*run  = (GET_BITS(cache, 7)&0x3f);
  ------------------
  |  |  988|  1.91k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1070|  1.91k|	level = (GET_BITS(cache, 20)&0xfff);
  ------------------
  |  |  988|  1.91k|#define GET_BITS(cache, n) ((cache)>>(32-(n)))
  ------------------
 1071|       |	
 1072|       |	/* Update bitstream position */
 1073|  1.91k|	BitstreamSkip(bs, 30);
 1074|       |
 1075|  1.91k|	return (level << 20) >> 20;
 1076|       |
 1077|   272M|  error:
 1078|   272M|	*run = 64;
 1079|   272M|	return 0;
 1080|  17.2k|}

idct_int32:
  311|   273M|{
  312|   273M|  int i, Rows = 0x07;
  313|       |
  314|   273M|  Idct_Row(In + 0*8, Tab04, Rnd0);
  ------------------
  |  |   49|   273M|#define Rnd0 65536 // 1<<(COL_SHIFT+ROW_SHIFT-1);
  ------------------
  315|   273M|  Idct_Row(In + 1*8, Tab17, Rnd1);
  ------------------
  |  |   50|   273M|#define Rnd1 3597  // FIX (1.75683487303);
  ------------------
  316|   273M|  Idct_Row(In + 2*8, Tab26, Rnd2);
  ------------------
  |  |   51|   273M|#define Rnd2 2260  // FIX (1.10355339059);
  ------------------
  317|   273M|  if (Idct_Row(In + 3*8, Tab35, Rnd3)) Rows |= 0x08;
  ------------------
  |  |   52|   273M|#define Rnd3 1203  // FIX (0.587788325588);
  ------------------
  |  Branch (317:7): [True: 209k, False: 272M]
  ------------------
  318|   273M|  if (Idct_Row(In + 4*8, Tab04, Rnd4)) Rows |= 0x10;
  ------------------
  |  |   53|   273M|#define Rnd4 0
  ------------------
  |  Branch (318:7): [True: 84.7k, False: 273M]
  ------------------
  319|   273M|  if (Idct_Row(In + 5*8, Tab35, Rnd5)) Rows |= 0x20;
  ------------------
  |  |   54|   273M|#define Rnd5 120   // FIX (0.058658283817);
  ------------------
  |  Branch (319:7): [True: 53.8k, False: 273M]
  ------------------
  320|   273M|  if (Idct_Row(In + 6*8, Tab26, Rnd6)) Rows |= 0x40;
  ------------------
  |  |   55|   273M|#define Rnd6 512   // FIX (0.25);
  ------------------
  |  Branch (320:7): [True: 43.7k, False: 273M]
  ------------------
  321|   273M|  if (Idct_Row(In + 7*8, Tab17, Rnd7)) Rows |= 0x80;
  ------------------
  |  |   56|   273M|#define Rnd7 512   // FIX (0.25);
  ------------------
  |  Branch (321:7): [True: 300k, False: 272M]
  ------------------
  322|       |
  323|   273M|  if (Rows&0xf0) {
  ------------------
  |  Branch (323:7): [True: 375k, False: 272M]
  ------------------
  324|  3.37M|    for(i=0; i<8; i++)
  ------------------
  |  Branch (324:14): [True: 3.00M, False: 375k]
  ------------------
  325|  3.00M|      Idct_Col_8(In + i);
  326|   375k|  }
  327|   272M|  else if (Rows&0x08) {
  ------------------
  |  Branch (327:12): [True: 131k, False: 272M]
  ------------------
  328|  1.17M|    for(i=0; i<8; i++)
  ------------------
  |  Branch (328:14): [True: 1.04M, False: 131k]
  ------------------
  329|  1.04M|      Idct_Col_4(In + i);
  330|   131k|  }
  331|   272M|  else {
  332|  2.45G|    for(i=0; i<8; i++)
  ------------------
  |  Branch (332:14): [True: 2.18G, False: 272M]
  ------------------
  333|  2.18G|      Idct_Col_3(In + i);
  334|   272M|  }
  335|   273M|}
idct.c:Idct_Row:
   65|  2.18G|{
   66|  2.18G|  const int C1 = Tab[0];
   67|  2.18G|  const int C2 = Tab[1];
   68|  2.18G|  const int C3 = Tab[2];
   69|  2.18G|  const int C4 = Tab[3];
   70|  2.18G|  const int C5 = Tab[4];
   71|  2.18G|  const int C6 = Tab[5];
   72|  2.18G|  const int C7 = Tab[6];
   73|       |
   74|  2.18G|  const int Right = In[5]|In[6]|In[7];
   75|  2.18G|  const int Left  = In[1]|In[2]|In[3];
   76|  2.18G|  if (!(Right | In[4]))
  ------------------
  |  Branch (76:7): [True: 2.18G, False: 444k]
  ------------------
   77|  2.18G|  {
   78|  2.18G|    const int K = C4*In[0] + Rnd;
   79|  2.18G|    if (Left)
  ------------------
  |  Branch (79:9): [True: 848k, False: 2.18G]
  ------------------
   80|   848k|    {
   81|   848k|      const int a0 = K + C2*In[2];
   82|   848k|      const int a1 = K + C6*In[2];
   83|   848k|      const int a2 = K - C6*In[2];
   84|   848k|      const int a3 = K - C2*In[2];
   85|       |
   86|   848k|      const int b0 = C1*In[1] + C3*In[3];
   87|   848k|      const int b1 = C3*In[1] - C7*In[3];
   88|   848k|      const int b2 = C5*In[1] - C1*In[3];
   89|   848k|      const int b3 = C7*In[1] - C5*In[3];
   90|       |
   91|   848k|      In[0] = (a0 + b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   92|   848k|      In[1] = (a1 + b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   93|   848k|      In[2] = (a2 + b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   94|   848k|      In[3] = (a3 + b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   95|   848k|      In[4] = (a3 - b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   96|   848k|      In[5] = (a2 - b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   97|   848k|      In[6] = (a1 - b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   98|   848k|      In[7] = (a0 - b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   848k|#define ROW_SHIFT 11
  ------------------
   99|   848k|    }
  100|  2.18G|    else
  101|  2.18G|    {
  102|  2.18G|      const int a0 = K >> ROW_SHIFT;
  ------------------
  |  |   45|  2.18G|#define ROW_SHIFT 11
  ------------------
  103|  2.18G|      if (a0) {
  ------------------
  |  Branch (103:11): [True: 818M, False: 1.36G]
  ------------------
  104|   818M|        In[0] = In[1] = In[2] = In[3] =
  105|   818M|        In[4] = In[5] = In[6] = In[7] = a0;
  106|   818M|      }
  107|  1.36G|      else return 0;
  108|  2.18G|    }
  109|  2.18G|  }
  110|   444k|  else if (!(Left|Right))
  ------------------
  |  Branch (110:12): [True: 18.8k, False: 425k]
  ------------------
  111|  18.8k|  {
  112|  18.8k|    const int a0 = (Rnd + C4*(In[0]+In[4])) >> ROW_SHIFT;
  ------------------
  |  |   45|  18.8k|#define ROW_SHIFT 11
  ------------------
  113|  18.8k|    const int a1 = (Rnd + C4*(In[0]-In[4])) >> ROW_SHIFT;
  ------------------
  |  |   45|  18.8k|#define ROW_SHIFT 11
  ------------------
  114|       |
  115|  18.8k|    In[0] = a0;
  116|  18.8k|    In[3] = a0;
  117|  18.8k|    In[4] = a0;
  118|  18.8k|    In[7] = a0;
  119|  18.8k|    In[1] = a1;
  120|  18.8k|    In[2] = a1;
  121|  18.8k|    In[5] = a1;
  122|  18.8k|    In[6] = a1;
  123|  18.8k|  }
  124|   425k|  else
  125|   425k|  {
  126|   425k|    const int K = C4*In[0] + Rnd;
  127|   425k|    const int a0 = K + C2*In[2] + C4*In[4] + C6*In[6];
  128|   425k|    const int a1 = K + C6*In[2] - C4*In[4] - C2*In[6];
  129|   425k|    const int a2 = K - C6*In[2] - C4*In[4] + C2*In[6];
  130|   425k|    const int a3 = K - C2*In[2] + C4*In[4] - C6*In[6];
  131|       |
  132|   425k|    const int b0 = C1*In[1] + C3*In[3] + C5*In[5] + C7*In[7];
  133|   425k|    const int b1 = C3*In[1] - C7*In[3] - C1*In[5] - C5*In[7];
  134|   425k|    const int b2 = C5*In[1] - C1*In[3] + C7*In[5] + C3*In[7];
  135|   425k|    const int b3 = C7*In[1] - C5*In[3] + C3*In[5] - C1*In[7];
  136|       |
  137|   425k|    In[0] = (a0 + b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  138|   425k|    In[1] = (a1 + b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  139|   425k|    In[2] = (a2 + b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  140|   425k|    In[3] = (a3 + b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  141|   425k|    In[4] = (a3 - b3) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  142|   425k|    In[5] = (a2 - b2) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  143|   425k|    In[6] = (a1 - b1) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  144|   425k|    In[7] = (a0 - b0) >> ROW_SHIFT;
  ------------------
  |  |   45|   425k|#define ROW_SHIFT 11
  ------------------
  145|   425k|  }
  146|   820M|  return 1;
  147|  2.18G|}
idct.c:Idct_Col_8:
  168|  3.00M|{
  169|  3.00M|  int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill;
  170|       |
  171|       |    // odd
  172|       |
  173|  3.00M|  mm4 = (int)In[7*8];
  174|  3.00M|  mm5 = (int)In[5*8];
  175|  3.00M|  mm6 = (int)In[3*8];
  176|  3.00M|  mm7 = (int)In[1*8];
  177|       |
  178|  3.00M|  mm0 = MULT(Tan1, mm4, 16) + mm7;
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  179|  3.00M|  mm1 = MULT(Tan1, mm7, 16) - mm4;
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  180|  3.00M|  mm2 = MULT(Tan3, mm5, 16) + mm6;
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  181|  3.00M|  mm3 = MULT(Tan3, mm6, 16) - mm5;
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  182|       |
  183|  3.00M|  mm7 = mm0 + mm2;
  184|  3.00M|  mm4 = mm1 - mm3;
  185|  3.00M|  mm0 = mm0 - mm2;
  186|  3.00M|  mm1 = mm1 + mm3;
  187|  3.00M|  mm6 = mm0 + mm1;
  188|  3.00M|  mm5 = mm0 - mm1;
  189|  3.00M|  mm5 = 2*MULT(Sqrt2, mm5, 16);  // 2*sqrt2
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  190|  3.00M|  mm6 = 2*MULT(Sqrt2, mm6, 16);  // Watch out: precision loss but done to match
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  191|       |                                 // the pmulhw used in mmx/sse versions
  192|       |  
  193|       |    // even
  194|       |
  195|  3.00M|  mm1 = (int)In[2*8];
  196|  3.00M|  mm2 = (int)In[6*8];
  197|  3.00M|  mm3 = MULT(Tan2,mm2, 16) + mm1;
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  198|  3.00M|  mm2 = MULT(Tan2,mm1, 16) - mm2;
  ------------------
  |  |  154|  3.00M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  199|       |
  200|  3.00M|  LOAD_BUTF(mm0, mm1, 0*8, 4*8, Spill, In);
  ------------------
  |  |  164|  3.00M|  (m1) = (S)[(a)] + (S)[(b)];           \
  |  |  165|  3.00M|  (m2) = (S)[(a)] - (S)[(b)]
  ------------------
  201|       |
  202|  3.00M|  BUTF(mm0, mm3, Spill);
  ------------------
  |  |  159|  3.00M|  (tmp) = (a)+(b);      \
  |  |  160|  3.00M|  (b)   = (a)-(b);      \
  |  |  161|  3.00M|  (a)   = (tmp)
  ------------------
  203|  3.00M|  BUTF(mm0, mm7, Spill);
  ------------------
  |  |  159|  3.00M|  (tmp) = (a)+(b);      \
  |  |  160|  3.00M|  (b)   = (a)-(b);      \
  |  |  161|  3.00M|  (a)   = (tmp)
  ------------------
  204|  3.00M|  In[8*0] = (int16_t) (mm0 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  205|  3.00M|  In[8*7] = (int16_t) (mm7 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  206|  3.00M|  BUTF(mm3, mm4, mm0);
  ------------------
  |  |  159|  3.00M|  (tmp) = (a)+(b);      \
  |  |  160|  3.00M|  (b)   = (a)-(b);      \
  |  |  161|  3.00M|  (a)   = (tmp)
  ------------------
  207|  3.00M|  In[8*3] = (int16_t) (mm3 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  208|  3.00M|  In[8*4] = (int16_t) (mm4 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  209|       |
  210|  3.00M|  BUTF(mm1, mm2, mm0);
  ------------------
  |  |  159|  3.00M|  (tmp) = (a)+(b);      \
  |  |  160|  3.00M|  (b)   = (a)-(b);      \
  |  |  161|  3.00M|  (a)   = (tmp)
  ------------------
  211|  3.00M|  BUTF(mm1, mm6, mm0);
  ------------------
  |  |  159|  3.00M|  (tmp) = (a)+(b);      \
  |  |  160|  3.00M|  (b)   = (a)-(b);      \
  |  |  161|  3.00M|  (a)   = (tmp)
  ------------------
  212|  3.00M|  In[8*1] = (int16_t) (mm1 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  213|  3.00M|  In[8*6] = (int16_t) (mm6 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  214|  3.00M|  BUTF(mm2, mm5, mm0);
  ------------------
  |  |  159|  3.00M|  (tmp) = (a)+(b);      \
  |  |  160|  3.00M|  (b)   = (a)-(b);      \
  |  |  161|  3.00M|  (a)   = (tmp)
  ------------------
  215|  3.00M|  In[8*2] = (int16_t) (mm2 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  216|  3.00M|  In[8*5] = (int16_t) (mm5 >> COL_SHIFT);
  ------------------
  |  |   46|  3.00M|#define COL_SHIFT 6
  ------------------
  217|  3.00M|}
idct.c:Idct_Col_4:
  220|  1.04M|{
  221|  1.04M|  int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill;
  222|       |
  223|       |    // odd
  224|       |
  225|  1.04M|  mm0 = (int)In[1*8];
  226|  1.04M|  mm2 = (int)In[3*8];
  227|       |
  228|  1.04M|  mm1 = MULT(Tan1, mm0, 16);
  ------------------
  |  |  154|  1.04M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  229|  1.04M|  mm3 = MULT(Tan3, mm2, 16);
  ------------------
  |  |  154|  1.04M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  230|       |
  231|  1.04M|  mm7 = mm0 + mm2;
  232|  1.04M|  mm4 = mm1 - mm3;
  233|  1.04M|  mm0 = mm0 - mm2;
  234|  1.04M|  mm1 = mm1 + mm3;
  235|  1.04M|  mm6 = mm0 + mm1;
  236|  1.04M|  mm5 = mm0 - mm1;
  237|  1.04M|  mm6 = 2*MULT(Sqrt2, mm6, 16);  // 2*sqrt2
  ------------------
  |  |  154|  1.04M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  238|  1.04M|  mm5 = 2*MULT(Sqrt2, mm5, 16);
  ------------------
  |  |  154|  1.04M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  239|       |
  240|       |    // even
  241|       |
  242|  1.04M|  mm0 = mm1 = (int)In[0*8];
  243|  1.04M|  mm3 = (int)In[2*8];
  244|  1.04M|  mm2 = MULT(Tan2,mm3, 16);
  ------------------
  |  |  154|  1.04M|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  245|       |
  246|  1.04M|  BUTF(mm0, mm3, Spill);
  ------------------
  |  |  159|  1.04M|  (tmp) = (a)+(b);      \
  |  |  160|  1.04M|  (b)   = (a)-(b);      \
  |  |  161|  1.04M|  (a)   = (tmp)
  ------------------
  247|  1.04M|  BUTF(mm0, mm7, Spill);
  ------------------
  |  |  159|  1.04M|  (tmp) = (a)+(b);      \
  |  |  160|  1.04M|  (b)   = (a)-(b);      \
  |  |  161|  1.04M|  (a)   = (tmp)
  ------------------
  248|  1.04M|  In[8*0] = (int16_t) (mm0 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  249|  1.04M|  In[8*7] = (int16_t) (mm7 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  250|  1.04M|  BUTF(mm3, mm4, mm0);
  ------------------
  |  |  159|  1.04M|  (tmp) = (a)+(b);      \
  |  |  160|  1.04M|  (b)   = (a)-(b);      \
  |  |  161|  1.04M|  (a)   = (tmp)
  ------------------
  251|  1.04M|  In[8*3] = (int16_t) (mm3 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  252|  1.04M|  In[8*4] = (int16_t) (mm4 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  253|       |
  254|  1.04M|  BUTF(mm1, mm2, mm0);
  ------------------
  |  |  159|  1.04M|  (tmp) = (a)+(b);      \
  |  |  160|  1.04M|  (b)   = (a)-(b);      \
  |  |  161|  1.04M|  (a)   = (tmp)
  ------------------
  255|  1.04M|  BUTF(mm1, mm6, mm0);
  ------------------
  |  |  159|  1.04M|  (tmp) = (a)+(b);      \
  |  |  160|  1.04M|  (b)   = (a)-(b);      \
  |  |  161|  1.04M|  (a)   = (tmp)
  ------------------
  256|  1.04M|  In[8*1] = (int16_t) (mm1 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  257|  1.04M|  In[8*6] = (int16_t) (mm6 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  258|  1.04M|  BUTF(mm2, mm5, mm0);
  ------------------
  |  |  159|  1.04M|  (tmp) = (a)+(b);      \
  |  |  160|  1.04M|  (b)   = (a)-(b);      \
  |  |  161|  1.04M|  (a)   = (tmp)
  ------------------
  259|  1.04M|  In[8*2] = (int16_t) (mm2 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  260|  1.04M|  In[8*5] = (int16_t) (mm5 >> COL_SHIFT);
  ------------------
  |  |   46|  1.04M|#define COL_SHIFT 6
  ------------------
  261|  1.04M|}
idct.c:Idct_Col_3:
  264|  2.18G|{
  265|  2.18G|  int mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, Spill;
  266|       |
  267|       |    // odd
  268|       |
  269|  2.18G|  mm7 = (int)In[1*8];
  270|  2.18G|  mm4 = MULT(Tan1, mm7, 16);
  ------------------
  |  |  154|  2.18G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  271|       |
  272|  2.18G|  mm6 = mm7 + mm4;
  273|  2.18G|  mm5 = mm7 - mm4;
  274|  2.18G|  mm6 = 2*MULT(Sqrt2, mm6, 16);  // 2*sqrt2
  ------------------
  |  |  154|  2.18G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  275|  2.18G|  mm5 = 2*MULT(Sqrt2, mm5, 16);
  ------------------
  |  |  154|  2.18G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  276|       |
  277|       |    // even
  278|       |
  279|  2.18G|  mm0 = mm1 = (int)In[0*8];
  280|  2.18G|  mm3 = (int)In[2*8];
  281|  2.18G|  mm2 = MULT(Tan2,mm3, 16);
  ------------------
  |  |  154|  2.18G|#define MULT(c,x, n)  ( ((c) * (x)) >> (n) )
  ------------------
  282|       |
  283|  2.18G|  BUTF(mm0, mm3, Spill);
  ------------------
  |  |  159|  2.18G|  (tmp) = (a)+(b);      \
  |  |  160|  2.18G|  (b)   = (a)-(b);      \
  |  |  161|  2.18G|  (a)   = (tmp)
  ------------------
  284|  2.18G|  BUTF(mm0, mm7, Spill);
  ------------------
  |  |  159|  2.18G|  (tmp) = (a)+(b);      \
  |  |  160|  2.18G|  (b)   = (a)-(b);      \
  |  |  161|  2.18G|  (a)   = (tmp)
  ------------------
  285|  2.18G|  In[8*0] = (int16_t) (mm0 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  286|  2.18G|  In[8*7] = (int16_t) (mm7 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  287|  2.18G|  BUTF(mm3, mm4, mm0);
  ------------------
  |  |  159|  2.18G|  (tmp) = (a)+(b);      \
  |  |  160|  2.18G|  (b)   = (a)-(b);      \
  |  |  161|  2.18G|  (a)   = (tmp)
  ------------------
  288|  2.18G|  In[8*3] = (int16_t) (mm3 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  289|  2.18G|  In[8*4] = (int16_t) (mm4 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  290|       |
  291|  2.18G|  BUTF(mm1, mm2, mm0);
  ------------------
  |  |  159|  2.18G|  (tmp) = (a)+(b);      \
  |  |  160|  2.18G|  (b)   = (a)-(b);      \
  |  |  161|  2.18G|  (a)   = (tmp)
  ------------------
  292|  2.18G|  BUTF(mm1, mm6, mm0);
  ------------------
  |  |  159|  2.18G|  (tmp) = (a)+(b);      \
  |  |  160|  2.18G|  (b)   = (a)-(b);      \
  |  |  161|  2.18G|  (a)   = (tmp)
  ------------------
  293|  2.18G|  In[8*1] = (int16_t) (mm1 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  294|  2.18G|  In[8*6] = (int16_t) (mm6 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  295|  2.18G|  BUTF(mm2, mm5, mm0);
  ------------------
  |  |  159|  2.18G|  (tmp) = (a)+(b);      \
  |  |  160|  2.18G|  (b)   = (a)-(b);      \
  |  |  161|  2.18G|  (a)   = (tmp)
  ------------------
  296|  2.18G|  In[8*2] = (int16_t) (mm2 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  297|  2.18G|  In[8*5] = (int16_t) (mm5 >> COL_SHIFT);
  ------------------
  |  |   46|  2.18G|#define COL_SHIFT 6
  ------------------
  298|  2.18G|}

decoder_create:
  151|  10.2k|{
  152|  10.2k|  int ret = 0;
  153|  10.2k|  DECODER *dec;
  154|       |
  155|  10.2k|  if (XVID_VERSION_MAJOR(create->version) != 1) /* v1.x.x */
  ------------------
  |  |   63|  10.2k|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
  |  Branch (155:7): [True: 0, False: 10.2k]
  ------------------
  156|      0|    return XVID_ERR_VERSION;
  ------------------
  |  |   98|      0|#define XVID_ERR_VERSION	-4		/* structure version not supported */
  ------------------
  157|       |
  158|  10.2k|  dec = xvid_malloc(sizeof(DECODER), CACHE_LINE);
  ------------------
  |  |  131|  10.2k|#    define CACHE_LINE  64
  ------------------
  159|  10.2k|  if (dec == NULL) {
  ------------------
  |  Branch (159:7): [True: 0, False: 10.2k]
  ------------------
  160|      0|    return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  161|      0|  }
  162|       |
  163|  10.2k|  memset(dec, 0, sizeof(DECODER));
  164|       |
  165|  10.2k|  dec->mpeg_quant_matrices = xvid_malloc(sizeof(uint16_t) * 64 * 8, CACHE_LINE);
  ------------------
  |  |  131|  10.2k|#    define CACHE_LINE  64
  ------------------
  166|  10.2k|  if (dec->mpeg_quant_matrices == NULL) {
  ------------------
  |  Branch (166:7): [True: 0, False: 10.2k]
  ------------------
  167|      0|    xvid_free(dec);
  168|      0|    return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  169|      0|  }
  170|       |
  171|  10.2k|  create->handle = dec;
  172|       |
  173|  10.2k|  dec->width = MAX(0, create->width);
  ------------------
  |  |   35|  10.2k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  174|  10.2k|  dec->height = MAX(0, create->height);
  ------------------
  |  |   35|  10.2k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  175|       |
  176|  10.2k|  dec->num_threads = MAX(0, create->num_threads);
  ------------------
  |  |   35|  10.2k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 0, False: 10.2k]
  |  |  ------------------
  ------------------
  177|       |
  178|  10.2k|  image_null(&dec->cur);
  179|  10.2k|  image_null(&dec->refn[0]);
  180|  10.2k|  image_null(&dec->refn[1]);
  181|  10.2k|  image_null(&dec->tmp);
  182|  10.2k|  image_null(&dec->qtmp);
  183|       |
  184|       |  /* image based GMC */
  185|  10.2k|  image_null(&dec->gmc);
  186|       |
  187|  10.2k|  dec->mbs = NULL;
  188|  10.2k|  dec->last_mbs = NULL;
  189|  10.2k|  dec->qscale = NULL;
  190|       |
  191|  10.2k|  init_timer();
  192|  10.2k|  init_postproc(&dec->postproc);
  193|  10.2k|  init_mpeg_matrix(dec->mpeg_quant_matrices);
  194|       |
  195|       |  /* For B-frame support (used to save reference frame's time */
  196|  10.2k|  dec->frames = 0;
  197|  10.2k|  dec->time = dec->time_base = dec->last_time_base = 0;
  198|  10.2k|  dec->low_delay = 0;
  199|  10.2k|  dec->packed_mode = 0;
  200|  10.2k|  dec->time_inc_resolution = 1; /* until VOL header says otherwise */
  201|  10.2k|  dec->ver_id = 1;
  202|       |
  203|  10.2k|  if (create->fourcc == ((int)('X')|((int)('V')<<8)| 
  ------------------
  |  Branch (203:7): [True: 0, False: 10.2k]
  ------------------
  204|  10.2k|                         ((int)('I')<<16)|((int)('D')<<24))) { /* XVID */
  205|      0|    dec->bs_version = 0; /* Initially assume oldest xvid version */ 
  206|      0|  }
  207|  10.2k|  else {
  208|  10.2k|	dec->bs_version = 0xffff; /* Initialize to very high value -> assume bugfree stream */
  209|  10.2k|  }
  210|       |
  211|  10.2k|  dec->fixed_dimensions = (dec->width > 0 && dec->height > 0);
  ------------------
  |  Branch (211:28): [True: 0, False: 10.2k]
  |  Branch (211:46): [True: 0, False: 0]
  ------------------
  212|       |
  213|  10.2k|  ret = decoder_resize(dec);
  214|  10.2k|  if (ret == XVID_ERR_MEMORY) create->handle = NULL;
  ------------------
  |  |   96|  10.2k|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  |  Branch (214:7): [True: 0, False: 10.2k]
  ------------------
  215|       |
  216|  10.2k|  return ret;
  217|  10.2k|}
decoder_destroy:
  222|  10.2k|{
  223|  10.2k|  xvid_free(dec->last_mbs);
  224|  10.2k|  xvid_free(dec->mbs);
  225|  10.2k|  xvid_free(dec->qscale);
  226|       |
  227|       |  /* image based GMC */
  228|  10.2k|  image_destroy(&dec->gmc, dec->edged_width, dec->edged_height);
  229|       |
  230|  10.2k|  image_destroy(&dec->refn[0], dec->edged_width, dec->edged_height);
  231|  10.2k|  image_destroy(&dec->refn[1], dec->edged_width, dec->edged_height);
  232|  10.2k|  image_destroy(&dec->tmp, dec->edged_width, dec->edged_height);
  233|  10.2k|  image_destroy(&dec->qtmp, dec->edged_width, dec->edged_height);
  234|  10.2k|  image_destroy(&dec->cur, dec->edged_width, dec->edged_height);
  235|  10.2k|  xvid_free(dec->mpeg_quant_matrices);
  236|  10.2k|  xvid_free(dec);
  237|       |
  238|  10.2k|  write_timer();
  239|  10.2k|  return 0;
  240|  10.2k|}
decoder_decode:
 1566|  69.9k|{
 1567|       |
 1568|  69.9k|  Bitstream bs;
 1569|  69.9k|  uint32_t rounding = 0;
 1570|  69.9k|  uint32_t quant = 2;
 1571|  69.9k|  uint32_t fcode_forward = 0;
 1572|  69.9k|  uint32_t fcode_backward = 0;
 1573|  69.9k|  uint32_t intra_dc_threshold = 0;
 1574|  69.9k|  WARPPOINTS gmc_warp;
 1575|  69.9k|  int coding_type = -1;
 1576|  69.9k|  int success, output, seen_something;
 1577|       |
 1578|  69.9k|  if (XVID_VERSION_MAJOR(frame->version) != 1 || (stats && XVID_VERSION_MAJOR(stats->version) != 1))  /* v1.x.x */
  ------------------
  |  |   63|  69.9k|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
                if (XVID_VERSION_MAJOR(frame->version) != 1 || (stats && XVID_VERSION_MAJOR(stats->version) != 1))  /* v1.x.x */
  ------------------
  |  |   63|  69.9k|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
  |  Branch (1578:7): [True: 0, False: 69.9k]
  |  Branch (1578:51): [True: 69.9k, False: 0]
  |  Branch (1578:60): [True: 0, False: 69.9k]
  ------------------
 1579|      0|    return XVID_ERR_VERSION;
  ------------------
  |  |   98|      0|#define XVID_ERR_VERSION	-4		/* structure version not supported */
  ------------------
 1580|       |
 1581|  69.9k|  start_global_timer();
 1582|  69.9k|  memset((void *)&gmc_warp, 0, sizeof(WARPPOINTS));
 1583|       |
 1584|  69.9k|  dec->low_delay_default = (frame->general & XVID_LOWDELAY);
  ------------------
  |  |  268|  69.9k|#define XVID_LOWDELAY      (1<<0) /* lowdelay mode  */
  ------------------
 1585|  69.9k|  if ((frame->general & XVID_DISCONTINUITY))
  ------------------
  |  |  269|  69.9k|#define XVID_DISCONTINUITY (1<<1) /* indicates break in stream */
  ------------------
  |  Branch (1585:7): [True: 0, False: 69.9k]
  ------------------
 1586|      0|    dec->frames = 0;
 1587|  69.9k|  dec->out_frm = (frame->output.csp == XVID_CSP_SLICE) ? &frame->output : NULL;
  ------------------
  |  |  124|  69.9k|#define XVID_CSP_SLICE    (1<<12) /* decoder only: 4:2:0 planar, per slice rendering */
  ------------------
  |  Branch (1587:18): [True: 0, False: 69.9k]
  ------------------
 1588|       |
 1589|  69.9k|  if(frame->length<0) {  /* decoder flush */
  ------------------
  |  Branch (1589:6): [True: 0, False: 69.9k]
  ------------------
 1590|      0|    int ret;
 1591|       |    /* if not decoding "low_delay/packed", and this isn't low_delay and
 1592|       |      we have a reference frame, then outout the reference frame */
 1593|      0|    if (!(dec->low_delay_default && dec->packed_mode) && !dec->low_delay && dec->frames>0) {
  ------------------
  |  Branch (1593:11): [True: 0, False: 0]
  |  Branch (1593:37): [True: 0, False: 0]
  |  Branch (1593:58): [True: 0, False: 0]
  |  Branch (1593:77): [True: 0, False: 0]
  ------------------
 1594|      0|      decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1595|      0|      dec->frames = 0;
 1596|      0|      ret = 0;
 1597|      0|    } else {
 1598|      0|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|      0|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1598:11): [True: 0, False: 0]
  ------------------
 1599|      0|      ret = XVID_ERR_END;
  ------------------
  |  |   99|      0|#define XVID_ERR_END		-5		/* encoder only; end of stream reached */
  ------------------
 1600|      0|    }
 1601|       |
 1602|      0|    emms();
 1603|      0|    stop_global_timer();
 1604|      0|    return ret;
 1605|      0|  }
 1606|       |
 1607|  69.9k|  BitstreamInit(&bs, frame->bitstream, frame->length);
 1608|       |
 1609|       |  /* XXX: 0x7f is only valid whilst decoding vfw xvid/divx5 avi's */
 1610|  69.9k|  if(dec->low_delay_default && frame->length == 1 && BitstreamShowBits(&bs, 8) == 0x7f)
  ------------------
  |  Branch (1610:6): [True: 0, False: 69.9k]
  |  Branch (1610:32): [True: 0, False: 0]
  |  Branch (1610:54): [True: 0, False: 0]
  ------------------
 1611|      0|  {
 1612|      0|    image_output(&dec->refn[0], dec->width, dec->height, dec->edged_width,
 1613|      0|           (uint8_t**)frame->output.plane, frame->output.stride, frame->output.csp, dec->interlacing);
 1614|      0|    if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|      0|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1614:9): [True: 0, False: 0]
  ------------------
 1615|      0|    emms();
 1616|      0|    return 1; /* one byte consumed */
 1617|      0|  }
 1618|       |
 1619|  69.9k|  success = 0;
 1620|  69.9k|  output = 0;
 1621|  69.9k|  seen_something = 0;
 1622|       |
 1623|  1.86M|repeat:
 1624|       |
 1625|  1.86M|  coding_type = BitstreamReadHeaders(&bs, dec, &rounding,
 1626|  1.86M|      &quant, &fcode_forward, &fcode_backward, &intra_dc_threshold, &gmc_warp);
 1627|       |
 1628|  1.86M|  DPRINTF(XVID_DEBUG_HEADER, "coding_type=%i,  packed=%i,  time=%"
  ------------------
  |  |  198|  1.86M|#define XVID_DEBUG_HEADER    (1<< 2)
  ------------------
 1629|       |#if defined(_MSC_VER)
 1630|       |    "I64"
 1631|       |#else
 1632|  1.86M|    "ll"
 1633|  1.86M|#endif
 1634|  1.86M|    "i,  time_pp=%i,  time_bp=%i\n",
 1635|  1.86M|              coding_type,  dec->packed_mode, dec->time, dec->time_pp, dec->time_bp);
 1636|       |
 1637|  1.86M|  if (coding_type == -1) { /* nothing */
  ------------------
  |  Branch (1637:7): [True: 18.3k, False: 1.85M]
  ------------------
 1638|  18.3k|    if (success) goto done;
  ------------------
  |  Branch (1638:9): [True: 0, False: 18.3k]
  ------------------
 1639|  18.3k|    if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|  18.3k|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1639:9): [True: 18.3k, False: 0]
  ------------------
 1640|  18.3k|    emms();
 1641|  18.3k|    return BitstreamPos(&bs)/8;
 1642|  18.3k|  }
 1643|       |
 1644|  1.85M|  if (coding_type == -2 || coding_type == -3) { /* vol and/or resize */
  ------------------
  |  Branch (1644:7): [True: 21.6k, False: 1.82M]
  |  Branch (1644:28): [True: 12.9k, False: 1.81M]
  ------------------
 1645|       |
 1646|  34.5k|    if (coding_type == -3)
  ------------------
  |  Branch (1646:9): [True: 12.9k, False: 21.6k]
  ------------------
 1647|  12.9k|      if (decoder_resize(dec)) return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  |  Branch (1647:11): [True: 0, False: 12.9k]
  ------------------
 1648|       |
 1649|  34.5k|    if(stats) {
  ------------------
  |  Branch (1649:8): [True: 34.5k, False: 0]
  ------------------
 1650|  34.5k|      stats->type = XVID_TYPE_VOL;
  ------------------
  |  |  165|  34.5k|#define XVID_TYPE_VOL     -1 /* decoder only: vol was decoded */
  ------------------
 1651|  34.5k|      stats->data.vol.general = 0;
 1652|  34.5k|	  stats->data.vop.general = 0;
 1653|  34.5k|	  if (dec->interlacing) {
  ------------------
  |  Branch (1653:8): [True: 10.1k, False: 24.3k]
  ------------------
 1654|  10.1k|		stats->data.vol.general |= XVID_VOL_INTERLACING;
  ------------------
  |  |  661|  10.1k|#define XVID_VOL_INTERLACING    (1<<5) /* enable interlaced encoding */
  ------------------
 1655|  10.1k|		if (dec->top_field_first) {
  ------------------
  |  Branch (1655:7): [True: 929, False: 9.22k]
  ------------------
 1656|    929|	      stats->data.vop.general |= XVID_VOP_TOPFIELDFIRST;
  ------------------
  |  |  686|    929|#define XVID_VOP_TOPFIELDFIRST        (1<< 9) /* set top-field-first flag  */
  ------------------
 1657|    929|		}
 1658|  10.1k|	  }
 1659|  34.5k|      stats->data.vol.width = dec->width;
 1660|  34.5k|      stats->data.vol.height = dec->height;
 1661|  34.5k|      stats->data.vol.par = dec->aspect_ratio;
 1662|  34.5k|      stats->data.vol.par_width = dec->par_width;
 1663|  34.5k|      stats->data.vol.par_height = dec->par_height;
 1664|  34.5k|      emms();
 1665|  34.5k|      return BitstreamPos(&bs)/8; /* number of bytes consumed */
 1666|  34.5k|    }
 1667|      0|    goto repeat;
 1668|  34.5k|  }
 1669|       |
 1670|  1.81M|  if((dec->frames == 0 && coding_type != I_VOP) || (!dec->width || !dec->height)) {
  ------------------
  |  |  104|  1.80M|#define I_VOP	0
  ------------------
  |  Branch (1670:7): [True: 1.80M, False: 10.1k]
  |  Branch (1670:27): [True: 1.77M, False: 27.7k]
  |  Branch (1670:53): [True: 20.7k, False: 17.1k]
  |  Branch (1670:68): [True: 0, False: 17.1k]
  ------------------
 1671|       |    /* 1st frame is not an i-vop */
 1672|  1.79M|    goto repeat;
 1673|  1.79M|  }
 1674|       |
 1675|  17.1k|  dec->p_bmv.x = dec->p_bmv.y = dec->p_fmv.x = dec->p_fmv.y = 0;  /* init pred vector to 0 */
 1676|       |
 1677|       |  /* packed_mode: special-N_VOP treament */
 1678|  17.1k|  if (dec->packed_mode && coding_type == N_VOP) {
  ------------------
  |  |  108|    669|#define N_VOP	4
  ------------------
  |  Branch (1678:7): [True: 669, False: 16.4k]
  |  Branch (1678:27): [True: 254, False: 415]
  ------------------
 1679|    254|    if (dec->low_delay_default && dec->frames > 0) {
  ------------------
  |  Branch (1679:9): [True: 0, False: 254]
  |  Branch (1679:35): [True: 0, False: 0]
  ------------------
 1680|      0|      decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1681|      0|      output = 1;
 1682|      0|    }
 1683|       |    /* ignore otherwise */
 1684|  16.8k|  } else if (coding_type != B_VOP) {
  ------------------
  |  |  106|  16.8k|#define B_VOP	2
  ------------------
  |  Branch (1684:14): [True: 10.5k, False: 6.33k]
  ------------------
 1685|  10.5k|    switch(coding_type) {
  ------------------
  |  Branch (1685:12): [True: 0, False: 10.5k]
  ------------------
 1686|  7.00k|    case I_VOP :
  ------------------
  |  |  104|  7.00k|#define I_VOP	0
  ------------------
  |  Branch (1686:5): [True: 7.00k, False: 3.50k]
  ------------------
 1687|  7.00k|      decoder_iframe(dec, &bs, quant, intra_dc_threshold);
 1688|  7.00k|      break;
 1689|    988|    case P_VOP :
  ------------------
  |  |  105|    988|#define P_VOP	1
  ------------------
  |  Branch (1689:5): [True: 988, False: 9.51k]
  ------------------
 1690|    988|      decoder_pframe(dec, &bs, rounding, quant,
 1691|    988|                        fcode_forward, intra_dc_threshold, NULL);
 1692|    988|      break;
 1693|  2.47k|    case S_VOP :
  ------------------
  |  |  107|  2.47k|#define S_VOP	3
  ------------------
  |  Branch (1693:5): [True: 2.47k, False: 8.03k]
  ------------------
 1694|  2.47k|      decoder_pframe(dec, &bs, rounding, quant,
 1695|  2.47k|                        fcode_forward, intra_dc_threshold, &gmc_warp);
 1696|  2.47k|      break;
 1697|     42|    case N_VOP :
  ------------------
  |  |  108|     42|#define N_VOP	4
  ------------------
  |  Branch (1697:5): [True: 42, False: 10.4k]
  ------------------
 1698|       |      /* XXX: not_coded vops are not used for forward prediction */
 1699|       |      /* we should not swap(last_mbs,mbs) */
 1700|     42|      image_copy(&dec->cur, &dec->refn[0], dec->edged_width, dec->height);
 1701|     42|      SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs); /* it will be swapped back */
  ------------------
  |  |  264|     42|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
 1702|     42|      break;
 1703|  10.5k|    }
 1704|       |
 1705|       |    /* note: for packed_mode, output is performed when the special-N_VOP is decoded */
 1706|  10.5k|    if (!(dec->low_delay_default && dec->packed_mode)) {
  ------------------
  |  Branch (1706:11): [True: 0, False: 10.5k]
  |  Branch (1706:37): [True: 0, False: 0]
  ------------------
 1707|  10.5k|      if(dec->low_delay) {
  ------------------
  |  Branch (1707:10): [True: 286, False: 10.2k]
  ------------------
 1708|    286|        decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type, quant);
 1709|    286|        output = 1;
 1710|  10.2k|      } else if (dec->frames > 0) { /* is the reference frame valid? */
  ------------------
  |  Branch (1710:18): [True: 3.52k, False: 6.69k]
  ------------------
 1711|       |        /* output the reference frame */
 1712|  3.52k|        decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1713|  3.52k|        output = 1;
 1714|  3.52k|      }
 1715|  10.5k|    }
 1716|       |    
 1717|  10.5k|    image_swap(&dec->refn[0], &dec->refn[1]);
 1718|  10.5k|    dec->is_edged[1] = dec->is_edged[0];
 1719|  10.5k|    image_swap(&dec->cur, &dec->refn[0]);
 1720|  10.5k|    dec->is_edged[0] = 0;
 1721|  10.5k|    SWAP(MACROBLOCK *, dec->mbs, dec->last_mbs);
  ------------------
  |  |  264|  10.5k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
 1722|  10.5k|    dec->last_coding_type = coding_type;
 1723|       |
 1724|  10.5k|    dec->frames++;
 1725|  10.5k|    seen_something = 1;
 1726|       |
 1727|  10.5k|  } else {  /* B_VOP */
 1728|       |
 1729|  6.33k|    if (dec->low_delay) {
  ------------------
  |  Branch (1729:9): [True: 86, False: 6.25k]
  ------------------
 1730|     86|      DPRINTF(XVID_DEBUG_ERROR, "warning: bvop found in low_delay==1 stream\n");
  ------------------
  |  |  196|     86|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1731|     86|      dec->low_delay = 0;
 1732|     86|    }
 1733|       |
 1734|  6.33k|    if (dec->frames < 2) {
  ------------------
  |  Branch (1734:9): [True: 2.38k, False: 3.95k]
  ------------------
 1735|       |      /* attemping to decode a bvop without atleast 2 reference frames */
 1736|  2.38k|      image_printf(&dec->cur, dec->edged_width, dec->height, 16, 16,
 1737|  2.38k|            "broken b-frame, mising ref frames");
 1738|  2.38k|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|  2.38k|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1738:11): [True: 2.38k, False: 0]
  ------------------
 1739|  3.95k|    } else if (dec->time_pp <= dec->time_bp) {
  ------------------
  |  Branch (1739:16): [True: 1.71k, False: 2.24k]
  ------------------
 1740|       |      /* this occurs when dx50_bvop_compatibility==0 sequences are
 1741|       |      decoded in vfw. */
 1742|  1.71k|      image_printf(&dec->cur, dec->edged_width, dec->height, 16, 16,
 1743|  1.71k|            "broken b-frame, tpp=%i tbp=%i", dec->time_pp, dec->time_bp);
 1744|  1.71k|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|  1.71k|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1744:11): [True: 1.71k, False: 0]
  ------------------
 1745|  2.24k|    } else {
 1746|  2.24k|      decoder_bframe(dec, &bs, quant, fcode_forward, fcode_backward);
 1747|  2.24k|      decoder_output(dec, &dec->cur, dec->mbs, frame, stats, coding_type, quant);
 1748|  2.24k|    }
 1749|       |
 1750|  6.33k|    output = 1;
 1751|  6.33k|    dec->frames++;
 1752|  6.33k|  }
 1753|       |
 1754|       |#if 0 /* Avoids to read to much data because of 32bit reads in our BS functions */
 1755|       |   BitstreamByteAlign(&bs);
 1756|       |#endif
 1757|       |
 1758|       |  /* low_delay_default mode: repeat in packed_mode */
 1759|  17.1k|  if (dec->low_delay_default && dec->packed_mode && output == 0 && success == 0) {
  ------------------
  |  Branch (1759:7): [True: 0, False: 17.1k]
  |  Branch (1759:33): [True: 0, False: 0]
  |  Branch (1759:53): [True: 0, False: 0]
  |  Branch (1759:68): [True: 0, False: 0]
  ------------------
 1760|      0|    success = 1;
 1761|      0|    goto repeat;
 1762|      0|  }
 1763|       |
 1764|  17.1k|done :
 1765|       |
 1766|       |  /* if we reach here without outputing anything _and_
 1767|       |     the calling application has specified low_delay_default,
 1768|       |     we *must* output something.
 1769|       |     this always occurs on the first call to decode() call
 1770|       |     when bframes are present in the bitstream. it may also
 1771|       |     occur if no vops  were seen in the bitstream
 1772|       |
 1773|       |     if packed_mode is enabled, then we output the recently
 1774|       |     decoded frame (the very first ivop). otherwise we have
 1775|       |     nothing to display, and therefore output a black screen.
 1776|       |  */
 1777|  17.1k|  if (dec->low_delay_default && output == 0) {
  ------------------
  |  Branch (1777:7): [True: 0, False: 17.1k]
  |  Branch (1777:33): [True: 0, False: 0]
  ------------------
 1778|      0|    if (dec->packed_mode && seen_something) {
  ------------------
  |  Branch (1778:9): [True: 0, False: 0]
  |  Branch (1778:29): [True: 0, False: 0]
  ------------------
 1779|      0|      decoder_output(dec, &dec->refn[0], dec->last_mbs, frame, stats, dec->last_coding_type, quant);
 1780|      0|    } else {
 1781|      0|      image_clear(&dec->cur, dec->width, dec->height, dec->edged_width, 0, 128, 128);
 1782|      0|      decoder_output(dec, &dec->cur, NULL, frame, stats, P_VOP, quant);
  ------------------
  |  |  105|      0|#define P_VOP	1
  ------------------
 1783|      0|      if (stats) stats->type = XVID_TYPE_NOTHING;
  ------------------
  |  |  166|      0|#define XVID_TYPE_NOTHING  0 /* decoder only (encoder stats): nothing was decoded/encoded */
  ------------------
  |  Branch (1783:11): [True: 0, False: 0]
  ------------------
 1784|      0|    }
 1785|      0|  }
 1786|       |
 1787|  17.1k|  emms();
 1788|  17.1k|  stop_global_timer();
 1789|       |
 1790|  17.1k|  return (BitstreamPos(&bs)+7)/8; /* number of bytes consumed */
 1791|  17.1k|}
decoder.c:decoder_resize:
   70|  23.2k|{
   71|       |	/* free existing */
   72|  23.2k|	image_destroy(&dec->cur, dec->edged_width, dec->edged_height);
   73|  23.2k|	image_destroy(&dec->refn[0], dec->edged_width, dec->edged_height);
   74|  23.2k|	image_destroy(&dec->refn[1], dec->edged_width, dec->edged_height);
   75|  23.2k|	image_destroy(&dec->tmp, dec->edged_width, dec->edged_height);
   76|  23.2k|	image_destroy(&dec->qtmp, dec->edged_width, dec->edged_height);
   77|       |
   78|  23.2k|	image_destroy(&dec->gmc, dec->edged_width, dec->edged_height);
   79|       |
   80|  23.2k|  image_null(&dec->cur);
   81|  23.2k|  image_null(&dec->refn[0]);
   82|  23.2k|  image_null(&dec->refn[1]);
   83|  23.2k|  image_null(&dec->tmp);
   84|  23.2k|  image_null(&dec->qtmp);
   85|  23.2k|  image_null(&dec->gmc);
   86|       |
   87|       |
   88|  23.2k|  xvid_free(dec->last_mbs);
   89|  23.2k|  xvid_free(dec->mbs);
   90|  23.2k|  xvid_free(dec->qscale);
   91|  23.2k|  dec->last_mbs = NULL;
   92|  23.2k|  dec->mbs = NULL;
   93|  23.2k|  dec->qscale = NULL;
   94|       |
   95|       |	/* realloc */
   96|  23.2k|	dec->mb_width = (dec->width + 15) / 16;
   97|  23.2k|	dec->mb_height = (dec->height + 15) / 16;
   98|       |
   99|  23.2k|	dec->edged_width = 16 * dec->mb_width + 2 * EDGE_SIZE;
  ------------------
  |  |   36|  23.2k|#define EDGE_SIZE  64
  ------------------
  100|  23.2k|	dec->edged_height = 16 * dec->mb_height + 2 * EDGE_SIZE;
  ------------------
  |  |   36|  23.2k|#define EDGE_SIZE  64
  ------------------
  101|       |
  102|  23.2k|	if (   image_create(&dec->cur, dec->edged_width, dec->edged_height) 
  ------------------
  |  Branch (102:9): [True: 0, False: 23.2k]
  ------------------
  103|  23.2k|	    || image_create(&dec->refn[0], dec->edged_width, dec->edged_height)
  ------------------
  |  Branch (103:9): [True: 0, False: 23.2k]
  ------------------
  104|  23.2k|	    || image_create(&dec->refn[1], dec->edged_width, dec->edged_height) 	/* Support B-frame to reference last 2 frame */
  ------------------
  |  Branch (104:9): [True: 0, False: 23.2k]
  ------------------
  105|  23.2k|	    || image_create(&dec->tmp, dec->edged_width, dec->edged_height)
  ------------------
  |  Branch (105:9): [True: 0, False: 23.2k]
  ------------------
  106|  23.2k|	    || image_create(&dec->qtmp, dec->edged_width, dec->edged_height)
  ------------------
  |  Branch (106:9): [True: 0, False: 23.2k]
  ------------------
  107|  23.2k|      || image_create(&dec->gmc, dec->edged_width, dec->edged_height) )
  ------------------
  |  Branch (107:10): [True: 0, False: 23.2k]
  ------------------
  108|      0|    goto memory_error;
  109|       |
  110|  23.2k|	dec->mbs =
  111|  23.2k|		xvid_malloc(sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height,
  112|  23.2k|					CACHE_LINE);
  ------------------
  |  |  131|  23.2k|#    define CACHE_LINE  64
  ------------------
  113|  23.2k|	if (dec->mbs == NULL)
  ------------------
  |  Branch (113:6): [True: 0, False: 23.2k]
  ------------------
  114|      0|	  goto memory_error;
  115|  23.2k|	memset(dec->mbs, 0, sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height);
  116|       |
  117|       |	/* For skip MB flag */
  118|  23.2k|	dec->last_mbs =
  119|  23.2k|		xvid_malloc(sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height,
  120|  23.2k|					CACHE_LINE);
  ------------------
  |  |  131|  23.2k|#    define CACHE_LINE  64
  ------------------
  121|  23.2k|	if (dec->last_mbs == NULL)
  ------------------
  |  Branch (121:6): [True: 0, False: 23.2k]
  ------------------
  122|      0|	  goto memory_error;
  123|  23.2k|	memset(dec->last_mbs, 0, sizeof(MACROBLOCK) * dec->mb_width * dec->mb_height);
  124|       |
  125|       |	/* nothing happens if that fails */
  126|  23.2k|	dec->qscale =
  127|  23.2k|		xvid_malloc(sizeof(int) * dec->mb_width * dec->mb_height, CACHE_LINE);
  ------------------
  |  |  131|  23.2k|#    define CACHE_LINE  64
  ------------------
  128|       |	
  129|  23.2k|	if (dec->qscale)
  ------------------
  |  Branch (129:6): [True: 23.2k, False: 0]
  ------------------
  130|  23.2k|		memset(dec->qscale, 0, sizeof(int) * dec->mb_width * dec->mb_height);
  131|       |
  132|  23.2k|	return 0;
  133|       |
  134|      0|memory_error:
  135|       |        /* Most structures were deallocated / nullifieded, so it should be safe */
  136|       |        /* decoder_destroy(dec) minus the write_timer */
  137|      0|  xvid_free(dec->mbs);
  138|      0|  image_destroy(&dec->cur, dec->edged_width, dec->edged_height);
  139|      0|  image_destroy(&dec->refn[0], dec->edged_width, dec->edged_height);
  140|      0|  image_destroy(&dec->refn[1], dec->edged_width, dec->edged_height);
  141|      0|  image_destroy(&dec->tmp, dec->edged_width, dec->edged_height);
  142|      0|  image_destroy(&dec->qtmp, dec->edged_width, dec->edged_height);
  143|       |
  144|      0|  xvid_free(dec);
  145|      0|  return XVID_ERR_MEMORY;
  ------------------
  |  |   96|      0|#define XVID_ERR_MEMORY		-2		/* memory allocation error */
  ------------------
  146|  23.2k|}
decoder.c:decoder_output:
 1524|  6.05k|{
 1525|  6.05k|  const int brightness = XVID_VERSION_MINOR(frame->version) >= 1 ? frame->brightness : 0;
  ------------------
  |  |   64|  6.05k|#define XVID_VERSION_MINOR(a)    ((char)(((a)>> 8) & 0xff))
  ------------------
  |  Branch (1525:26): [True: 6.05k, False: 0]
  ------------------
 1526|       |
 1527|  6.05k|  if (dec->cartoon_mode)
  ------------------
  |  Branch (1527:7): [True: 12, False: 6.04k]
  ------------------
 1528|     12|    frame->general &= ~XVID_FILMEFFECT;
  ------------------
  |  |  272|     12|#define XVID_FILMEFFECT    (1<<4) /* adds film grain */
  ------------------
 1529|       |
 1530|  6.05k|  if ((frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) || brightness!=0)
  ------------------
  |  |  270|  6.05k|#define XVID_DEBLOCKY      (1<<2) /* perform luma deblocking */
  ------------------
                if ((frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) || brightness!=0)
  ------------------
  |  |  271|  6.05k|#define XVID_DEBLOCKUV     (1<<3) /* perform chroma deblocking */
  ------------------
                if ((frame->general & (XVID_DEBLOCKY|XVID_DEBLOCKUV|XVID_FILMEFFECT) || brightness!=0)
  ------------------
  |  |  272|  6.05k|#define XVID_FILMEFFECT    (1<<4) /* adds film grain */
  ------------------
  |  Branch (1530:8): [True: 0, False: 6.05k]
  |  Branch (1530:75): [True: 0, False: 6.05k]
  ------------------
 1531|  6.05k|    && mbs != NULL) /* post process */
  ------------------
  |  Branch (1531:8): [True: 0, False: 0]
  ------------------
 1532|      0|  {
 1533|       |    /* note: image is stored to tmp */
 1534|      0|    image_copy(&dec->tmp, img, dec->edged_width, dec->height);
 1535|      0|    image_postproc(&dec->postproc, &dec->tmp, dec->edged_width,
 1536|      0|             mbs, dec->mb_width, dec->mb_height, dec->mb_width,
 1537|      0|             frame->general, brightness, dec->frames, (coding_type == B_VOP), dec->num_threads);
  ------------------
  |  |  106|      0|#define B_VOP	2
  ------------------
 1538|      0|    img = &dec->tmp;
 1539|      0|  }
 1540|       |
 1541|  6.05k|  if ((frame->output.csp == XVID_CSP_INTERNAL) || 
  ------------------
  |  |  125|  6.05k|#define XVID_CSP_INTERNAL (1<<13) /* decoder only: 4:2:0 planar, returns ptrs to internal buffers */
  ------------------
  |  Branch (1541:7): [True: 0, False: 6.05k]
  ------------------
 1542|  6.05k|      ((frame->output.plane[0] != NULL) && (frame->output.stride[0] >= dec->width))) {
  ------------------
  |  Branch (1542:8): [True: 6.05k, False: 0]
  |  Branch (1542:44): [True: 6.05k, False: 0]
  ------------------
 1543|  6.05k|    image_output(img, dec->width, dec->height,
 1544|  6.05k|           dec->edged_width, (uint8_t**)frame->output.plane, frame->output.stride,
 1545|  6.05k|           frame->output.csp, dec->interlacing);
 1546|  6.05k|  }
 1547|       |
 1548|  6.05k|  if (stats) {
  ------------------
  |  Branch (1548:7): [True: 6.05k, False: 0]
  ------------------
 1549|  6.05k|    stats->type = coding2type(coding_type);
 1550|  6.05k|    stats->data.vop.time_base = (int)dec->time_base;
 1551|  6.05k|    stats->data.vop.time_increment = 0; /* XXX: todo */
 1552|  6.05k|    stats->data.vop.qscale_stride = dec->mb_width;
 1553|  6.05k|    stats->data.vop.qscale = dec->qscale;
 1554|  6.05k|    if (stats->data.vop.qscale != NULL && mbs != NULL) {
  ------------------
  |  Branch (1554:9): [True: 6.05k, False: 0]
  |  Branch (1554:43): [True: 6.05k, False: 0]
  ------------------
 1555|  6.05k|      unsigned int i;
 1556|  43.4M|      for (i = 0; i < dec->mb_width*dec->mb_height; i++)
  ------------------
  |  Branch (1556:19): [True: 43.4M, False: 6.05k]
  ------------------
 1557|  43.4M|        stats->data.vop.qscale[i] = mbs[i].quant;
 1558|  6.05k|    } else
 1559|      0|      stats->data.vop.qscale = NULL;
 1560|  6.05k|  }
 1561|  6.05k|}
decoder.c:decoder_iframe:
  735|  7.00k|{
  736|  7.00k|  uint32_t bound;
  737|  7.00k|  uint32_t x, y;
  738|  7.00k|  const uint32_t mb_width = dec->mb_width;
  739|  7.00k|  const uint32_t mb_height = dec->mb_height;
  740|       |
  741|  7.00k|  bound = 0;
  742|       |
  743|   182k|  for (y = 0; y < mb_height; y++) {
  ------------------
  |  Branch (743:15): [True: 175k, False: 7.00k]
  ------------------
  744|  27.6M|    for (x = 0; x < mb_width; x++) {
  ------------------
  |  Branch (744:17): [True: 27.4M, False: 175k]
  ------------------
  745|  27.4M|      MACROBLOCK *mb;
  746|  27.4M|      uint32_t mcbpc;
  747|  27.4M|      uint32_t cbpc;
  748|  27.4M|      uint32_t acpred_flag;
  749|  27.4M|      uint32_t cbpy;
  750|  27.4M|      uint32_t cbp;
  751|       |
  752|  27.4M|      while (BitstreamShowBits(bs, 9) == 1)
  ------------------
  |  Branch (752:14): [True: 1.49k, False: 27.4M]
  ------------------
  753|  1.49k|        BitstreamSkip(bs, 9);
  754|       |
  755|  27.4M|      if (check_resync_marker(bs, 0))
  ------------------
  |  Branch (755:11): [True: 12.6k, False: 27.4M]
  ------------------
  756|  12.6k|      {
  757|  12.6k|        bound = read_video_packet_header(bs, dec, 0,
  758|  12.6k|              &quant, NULL, NULL, &intra_dc_threshold);
  759|  12.6k|        x = bound % mb_width;
  760|  12.6k|        y = MIN((bound / mb_width), (mb_height-1));
  ------------------
  |  |   34|  12.6k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 8.90k, False: 3.71k]
  |  |  ------------------
  ------------------
  761|  12.6k|      }
  762|  27.4M|      mb = &dec->mbs[y * dec->mb_width + x];
  763|       |
  764|  27.4M|      DPRINTF(XVID_DEBUG_MB, "macroblock (%i,%i) %08x\n", x, y, BitstreamShowBits(bs, 32));
  ------------------
  |  |  200|  27.4M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
  765|       |
  766|  27.4M|      mcbpc = get_mcbpc_intra(bs);
  767|  27.4M|      mb->mode = mcbpc & 7;
  768|  27.4M|      cbpc = (mcbpc >> 4);
  769|       |
  770|  27.4M|      acpred_flag = BitstreamGetBit(bs);
  771|       |
  772|  27.4M|      cbpy = get_cbpy(bs, 1);
  773|  27.4M|      cbp = (cbpy << 2) | cbpc;
  774|       |
  775|  27.4M|      if (mb->mode == MODE_INTRA_Q) {
  ------------------
  |  |   38|  27.4M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (775:11): [True: 9.80k, False: 27.4M]
  ------------------
  776|  9.80k|        quant += dquant_table[BitstreamGetBits(bs, 2)];
  777|  9.80k|        if (quant > 31) {
  ------------------
  |  Branch (777:13): [True: 2.15k, False: 7.65k]
  ------------------
  778|  2.15k|          quant = 31;
  779|  7.65k|        } else if (quant < 1) {
  ------------------
  |  Branch (779:20): [True: 1.77k, False: 5.87k]
  ------------------
  780|  1.77k|          quant = 1;
  781|  1.77k|        }
  782|  9.80k|      }
  783|  27.4M|      mb->quant = quant;
  784|  27.4M|      mb->mvs[0].x = mb->mvs[0].y =
  785|  27.4M|      mb->mvs[1].x = mb->mvs[1].y =
  786|  27.4M|      mb->mvs[2].x = mb->mvs[2].y =
  787|  27.4M|      mb->mvs[3].x = mb->mvs[3].y =0;
  788|       |
  789|  27.4M|      if (dec->interlacing) {
  ------------------
  |  Branch (789:11): [True: 19.7M, False: 7.69M]
  ------------------
  790|  19.7M|        mb->field_dct = BitstreamGetBit(bs);
  791|  19.7M|        DPRINTF(XVID_DEBUG_MB,"deci: field_dct: %i\n", mb->field_dct);
  ------------------
  |  |  200|  19.7M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
  792|  19.7M|      }
  793|       |
  794|  27.4M|      decoder_mbintra(dec, mb, x, y, acpred_flag, cbp, bs, quant,
  795|  27.4M|              intra_dc_threshold, bound);
  796|       |
  797|  27.4M|    }
  798|   175k|    if(dec->out_frm)
  ------------------
  |  Branch (798:8): [True: 0, False: 175k]
  ------------------
  799|      0|      output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,0,y,mb_width);
  800|   175k|  }
  801|       |
  802|  7.00k|}
decoder.c:decoder_mbintra:
  258|  45.4M|{
  259|       |
  260|  45.4M|  DECLARE_ALIGNED_MATRIX(block, 6, 64, int16_t, CACHE_LINE);
  ------------------
  |  |  287|  45.4M|	type name##_storage[(sizex)*(sizey)+(alignment)-1]; \
  |  |  288|  45.4M|type * name = (type *) (((ptr_t) name##_storage+(alignment - 1)) & ~((ptr_t)(alignment)-1))
  ------------------
  261|  45.4M|  DECLARE_ALIGNED_MATRIX(data, 6, 64, int16_t, CACHE_LINE);
  ------------------
  |  |  287|  45.4M|	type name##_storage[(sizex)*(sizey)+(alignment)-1]; \
  |  |  288|  45.4M|type * name = (type *) (((ptr_t) name##_storage+(alignment - 1)) & ~((ptr_t)(alignment)-1))
  ------------------
  262|       |
  263|  45.4M|  uint32_t stride = dec->edged_width;
  264|  45.4M|  uint32_t stride2 = stride / 2;
  265|  45.4M|  uint32_t next_block = stride * 8;
  266|  45.4M|  uint32_t i;
  267|  45.4M|  uint32_t iQuant = MAX(1, pMB->quant);
  ------------------
  |  |   35|  45.4M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 1.15M, False: 44.2M]
  |  |  ------------------
  ------------------
  268|  45.4M|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
  269|       |
  270|  45.4M|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  271|  45.4M|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  272|  45.4M|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  273|       |
  274|  45.4M|  memset(block, 0, 6 * 64 * sizeof(int16_t)); /* clear */
  275|       |
  276|   318M|  for (i = 0; i < 6; i++) {
  ------------------
  |  Branch (276:15): [True: 272M, False: 45.4M]
  ------------------
  277|   272M|    uint32_t iDcScaler = get_dc_scaler(iQuant, i < 4);
  278|   272M|    int16_t predictors[8];
  279|   272M|    int start_coeff;
  280|       |
  281|   272M|    start_timer();
  282|   272M|    predict_acdc(dec->mbs, x_pos, y_pos, dec->mb_width, i, &block[i * 64],
  283|   272M|           iQuant, iDcScaler, predictors, bound);
  284|   272M|    if (!acpred_flag) {
  ------------------
  |  Branch (284:9): [True: 272M, False: 368k]
  ------------------
  285|   272M|      pMB->acpred_directions[i] = 0;
  286|   272M|    }
  287|   272M|    stop_prediction_timer();
  288|       |
  289|   272M|    if (quant < intra_dc_threshold) {
  ------------------
  |  Branch (289:9): [True: 133M, False: 139M]
  ------------------
  290|   133M|      int dc_size;
  291|   133M|      int dc_dif;
  292|       |
  293|   133M|      dc_size = i < 4 ? get_dc_size_lum(bs) : get_dc_size_chrom(bs);
  ------------------
  |  Branch (293:17): [True: 89.0M, False: 44.5M]
  ------------------
  294|   133M|      dc_dif = dc_size ? get_dc_dif(bs, dc_size) : 0;
  ------------------
  |  Branch (294:16): [True: 44.7M, False: 88.8M]
  ------------------
  295|       |
  296|   133M|      if (dc_size > 8) {
  ------------------
  |  Branch (296:11): [True: 117k, False: 133M]
  ------------------
  297|   117k|        BitstreamSkip(bs, 1); /* marker */
  298|   117k|      }
  299|       |
  300|   133M|      block[i * 64 + 0] = dc_dif;
  301|   133M|      start_coeff = 1;
  302|       |
  303|   133M|      DPRINTF(XVID_DEBUG_COEFF,"block[0] %i\n", dc_dif);
  ------------------
  |  |  201|   133M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  304|   139M|    } else {
  305|   139M|      start_coeff = 0;
  306|   139M|    }
  307|       |
  308|   272M|    start_timer();
  309|   272M|    if (cbp & (1 << (5 - i))) /* coded */
  ------------------
  |  Branch (309:9): [True: 272M, False: 299k]
  ------------------
  310|   272M|    {
  311|   272M|      int direction = dec->alternate_vertical_scan ?
  ------------------
  |  Branch (311:23): [True: 91.6M, False: 180M]
  ------------------
  312|   180M|        2 : pMB->acpred_directions[i];
  313|       |
  314|   272M|      get_intra_block(bs, &block[i * 64], direction, start_coeff);
  315|   272M|    }
  316|   272M|    stop_coding_timer();
  317|       |
  318|   272M|    start_timer();
  319|   272M|    add_acdc(pMB, i, &block[i * 64], iDcScaler, predictors, dec->bs_version);
  320|   272M|    stop_prediction_timer();
  321|       |
  322|   272M|    start_timer();
  323|   272M|    if (dec->quant_type == 0) {
  ------------------
  |  Branch (323:9): [True: 158M, False: 114M]
  ------------------
  324|   158M|      dequant_h263_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
  325|   158M|    } else {
  326|   114M|      dequant_mpeg_intra(&data[i * 64], &block[i * 64], iQuant, iDcScaler, dec->mpeg_quant_matrices);
  327|   114M|    }
  328|   272M|    stop_iquant_timer();
  329|       |
  330|   272M|    start_timer();
  331|   272M|    idct((short * const)&data[i * 64]);
  332|   272M|    stop_idct_timer();
  333|       |
  334|   272M|  }
  335|       |
  336|  45.4M|  if (dec->interlacing && pMB->field_dct) {
  ------------------
  |  Branch (336:7): [True: 33.5M, False: 11.8M]
  |  Branch (336:27): [True: 68.3k, False: 33.4M]
  ------------------
  337|  68.3k|    next_block = stride;
  338|  68.3k|    stride *= 2;
  339|  68.3k|  }
  340|       |
  341|  45.4M|  start_timer();
  342|  45.4M|  transfer_16to8copy(pY_Cur, &data[0 * 64], stride);
  343|  45.4M|  transfer_16to8copy(pY_Cur + 8, &data[1 * 64], stride);
  344|  45.4M|  transfer_16to8copy(pY_Cur + next_block, &data[2 * 64], stride);
  345|  45.4M|  transfer_16to8copy(pY_Cur + 8 + next_block, &data[3 * 64], stride);
  346|  45.4M|  transfer_16to8copy(pU_Cur, &data[4 * 64], stride2);
  347|  45.4M|  transfer_16to8copy(pV_Cur, &data[5 * 64], stride2);
  348|  45.4M|  stop_transfer_timer();
  349|  45.4M|}
decoder.c:decoder_pframe:
  948|  3.45k|{
  949|  3.45k|  uint32_t x, y;
  950|  3.45k|  uint32_t bound;
  951|  3.45k|  int cp_mb, st_mb;
  952|  3.45k|  const uint32_t mb_width = dec->mb_width;
  953|  3.45k|  const uint32_t mb_height = dec->mb_height;
  954|       |
  955|  3.45k|  if (!dec->is_edged[0]) {
  ------------------
  |  Branch (955:7): [True: 3.45k, False: 0]
  ------------------
  956|  3.45k|    start_timer();
  957|  3.45k|    image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height,
  958|  3.45k|            dec->width, dec->height, dec->bs_version);
  959|  3.45k|    dec->is_edged[0] = 1;
  960|  3.45k|    stop_edges_timer();
  961|  3.45k|  }
  962|       |
  963|  3.45k|  if (gmc_warp) {
  ------------------
  |  Branch (963:7): [True: 2.47k, False: 988]
  ------------------
  964|       |    /* accuracy: 0==1/2, 1=1/4, 2=1/8, 3=1/16 */
  965|  2.47k|    generate_GMCparameters( dec->sprite_warping_points,
  966|  2.47k|        dec->sprite_warping_accuracy, gmc_warp,
  967|  2.47k|        dec->width, dec->height, &dec->new_gmc_data);
  968|       |
  969|       |    /* image warping is done block-based in decoder_mbgmc(), now */
  970|  2.47k|  }
  971|       |
  972|  3.45k|  bound = 0;
  973|       |
  974|   448k|  for (y = 0; y < mb_height; y++) {
  ------------------
  |  Branch (974:15): [True: 444k, False: 3.45k]
  ------------------
  975|   444k|    cp_mb = st_mb = 0;
  976|  19.1M|    for (x = 0; x < mb_width; x++) {
  ------------------
  |  Branch (976:17): [True: 18.7M, False: 444k]
  ------------------
  977|  18.7M|      MACROBLOCK *mb;
  978|       |
  979|       |      /* skip stuffing */
  980|  18.7M|      while (BitstreamShowBits(bs, 10) == 1)
  ------------------
  |  Branch (980:14): [True: 20.9k, False: 18.7M]
  ------------------
  981|  20.9k|        BitstreamSkip(bs, 10);
  982|       |
  983|  18.7M|      if (check_resync_marker(bs, fcode - 1)) {
  ------------------
  |  Branch (983:11): [True: 11.7k, False: 18.7M]
  ------------------
  984|  11.7k|        bound = read_video_packet_header(bs, dec, fcode - 1,
  985|  11.7k|          &quant, &fcode, NULL, &intra_dc_threshold);
  986|  11.7k|        x = bound % mb_width;
  987|  11.7k|        y = MIN((bound / mb_width), (mb_height-1));
  ------------------
  |  |   34|  11.7k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 10.7k, False: 1.03k]
  |  |  ------------------
  ------------------
  988|  11.7k|      }
  989|  18.7M|      mb = &dec->mbs[y * dec->mb_width + x];
  990|       |
  991|  18.7M|      DPRINTF(XVID_DEBUG_MB, "macroblock (%i,%i) %08x\n", x, y, BitstreamShowBits(bs, 32));
  ------------------
  |  |  200|  18.7M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
  992|       |
  993|  18.7M|      if (!(BitstreamGetBit(bs))) { /* block _is_ coded */
  ------------------
  |  Branch (993:11): [True: 18.1M, False: 539k]
  ------------------
  994|  18.1M|        uint32_t mcbpc, cbpc, cbpy, cbp;
  995|  18.1M|        uint32_t intra, acpred_flag = 0;
  996|  18.1M|        int mcsel = 0;    /* mcsel: '0'=local motion, '1'=GMC */
  997|       |
  998|  18.1M|        cp_mb++;
  999|  18.1M|        mcbpc = get_mcbpc_inter(bs);
 1000|  18.1M|        mb->mode = mcbpc & 7;
 1001|  18.1M|        cbpc = (mcbpc >> 4);
 1002|       |
 1003|  18.1M|        DPRINTF(XVID_DEBUG_MB, "mode %i\n", mb->mode);
  ------------------
  |  |  200|  18.1M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1004|  18.1M|        DPRINTF(XVID_DEBUG_MB, "cbpc %i\n", cbpc);
  ------------------
  |  |  200|  18.1M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1005|       |
 1006|  18.1M|        intra = (mb->mode == MODE_INTRA || mb->mode == MODE_INTRA_Q);
  ------------------
  |  |   37|  36.3M|#define	MODE_INTRA		3
  ------------------
                      intra = (mb->mode == MODE_INTRA || mb->mode == MODE_INTRA_Q);
  ------------------
  |  |   38|  18.1M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (1006:18): [True: 21.3k, False: 18.1M]
  |  Branch (1006:44): [True: 15.7k, False: 18.1M]
  ------------------
 1007|       |
 1008|  18.1M|        if (gmc_warp && (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q))
  ------------------
  |  |   34|  28.1M|#define MODE_INTER		0
  ------------------
                      if (gmc_warp && (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q))
  ------------------
  |  |   35|  13.9M|#define MODE_INTER_Q	1
  ------------------
  |  Branch (1008:13): [True: 14.0M, False: 4.14M]
  |  Branch (1008:26): [True: 109k, False: 13.9M]
  |  Branch (1008:52): [True: 23.4k, False: 13.9M]
  ------------------
 1009|   132k|          mcsel = BitstreamGetBit(bs);
 1010|  18.0M|        else if (intra)
  ------------------
  |  Branch (1010:18): [True: 37.0k, False: 18.0M]
  ------------------
 1011|  37.0k|          acpred_flag = BitstreamGetBit(bs);
 1012|       |
 1013|  18.1M|        cbpy = get_cbpy(bs, intra);
 1014|  18.1M|        DPRINTF(XVID_DEBUG_MB, "cbpy %i mcsel %i \n", cbpy,mcsel);
  ------------------
  |  |  200|  18.1M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1015|       |
 1016|  18.1M|        cbp = (cbpy << 2) | cbpc;
 1017|       |
 1018|  18.1M|        if (mb->mode == MODE_INTER_Q || mb->mode == MODE_INTRA_Q) {
  ------------------
  |  |   35|  36.3M|#define MODE_INTER_Q	1
  ------------------
                      if (mb->mode == MODE_INTER_Q || mb->mode == MODE_INTRA_Q) {
  ------------------
  |  |   38|  18.1M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (1018:13): [True: 32.8k, False: 18.1M]
  |  Branch (1018:41): [True: 15.7k, False: 18.1M]
  ------------------
 1019|  48.6k|          int dquant = dquant_table[BitstreamGetBits(bs, 2)];
 1020|  48.6k|          DPRINTF(XVID_DEBUG_MB, "dquant %i\n", dquant);
  ------------------
  |  |  200|  48.6k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1021|  48.6k|          quant += dquant;
 1022|  48.6k|          if (quant > 31) {
  ------------------
  |  Branch (1022:15): [True: 4.47k, False: 44.1k]
  ------------------
 1023|  4.47k|            quant = 31;
 1024|  44.1k|          } else if (quant < 1) {
  ------------------
  |  Branch (1024:22): [True: 18.2k, False: 25.8k]
  ------------------
 1025|  18.2k|            quant = 1;
 1026|  18.2k|          }
 1027|  48.6k|          DPRINTF(XVID_DEBUG_MB, "quant %i\n", quant);
  ------------------
  |  |  200|  48.6k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1028|  48.6k|        }
 1029|  18.1M|        mb->quant = quant;
 1030|       |
 1031|  18.1M|        mb->field_pred=0;
 1032|  18.1M|        if (dec->interlacing) {
  ------------------
  |  Branch (1032:13): [True: 13.9M, False: 4.28M]
  ------------------
 1033|  13.9M|          if (cbp || intra) {
  ------------------
  |  Branch (1033:15): [True: 13.8M, False: 47.6k]
  |  Branch (1033:22): [True: 1.05k, False: 46.5k]
  ------------------
 1034|  13.8M|            mb->field_dct = BitstreamGetBit(bs);
 1035|  13.8M|            DPRINTF(XVID_DEBUG_MB,"decp: field_dct: %i\n", mb->field_dct);
  ------------------
  |  |  200|  13.8M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1036|  13.8M|          }
 1037|       |
 1038|  13.9M|          if ((mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) && !mcsel) {
  ------------------
  |  |   34|  27.8M|#define MODE_INTER		0
  ------------------
                        if ((mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) && !mcsel) {
  ------------------
  |  |   35|  13.8M|#define MODE_INTER_Q	1
  ------------------
  |  Branch (1038:16): [True: 94.4k, False: 13.8M]
  |  Branch (1038:42): [True: 20.8k, False: 13.7M]
  |  Branch (1038:71): [True: 75.1k, False: 40.1k]
  ------------------
 1039|  75.1k|            mb->field_pred = BitstreamGetBit(bs);
 1040|  75.1k|            DPRINTF(XVID_DEBUG_MB, "decp: field_pred: %i\n", mb->field_pred);
  ------------------
  |  |  200|  75.1k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1041|       |
 1042|  75.1k|            if (mb->field_pred) {
  ------------------
  |  Branch (1042:17): [True: 28.0k, False: 47.1k]
  ------------------
 1043|  28.0k|              mb->field_for_top = BitstreamGetBit(bs);
 1044|  28.0k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_top: %i\n", mb->field_for_top);
  ------------------
  |  |  200|  28.0k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1045|  28.0k|              mb->field_for_bot = BitstreamGetBit(bs);
 1046|  28.0k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_bot: %i\n", mb->field_for_bot);
  ------------------
  |  |  200|  28.0k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1047|  28.0k|            }
 1048|  75.1k|          }
 1049|  13.9M|        }
 1050|       |
 1051|  18.1M|        if (mcsel) {
  ------------------
  |  Branch (1051:13): [True: 52.4k, False: 18.1M]
  ------------------
 1052|  52.4k|          decoder_mbgmc(dec, mb, x, y, fcode, cbp, bs, rounding);
 1053|  52.4k|          continue;
 1054|       |
 1055|  18.1M|        } else if (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) {
  ------------------
  |  |   34|  36.2M|#define MODE_INTER		0
  ------------------
                      } else if (mb->mode == MODE_INTER || mb->mode == MODE_INTER_Q) {
  ------------------
  |  |   35|  18.0M|#define MODE_INTER_Q	1
  ------------------
  |  Branch (1055:20): [True: 123k, False: 18.0M]
  |  Branch (1055:46): [True: 24.2k, False: 17.9M]
  ------------------
 1056|       |
 1057|   147k|          if(dec->interlacing) {
  ------------------
  |  Branch (1057:14): [True: 75.1k, False: 72.7k]
  ------------------
 1058|       |            /* Get motion vectors interlaced, field_pred is handled there */
 1059|  75.1k|            get_motion_vector_interlaced(dec, bs, x, y, 0, mb, fcode, bound);
 1060|  75.1k|          } else {
 1061|  72.7k|            get_motion_vector(dec, bs, x, y, 0, &mb->mvs[0], fcode, bound);
 1062|  72.7k|            mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1063|  72.7k|          }
 1064|  17.9M|        } else if (mb->mode == MODE_INTER4V ) {
  ------------------
  |  |   36|  17.9M|#define MODE_INTER4V	2
  ------------------
  |  Branch (1064:20): [True: 36.9k, False: 17.9M]
  ------------------
 1065|       |          /* interlaced missing here */
 1066|  36.9k|          get_motion_vector(dec, bs, x, y, 0, &mb->mvs[0], fcode, bound);
 1067|  36.9k|          get_motion_vector(dec, bs, x, y, 1, &mb->mvs[1], fcode, bound);
 1068|  36.9k|          get_motion_vector(dec, bs, x, y, 2, &mb->mvs[2], fcode, bound);
 1069|  36.9k|          get_motion_vector(dec, bs, x, y, 3, &mb->mvs[3], fcode, bound);
 1070|  17.9M|        } else { /* MODE_INTRA, MODE_INTRA_Q */
 1071|  17.9M|          mb->mvs[0].x = mb->mvs[1].x = mb->mvs[2].x = mb->mvs[3].x = 0;
 1072|  17.9M|          mb->mvs[0].y = mb->mvs[1].y = mb->mvs[2].y = mb->mvs[3].y = 0;
 1073|  17.9M|          decoder_mbintra(dec, mb, x, y, acpred_flag, cbp, bs, quant,
 1074|  17.9M|                  intra_dc_threshold, bound);
 1075|  17.9M|          continue;
 1076|  17.9M|        }
 1077|       |
 1078|       |        /* See how to decode */
 1079|   184k|        if(!mb->field_pred)
  ------------------
  |  Branch (1079:12): [True: 156k, False: 28.0k]
  ------------------
 1080|   156k|         decoder_mbinter(dec, mb, x, y, cbp, bs, rounding, 0, 0);
 1081|  28.0k|        else 
 1082|  28.0k|         decoder_mbinter_field(dec, mb, x, y, cbp, bs, rounding, 0, 0);
 1083|       |
 1084|   539k|      } else if (gmc_warp) {  /* a not coded S(GMC)-VOP macroblock */
  ------------------
  |  Branch (1084:18): [True: 329k, False: 209k]
  ------------------
 1085|   329k|        mb->mode = MODE_NOT_CODED_GMC;
  ------------------
  |  |   40|   329k|#define MODE_NOT_CODED_GMC	17
  ------------------
 1086|   329k|        mb->quant = quant;
 1087|   329k|        decoder_mbgmc(dec, mb, x, y, fcode, 0x00, bs, rounding);
 1088|       |
 1089|   329k|        if(dec->out_frm && cp_mb > 0) {
  ------------------
  |  Branch (1089:12): [True: 0, False: 329k]
  |  Branch (1089:28): [True: 0, False: 0]
  ------------------
 1090|      0|          output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,st_mb,y,cp_mb);
 1091|      0|          cp_mb = 0;
 1092|      0|        }
 1093|   329k|        st_mb = x+1;
 1094|   329k|      } else { /* not coded P_VOP macroblock */
 1095|   209k|        mb->mode = MODE_NOT_CODED;
  ------------------
  |  |   39|   209k|#define MODE_NOT_CODED	16
  ------------------
 1096|   209k|        mb->quant = quant;
 1097|       |
 1098|   209k|        mb->mvs[0].x = mb->mvs[1].x = mb->mvs[2].x = mb->mvs[3].x = 0;
 1099|   209k|        mb->mvs[0].y = mb->mvs[1].y = mb->mvs[2].y = mb->mvs[3].y = 0;
 1100|   209k|        mb->field_pred=0; /* (!) */
 1101|       |
 1102|   209k|        decoder_mbinter(dec, mb, x, y, 0, bs, 
 1103|   209k|                                rounding, 0, 0);
 1104|       |
 1105|   209k|        if(dec->out_frm && cp_mb > 0) {
  ------------------
  |  Branch (1105:12): [True: 0, False: 209k]
  |  Branch (1105:28): [True: 0, False: 0]
  ------------------
 1106|      0|          output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,st_mb,y,cp_mb);
 1107|      0|          cp_mb = 0;
 1108|      0|        }
 1109|   209k|        st_mb = x+1;
 1110|   209k|      }
 1111|  18.7M|    }
 1112|       |
 1113|   444k|    if(dec->out_frm && cp_mb > 0)
  ------------------
  |  Branch (1113:8): [True: 0, False: 444k]
  |  Branch (1113:24): [True: 0, False: 0]
  ------------------
 1114|      0|      output_slice(&dec->cur, dec->edged_width,dec->width,dec->out_frm,st_mb,y,cp_mb);
 1115|   444k|  }
 1116|  3.45k|}
decoder.c:decoder_mbgmc:
  690|   381k|{
  691|   381k|  const uint32_t stride = dec->edged_width;
  692|   381k|  const uint32_t stride2 = stride / 2;
  693|       |
  694|   381k|  uint8_t *const pY_Cur=dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  695|   381k|  uint8_t *const pU_Cur=dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  696|   381k|  uint8_t *const pV_Cur=dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  697|       |
  698|   381k|  NEW_GMC_DATA * gmc_data = &dec->new_gmc_data;
  699|       |
  700|   381k|  pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = pMB->amv;
  701|       |
  702|   381k|  start_timer();
  703|       |
  704|       |/* this is where the calculations are done */
  705|       |
  706|   381k|  gmc_data->predict_16x16(gmc_data,
  707|   381k|      dec->cur.y + y_pos*16*stride + x_pos*16, dec->refn[0].y,
  708|   381k|      stride, stride, x_pos, y_pos, rounding);
  709|       |
  710|   381k|  gmc_data->predict_8x8(gmc_data,
  711|   381k|      dec->cur.u + y_pos*8*stride2 + x_pos*8, dec->refn[0].u,
  712|   381k|      dec->cur.v + y_pos*8*stride2 + x_pos*8, dec->refn[0].v,
  713|   381k|      stride2, stride2, x_pos, y_pos, rounding);
  714|       |
  715|   381k|  gmc_data->get_average_mv(gmc_data, &pMB->amv, x_pos, y_pos, dec->quarterpel);
  716|       |
  717|   381k|  pMB->amv.x = gmc_sanitize(pMB->amv.x, dec->quarterpel, fcode);
  718|   381k|  pMB->amv.y = gmc_sanitize(pMB->amv.y, dec->quarterpel, fcode);
  719|       |
  720|   381k|  pMB->mvs[0] = pMB->mvs[1] = pMB->mvs[2] = pMB->mvs[3] = pMB->amv;
  721|       |
  722|   381k|  stop_transfer_timer();
  723|       |
  724|   381k|  if (cbp)
  ------------------
  |  Branch (724:7): [True: 37.5k, False: 344k]
  ------------------
  725|  37.5k|    decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
  726|       |
  727|   381k|}
decoder.c:decoder_mb_decode:
  359|   262k|{
  360|   262k|  DECLARE_ALIGNED_MATRIX(data, 1, 64, int16_t, CACHE_LINE);
  ------------------
  |  |  287|   262k|	type name##_storage[(sizex)*(sizey)+(alignment)-1]; \
  |  |  288|   262k|type * name = (type *) (((ptr_t) name##_storage+(alignment - 1)) & ~((ptr_t)(alignment)-1))
  ------------------
  361|       |
  362|   262k|  int stride = dec->edged_width;
  363|   262k|  int i;
  364|   262k|  const uint32_t iQuant = MAX(1, pMB->quant);
  ------------------
  |  |   35|   262k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 1.38k, False: 261k]
  |  |  ------------------
  ------------------
  365|   262k|  const int direction = dec->alternate_vertical_scan ? 2 : 0;
  ------------------
  |  Branch (365:25): [True: 26.6k, False: 235k]
  ------------------
  366|   262k|  typedef void (*get_inter_block_function_t)(
  367|   262k|      Bitstream * bs,
  368|   262k|      int16_t * block,
  369|   262k|      int direction,
  370|   262k|      const int quant,
  371|   262k|      const uint16_t *matrix);
  372|   262k|  typedef void (*add_residual_function_t)(
  373|   262k|      uint8_t *predicted_block,
  374|   262k|      const int16_t *residual,
  375|   262k|      int stride);
  376|       |
  377|   262k|  const get_inter_block_function_t get_inter_block = (dec->quant_type == 0)
  ------------------
  |  Branch (377:54): [True: 132k, False: 130k]
  ------------------
  378|   262k|    ? (get_inter_block_function_t)get_inter_block_h263
  379|   262k|    : (get_inter_block_function_t)get_inter_block_mpeg;
  380|       |
  381|   262k|  uint8_t *dst[6];
  382|   262k|  int strides[6];
  383|       |
  384|       |
  385|   262k|  if (dec->interlacing && pMB->field_dct) {
  ------------------
  |  Branch (385:7): [True: 159k, False: 103k]
  |  Branch (385:27): [True: 60.9k, False: 98.3k]
  ------------------
  386|  60.9k|    dst[0] = pY_Cur;
  387|  60.9k|    dst[1] = pY_Cur + 8;
  388|  60.9k|    dst[2] = pY_Cur + stride;
  389|  60.9k|    dst[3] = dst[2] + 8;
  390|  60.9k|    dst[4] = pU_Cur;
  391|  60.9k|    dst[5] = pV_Cur;
  392|  60.9k|    strides[0] = strides[1] = strides[2] = strides[3] = stride*2;
  393|  60.9k|    strides[4] = stride/2;
  394|  60.9k|    strides[5] = stride/2;
  395|   201k|  } else {
  396|   201k|    dst[0] = pY_Cur;
  397|   201k|    dst[1] = pY_Cur + 8;
  398|   201k|    dst[2] = pY_Cur + 8*stride;
  399|   201k|    dst[3] = dst[2] + 8;
  400|   201k|    dst[4] = pU_Cur;
  401|   201k|    dst[5] = pV_Cur;
  402|   201k|    strides[0] = strides[1] = strides[2] = strides[3] = stride;
  403|   201k|    strides[4] = stride/2;
  404|   201k|    strides[5] = stride/2;
  405|   201k|  }
  406|       |
  407|  1.83M|  for (i = 0; i < 6; i++) {
  ------------------
  |  Branch (407:15): [True: 1.57M, False: 262k]
  ------------------
  408|       |    /* Process only coded blocks */
  409|  1.57M|    if (cbp & (1 << (5 - i))) {
  ------------------
  |  Branch (409:9): [True: 526k, False: 1.04M]
  ------------------
  410|       |
  411|       |      /* Clear the block */
  412|   526k|      memset(&data[0], 0, 64*sizeof(int16_t));
  413|       |
  414|       |      /* Decode coeffs and dequantize on the fly */
  415|   526k|      start_timer();
  416|   526k|      get_inter_block(bs, &data[0], direction, iQuant, get_inter_matrix(dec->mpeg_quant_matrices));
  417|   526k|      stop_coding_timer();
  418|       |
  419|       |      /* iDCT */
  420|   526k|      start_timer();
  421|   526k|      idct((short * const)&data[0]);
  422|   526k|      stop_idct_timer();
  423|       |
  424|       |      /* Add this residual to the predicted block */
  425|   526k|      start_timer();
  426|   526k|      transfer_16to8add(dst[i], &data[0], strides[i]);
  427|   526k|      stop_transfer_timer();
  428|   526k|    }
  429|  1.57M|  }
  430|   262k|}
decoder.c:get_motion_vector_interlaced:
  858|  75.1k|{
  859|  75.1k|  const int scale_fac = 1 << (fcode - 1);
  860|  75.1k|  const int high = (32 * scale_fac) - 1;
  861|  75.1k|  const int low = ((-32) * scale_fac);
  862|  75.1k|  const int range = (64 * scale_fac);
  863|       |  
  864|       |  /* Get interlaced prediction */
  865|  75.1k|  const VECTOR pmv=get_pmv2_interlaced(dec->mbs,dec->mb_width,bound,x,y,k);
  866|  75.1k|  VECTOR mv,mvf1,mvf2;
  867|       |
  868|  75.1k|  if(!pMB->field_pred)
  ------------------
  |  Branch (868:6): [True: 47.1k, False: 28.0k]
  ------------------
  869|  47.1k|  {
  870|  47.1k|    mv.x = get_mv(bs,fcode);
  871|  47.1k|    mv.y = get_mv(bs,fcode);
  872|       |    
  873|  47.1k|    mv.x += pmv.x;
  874|  47.1k|    mv.y += pmv.y;
  875|       |
  876|  47.1k|    if(mv.x<low) {
  ------------------
  |  Branch (876:8): [True: 9.08k, False: 38.0k]
  ------------------
  877|  9.08k|      mv.x += range;
  878|  38.0k|    } else if (mv.x>high) {
  ------------------
  |  Branch (878:16): [True: 25.1k, False: 12.9k]
  ------------------
  879|  25.1k|      mv.x-=range;
  880|  25.1k|    }
  881|       |
  882|  47.1k|    if (mv.y < low) {
  ------------------
  |  Branch (882:9): [True: 8.89k, False: 38.2k]
  ------------------
  883|  8.89k|      mv.y += range;
  884|  38.2k|    } else if (mv.y > high) {
  ------------------
  |  Branch (884:16): [True: 25.3k, False: 12.8k]
  ------------------
  885|  25.3k|      mv.y -= range;
  886|  25.3k|    }
  887|       |    
  888|  47.1k|    pMB->mvs[0]=pMB->mvs[1]=pMB->mvs[2]=pMB->mvs[3]=mv;
  889|  47.1k|  }
  890|  28.0k|  else
  891|  28.0k|  {
  892|  28.0k|    mvf1.x = get_mv(bs, fcode);
  893|  28.0k|    mvf1.y = get_mv(bs, fcode);
  894|       |
  895|  28.0k|    mvf1.x += pmv.x;
  896|  28.0k|    mvf1.y = 2*(mvf1.y+pmv.y/2); /* It's multiple of 2 */
  897|       |
  898|  28.0k|    if (mvf1.x < low) {
  ------------------
  |  Branch (898:9): [True: 4.62k, False: 23.3k]
  ------------------
  899|  4.62k|      mvf1.x += range;
  900|  23.3k|    } else if (mvf1.x > high) {
  ------------------
  |  Branch (900:16): [True: 15.1k, False: 8.22k]
  ------------------
  901|  15.1k|      mvf1.x -= range;
  902|  15.1k|    }
  903|       |
  904|  28.0k|    if (mvf1.y < low) {
  ------------------
  |  Branch (904:9): [True: 7.14k, False: 20.8k]
  ------------------
  905|  7.14k|      mvf1.y += range;
  906|  20.8k|    } else if (mvf1.y > high) {
  ------------------
  |  Branch (906:16): [True: 12.6k, False: 8.23k]
  ------------------
  907|  12.6k|      mvf1.y -= range;
  908|  12.6k|    }
  909|       |
  910|  28.0k|    mvf2.x = get_mv(bs, fcode);
  911|  28.0k|    mvf2.y = get_mv(bs, fcode);
  912|       |
  913|  28.0k|    mvf2.x += pmv.x;
  914|  28.0k|    mvf2.y = 2*(mvf2.y+pmv.y/2); /* It's multiple of 2 */
  915|       |
  916|  28.0k|    if (mvf2.x < low) {
  ------------------
  |  Branch (916:9): [True: 3.97k, False: 24.0k]
  ------------------
  917|  3.97k|      mvf2.x += range;
  918|  24.0k|    } else if (mvf2.x > high) {
  ------------------
  |  Branch (918:16): [True: 15.7k, False: 8.29k]
  ------------------
  919|  15.7k|      mvf2.x -= range;
  920|  15.7k|    }
  921|       |
  922|  28.0k|    if (mvf2.y < low) {
  ------------------
  |  Branch (922:9): [True: 4.13k, False: 23.8k]
  ------------------
  923|  4.13k|      mvf2.y += range;
  924|  23.8k|    } else if (mvf2.y > high) {
  ------------------
  |  Branch (924:16): [True: 15.6k, False: 8.21k]
  ------------------
  925|  15.6k|      mvf2.y -= range;
  926|  15.6k|    }
  927|       |
  928|  28.0k|    pMB->mvs[0]=mvf1;
  929|  28.0k|    pMB->mvs[1]=mvf2;
  930|  28.0k|    pMB->mvs[2].x=pMB->mvs[3].x=0;
  931|  28.0k|    pMB->mvs[2].y=pMB->mvs[3].y=0;
  932|       |  
  933|       |    /* Calculate average for as it is field predicted */
  934|  28.0k|    pMB->mvs_avg.x=DIV2ROUND(pMB->mvs[0].x+pMB->mvs[1].x);
  ------------------
  |  |   64|  28.0k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  935|  28.0k|    pMB->mvs_avg.y=DIV2ROUND(pMB->mvs[0].y+pMB->mvs[1].y);
  ------------------
  |  |   64|  28.0k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  936|  28.0k|  }
  937|  75.1k|}
decoder.c:get_motion_vector:
  814|   220k|{
  815|       |
  816|   220k|  const int scale_fac = 1 << (fcode - 1);
  817|   220k|  const int high = (32 * scale_fac) - 1;
  818|   220k|  const int low = ((-32) * scale_fac);
  819|   220k|  const int range = (64 * scale_fac);
  820|       |
  821|   220k|  const VECTOR pmv = get_pmv2(dec->mbs, dec->mb_width, bound, x, y, k);
  822|   220k|  VECTOR mv;
  823|       |
  824|   220k|  mv.x = get_mv(bs, fcode);
  825|   220k|  mv.y = get_mv(bs, fcode);
  826|       |
  827|   220k|  DPRINTF(XVID_DEBUG_MV,"mv_diff (%i,%i) pred (%i,%i) result (%i,%i)\n", mv.x, mv.y, pmv.x, pmv.y, mv.x+pmv.x, mv.y+pmv.y);
  ------------------
  |  |  202|   220k|#define XVID_DEBUG_MV        (1<< 6)
  ------------------
  828|       |
  829|   220k|  mv.x += pmv.x;
  830|   220k|  mv.y += pmv.y;
  831|       |
  832|   220k|  if (mv.x < low) {
  ------------------
  |  Branch (832:7): [True: 36.2k, False: 184k]
  ------------------
  833|  36.2k|    mv.x += range;
  834|   184k|  } else if (mv.x > high) {
  ------------------
  |  Branch (834:14): [True: 130k, False: 53.6k]
  ------------------
  835|   130k|    mv.x -= range;
  836|   130k|  }
  837|       |
  838|   220k|  if (mv.y < low) {
  ------------------
  |  Branch (838:7): [True: 54.4k, False: 166k]
  ------------------
  839|  54.4k|    mv.y += range;
  840|   166k|  } else if (mv.y > high) {
  ------------------
  |  Branch (840:14): [True: 112k, False: 53.3k]
  ------------------
  841|   112k|    mv.y -= range;
  842|   112k|  }
  843|       |
  844|   220k|  ret_mv->x = mv.x;
  845|   220k|  ret_mv->y = mv.y;
  846|   220k|}
decoder.c:decoder_mbinter:
  483|   435k|{
  484|   435k|  uint32_t stride = dec->edged_width;
  485|   435k|  uint32_t stride2 = stride / 2;
  486|   435k|  uint32_t i;
  487|       |
  488|   435k|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
  489|       |
  490|   435k|  int uv_dx, uv_dy;
  491|   435k|  VECTOR mv[4]; /* local copy of mvs */
  492|       |
  493|   435k|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  494|   435k|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  495|   435k|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  496|  2.17M|  for (i = 0; i < 4; i++)
  ------------------
  |  Branch (496:15): [True: 1.74M, False: 435k]
  ------------------
  497|  1.74M|    mv[i] = pMB->mvs[i];
  498|       |
  499|   435k|  validate_vector(mv, x_pos, y_pos, dec);
  500|       |
  501|   435k|  start_timer();
  502|       |
  503|   435k|  if ((pMB->mode != MODE_INTER4V) || (bvop)) { /* INTER, INTER_Q, NOT_CODED, FORWARD, BACKWARD */
  ------------------
  |  |   36|   435k|#define MODE_INTER4V	2
  ------------------
  |  Branch (503:7): [True: 354k, False: 81.1k]
  |  Branch (503:38): [True: 44.1k, False: 36.9k]
  ------------------
  504|       |
  505|   398k|    uv_dx = mv[0].x;
  506|   398k|    uv_dy = mv[0].y;
  507|   398k|    if (dec->quarterpel) {
  ------------------
  |  Branch (507:9): [True: 96.2k, False: 302k]
  ------------------
  508|  96.2k|			if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|  96.2k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (508:8): [True: 1.66k, False: 94.5k]
  ------------------
  509|  1.66k|  				uv_dx = (uv_dx>>1) | (uv_dx&1);
  510|  1.66k|				uv_dy = (uv_dy>>1) | (uv_dy&1);
  511|  1.66k|			}
  512|  94.5k|			else {
  513|  94.5k|        uv_dx /= 2;
  514|  94.5k|        uv_dy /= 2;
  515|  94.5k|      }
  516|  96.2k|    }
  517|   398k|    uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3];
  518|   398k|    uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3];
  519|       |
  520|   398k|    if (dec->quarterpel)
  ------------------
  |  Branch (520:9): [True: 96.2k, False: 302k]
  ------------------
  521|  96.2k|      interpolate16x16_quarterpel(dec->cur.y, dec->refn[ref].y, dec->qtmp.y, dec->qtmp.y + 64,
  522|  96.2k|                  dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
  523|  96.2k|                      mv[0].x, mv[0].y, stride, rounding);
  524|   302k|    else
  525|   302k|      interpolate16x16_switch(dec->cur.y, dec->refn[ref].y, 16*x_pos, 16*y_pos,
  526|   302k|                  mv[0].x, mv[0].y, stride, rounding);
  527|       |
  528|   398k|  } else {  /* MODE_INTER4V */
  529|       |
  530|  36.9k|    if(dec->quarterpel) {
  ------------------
  |  Branch (530:8): [True: 13.7k, False: 23.2k]
  ------------------
  531|  13.7k|			if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|  13.7k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (531:8): [True: 1.59k, False: 12.1k]
  ------------------
  532|  1.59k|				int z;
  533|  1.59k|				uv_dx = 0; uv_dy = 0;
  534|  7.98k|				for (z = 0; z < 4; z++) {
  ------------------
  |  Branch (534:17): [True: 6.38k, False: 1.59k]
  ------------------
  535|  6.38k|				  uv_dx += ((mv[z].x>>1) | (mv[z].x&1));
  536|  6.38k|				  uv_dy += ((mv[z].y>>1) | (mv[z].y&1));
  537|  6.38k|				}
  538|  1.59k|			}
  539|  12.1k|			else {
  540|  12.1k|        uv_dx = (mv[0].x / 2) + (mv[1].x / 2) + (mv[2].x / 2) + (mv[3].x / 2);
  541|  12.1k|        uv_dy = (mv[0].y / 2) + (mv[1].y / 2) + (mv[2].y / 2) + (mv[3].y / 2);
  542|  12.1k|      }
  543|  23.2k|    } else {
  544|  23.2k|      uv_dx = mv[0].x + mv[1].x + mv[2].x + mv[3].x;
  545|  23.2k|      uv_dy = mv[0].y + mv[1].y + mv[2].y + mv[3].y;
  546|  23.2k|    }
  547|       |
  548|  36.9k|    uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf];
  549|  36.9k|    uv_dy = (uv_dy >> 3) + roundtab_76[uv_dy & 0xf];
  550|       |
  551|  36.9k|    if (dec->quarterpel) {
  ------------------
  |  Branch (551:9): [True: 13.7k, False: 23.2k]
  ------------------
  552|  13.7k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  553|  13.7k|                  dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
  554|  13.7k|                  mv[0].x, mv[0].y, stride, rounding);
  555|  13.7k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  556|  13.7k|                  dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos,
  557|  13.7k|                  mv[1].x, mv[1].y, stride, rounding);
  558|  13.7k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  559|  13.7k|                  dec->qtmp.y + 128, 16*x_pos, 16*y_pos + 8,
  560|  13.7k|                  mv[2].x, mv[2].y, stride, rounding);
  561|  13.7k|      interpolate8x8_quarterpel(dec->cur.y, dec->refn[0].y , dec->qtmp.y, dec->qtmp.y + 64,
  562|  13.7k|                  dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos + 8,
  563|  13.7k|                  mv[3].x, mv[3].y, stride, rounding);
  564|  23.2k|    } else {
  565|  23.2k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos, 16*y_pos,
  566|  23.2k|                mv[0].x, mv[0].y, stride, rounding);
  567|  23.2k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos + 8, 16*y_pos,
  568|  23.2k|                mv[1].x, mv[1].y, stride, rounding);
  569|  23.2k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos, 16*y_pos + 8,
  570|  23.2k|                mv[2].x, mv[2].y, stride, rounding);
  571|  23.2k|      interpolate8x8_switch(dec->cur.y, dec->refn[0].y , 16*x_pos + 8, 16*y_pos + 8,
  572|  23.2k|                mv[3].x, mv[3].y, stride, rounding);
  573|  23.2k|    }
  574|  36.9k|  }
  575|       |
  576|       |  /* chroma */
  577|   435k|  interpolate8x8_switch(dec->cur.u, dec->refn[ref].u, 8 * x_pos, 8 * y_pos,
  578|   435k|              uv_dx, uv_dy, stride2, rounding);
  579|   435k|  interpolate8x8_switch(dec->cur.v, dec->refn[ref].v, 8 * x_pos, 8 * y_pos,
  580|   435k|              uv_dx, uv_dy, stride2, rounding);
  581|       |
  582|   435k|  stop_comp_timer();
  583|       |
  584|   435k|  if (cbp)
  ------------------
  |  Branch (584:7): [True: 142k, False: 293k]
  ------------------
  585|   142k|    decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
  586|   435k|}
decoder.c:validate_vector:
  434|  1.70M|{
  435|       |  /* clip a vector to valid range
  436|       |     prevents crashes if bitstream is broken
  437|       |  */
  438|  1.70M|  int shift = 5 + dec->quarterpel;
  439|  1.70M|  int xborder_high = (int)(dec->mb_width - x_pos) << shift;
  440|  1.70M|  int xborder_low = (-(int)x_pos-1) << shift;
  441|  1.70M|  int yborder_high = (int)(dec->mb_height - y_pos) << shift;
  442|  1.70M|  int yborder_low = (-(int)y_pos-1) << shift;
  443|       |
  444|  1.70M|#define CHECK_MV(mv) \
  445|  1.70M|  do { \
  446|  1.70M|  if ((mv).x > xborder_high) { \
  447|  1.70M|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  448|  1.70M|    (mv).x = xborder_high; \
  449|  1.70M|  } else if ((mv).x < xborder_low) { \
  450|  1.70M|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  451|  1.70M|    (mv).x = xborder_low; \
  452|  1.70M|  } \
  453|  1.70M|  if ((mv).y > yborder_high) { \
  454|  1.70M|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  455|  1.70M|    (mv).y = yborder_high; \
  456|  1.70M|  } else if ((mv).y < yborder_low) { \
  457|  1.70M|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  458|  1.70M|    (mv).y = yborder_low; \
  459|  1.70M|  } \
  460|  1.70M|  } while (0)
  461|       |
  462|  1.70M|  CHECK_MV(mv[0]);
  ------------------
  |  |  445|  1.70M|  do { \
  |  |  446|  1.70M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 34.9k, False: 1.66M]
  |  |  ------------------
  |  |  447|  34.9k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  34.9k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  34.9k|    (mv).x = xborder_high; \
  |  |  449|  1.66M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 40.8k, False: 1.62M]
  |  |  ------------------
  |  |  450|  40.8k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  40.8k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  40.8k|    (mv).x = xborder_low; \
  |  |  452|  40.8k|  } \
  |  |  453|  1.70M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 36.3k, False: 1.66M]
  |  |  ------------------
  |  |  454|  36.3k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  36.3k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  36.3k|    (mv).y = yborder_high; \
  |  |  456|  1.66M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 40.4k, False: 1.62M]
  |  |  ------------------
  |  |  457|  40.4k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  40.4k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  40.4k|    (mv).y = yborder_low; \
  |  |  459|  40.4k|  } \
  |  |  460|  1.70M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  463|  1.70M|  CHECK_MV(mv[1]);
  ------------------
  |  |  445|  1.70M|  do { \
  |  |  446|  1.70M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 33.0k, False: 1.67M]
  |  |  ------------------
  |  |  447|  33.0k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  33.0k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  33.0k|    (mv).x = xborder_high; \
  |  |  449|  1.67M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 41.6k, False: 1.62M]
  |  |  ------------------
  |  |  450|  41.6k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  41.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  41.6k|    (mv).x = xborder_low; \
  |  |  452|  41.6k|  } \
  |  |  453|  1.70M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 35.6k, False: 1.66M]
  |  |  ------------------
  |  |  454|  35.6k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  35.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  35.6k|    (mv).y = yborder_high; \
  |  |  456|  1.66M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 40.6k, False: 1.62M]
  |  |  ------------------
  |  |  457|  40.6k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  40.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  40.6k|    (mv).y = yborder_low; \
  |  |  459|  40.6k|  } \
  |  |  460|  1.70M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  464|  1.70M|  CHECK_MV(mv[2]);
  ------------------
  |  |  445|  1.70M|  do { \
  |  |  446|  1.70M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 31.3k, False: 1.67M]
  |  |  ------------------
  |  |  447|  31.3k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  31.3k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  31.3k|    (mv).x = xborder_high; \
  |  |  449|  1.67M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 37.6k, False: 1.63M]
  |  |  ------------------
  |  |  450|  37.6k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  37.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  37.6k|    (mv).x = xborder_low; \
  |  |  452|  37.6k|  } \
  |  |  453|  1.70M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 34.9k, False: 1.66M]
  |  |  ------------------
  |  |  454|  34.9k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  34.9k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  34.9k|    (mv).y = yborder_high; \
  |  |  456|  1.66M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 39.8k, False: 1.62M]
  |  |  ------------------
  |  |  457|  39.8k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  39.8k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  39.8k|    (mv).y = yborder_low; \
  |  |  459|  39.8k|  } \
  |  |  460|  1.70M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  465|  1.70M|  CHECK_MV(mv[3]);
  ------------------
  |  |  445|  1.70M|  do { \
  |  |  446|  1.70M|  if ((mv).x > xborder_high) { \
  |  |  ------------------
  |  |  |  Branch (446:7): [True: 31.3k, False: 1.67M]
  |  |  ------------------
  |  |  447|  31.3k|    DPRINTF(XVID_DEBUG_MV, "mv.x > max -- %d > %d, MB %d, %d", (mv).x, xborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  31.3k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  448|  31.3k|    (mv).x = xborder_high; \
  |  |  449|  1.67M|  } else if ((mv).x < xborder_low) { \
  |  |  ------------------
  |  |  |  Branch (449:14): [True: 37.8k, False: 1.63M]
  |  |  ------------------
  |  |  450|  37.8k|    DPRINTF(XVID_DEBUG_MV, "mv.x < min -- %d < %d, MB %d, %d", (mv).x, xborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  37.8k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  451|  37.8k|    (mv).x = xborder_low; \
  |  |  452|  37.8k|  } \
  |  |  453|  1.70M|  if ((mv).y > yborder_high) { \
  |  |  ------------------
  |  |  |  Branch (453:7): [True: 35.6k, False: 1.66M]
  |  |  ------------------
  |  |  454|  35.6k|    DPRINTF(XVID_DEBUG_MV, "mv.y > max -- %d > %d, MB %d, %d", (mv).y, yborder_high, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  35.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  455|  35.6k|    (mv).y = yborder_high; \
  |  |  456|  1.66M|  } else if ((mv).y < yborder_low) { \
  |  |  ------------------
  |  |  |  Branch (456:14): [True: 39.6k, False: 1.62M]
  |  |  ------------------
  |  |  457|  39.6k|    DPRINTF(XVID_DEBUG_MV, "mv.y < min -- %d < %d, MB %d, %d", (mv).y, yborder_low, x_pos, y_pos); \
  |  |  ------------------
  |  |  |  |  202|  39.6k|#define XVID_DEBUG_MV        (1<< 6)
  |  |  ------------------
  |  |  458|  39.6k|    (mv).y = yborder_low; \
  |  |  459|  39.6k|  } \
  |  |  460|  1.70M|  } while (0)
  |  |  ------------------
  |  |  |  Branch (460:12): [Folded - Ignored]
  |  |  ------------------
  ------------------
  466|  1.70M|}
decoder.c:decoder_mbinter_field:
  599|  28.0k|{
  600|  28.0k|  uint32_t stride = dec->edged_width;
  601|  28.0k|  uint32_t stride2 = stride / 2;
  602|       |
  603|  28.0k|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
  604|       |
  605|  28.0k|  int uvtop_dx, uvtop_dy;
  606|  28.0k|  int uvbot_dx, uvbot_dy;
  607|  28.0k|  VECTOR mv[4]; /* local copy of mvs */
  608|       |
  609|       |  /* Get pointer to memory areas */
  610|  28.0k|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
  611|  28.0k|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
  612|  28.0k|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
  613|       |  
  614|  28.0k|  mv[0] = pMB->mvs[0];
  615|  28.0k|  mv[1] = pMB->mvs[1];
  616|  28.0k|  memset(&mv[2],0,2*sizeof(VECTOR));
  617|       |
  618|  28.0k|  validate_vector(mv, x_pos, y_pos, dec);
  619|       |
  620|  28.0k|  start_timer();
  621|       |
  622|  28.0k|  if((pMB->mode!=MODE_INTER4V) || (bvop))   /* INTER, INTER_Q, NOT_CODED, FORWARD, BACKWARD */
  ------------------
  |  |   36|  28.0k|#define MODE_INTER4V	2
  ------------------
  |  Branch (622:6): [True: 28.0k, False: 0]
  |  Branch (622:35): [True: 0, False: 0]
  ------------------
  623|  28.0k|  { 
  624|       |    /* Prepare top field vector */
  625|  28.0k|    uvtop_dx = DIV2ROUND(mv[0].x);
  ------------------
  |  |   64|  28.0k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  626|  28.0k|    uvtop_dy = DIV2ROUND(mv[0].y);
  ------------------
  |  |   64|  28.0k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  627|       |
  628|       |    /* Prepare bottom field vector */
  629|  28.0k|    uvbot_dx = DIV2ROUND(mv[1].x);
  ------------------
  |  |   64|  28.0k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  630|  28.0k|    uvbot_dy = DIV2ROUND(mv[1].y);
  ------------------
  |  |   64|  28.0k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  631|       |
  632|  28.0k|    if(dec->quarterpel)
  ------------------
  |  Branch (632:8): [True: 6.73k, False: 21.2k]
  ------------------
  633|  6.73k|    {
  634|       |      /* NOT supported */
  635|  6.73k|    }
  636|  21.2k|    else
  637|  21.2k|    {
  638|       |      /* Interpolate top field left part(we use double stride for every 2nd line) */
  639|  21.2k|      interpolate8x8_switch(dec->cur.y,dec->refn[ref].y+pMB->field_for_top*stride,
  640|  21.2k|                            16*x_pos,8*y_pos,mv[0].x, mv[0].y>>1,2*stride, rounding);
  641|       |      /* top field right part */
  642|  21.2k|      interpolate8x8_switch(dec->cur.y,dec->refn[ref].y+pMB->field_for_top*stride,
  643|  21.2k|                            16*x_pos+8,8*y_pos,mv[0].x, mv[0].y>>1,2*stride, rounding);
  644|       |
  645|       |      /* Interpolate bottom field left part(we use double stride for every 2nd line) */
  646|  21.2k|      interpolate8x8_switch(dec->cur.y+stride,dec->refn[ref].y+pMB->field_for_bot*stride,
  647|  21.2k|                            16*x_pos,8*y_pos,mv[1].x, mv[1].y>>1,2*stride, rounding);
  648|       |      /* Bottom field right part */
  649|  21.2k|      interpolate8x8_switch(dec->cur.y+stride,dec->refn[ref].y+pMB->field_for_bot*stride,
  650|  21.2k|                            16*x_pos+8,8*y_pos,mv[1].x, mv[1].y>>1,2*stride, rounding);
  651|       |
  652|       |      /* Interpolate field1 U */
  653|  21.2k|      interpolate8x4_switch(dec->cur.u,dec->refn[ref].u+pMB->field_for_top*stride2,
  654|  21.2k|                            8*x_pos,4*y_pos,uvtop_dx,DIV2ROUND(uvtop_dy),stride,rounding);
  ------------------
  |  |   64|  21.2k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  655|       |      
  656|       |      /* Interpolate field1 V */
  657|  21.2k|      interpolate8x4_switch(dec->cur.v,dec->refn[ref].v+pMB->field_for_top*stride2,
  658|  21.2k|                            8*x_pos,4*y_pos,uvtop_dx,DIV2ROUND(uvtop_dy),stride,rounding);
  ------------------
  |  |   64|  21.2k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  659|       |    
  660|       |      /* Interpolate field2 U */
  661|  21.2k|      interpolate8x4_switch(dec->cur.u+stride2,dec->refn[ref].u+pMB->field_for_bot*stride2,
  662|  21.2k|                            8*x_pos,4*y_pos,uvbot_dx,DIV2ROUND(uvbot_dy),stride,rounding);
  ------------------
  |  |   64|  21.2k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  663|       |    
  664|       |      /* Interpolate field2 V */
  665|  21.2k|      interpolate8x4_switch(dec->cur.v+stride2,dec->refn[ref].v+pMB->field_for_bot*stride2,
  666|  21.2k|                            8*x_pos,4*y_pos,uvbot_dx,DIV2ROUND(uvbot_dy),stride,rounding);
  ------------------
  |  |   64|  21.2k|#define DIV2ROUND(n)  (((n)>>1)|((n)&1))
  ------------------
  667|  21.2k|    }
  668|  28.0k|  } 
  669|      0|  else 
  670|      0|  {
  671|       |    /* We don't expect 4 motion vectors in interlaced mode */
  672|      0|  }
  673|       |
  674|  28.0k|  stop_comp_timer();
  675|       |
  676|       |  /* Must add error correction? */
  677|  28.0k|  if(cbp)
  ------------------
  |  Branch (677:6): [True: 15.7k, False: 12.3k]
  ------------------
  678|  15.7k|   decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
  679|  28.0k|}
decoder.c:decoder_bframe:
 1360|  2.24k|{
 1361|  2.24k|  uint32_t x, y;
 1362|  2.24k|  VECTOR mv;
 1363|  2.24k|  const VECTOR zeromv = {0,0};
 1364|  2.24k|  int i;
 1365|  2.24k|  int resync_len;
 1366|       |
 1367|  2.24k|  if (!dec->is_edged[0]) {
  ------------------
  |  Branch (1367:7): [True: 2.24k, False: 0]
  ------------------
 1368|  2.24k|    start_timer();
 1369|  2.24k|    image_setedges(&dec->refn[0], dec->edged_width, dec->edged_height,
 1370|  2.24k|            dec->width, dec->height, dec->bs_version);
 1371|  2.24k|    dec->is_edged[0] = 1;
 1372|  2.24k|    stop_edges_timer();
 1373|  2.24k|  }
 1374|       |
 1375|  2.24k|  if (!dec->is_edged[1]) {
  ------------------
  |  Branch (1375:7): [True: 2.24k, False: 0]
  ------------------
 1376|  2.24k|    start_timer();
 1377|  2.24k|    image_setedges(&dec->refn[1], dec->edged_width, dec->edged_height,
 1378|  2.24k|            dec->width, dec->height, dec->bs_version);
 1379|  2.24k|    dec->is_edged[1] = 1;
 1380|  2.24k|    stop_edges_timer();
 1381|  2.24k|  }
 1382|       |
 1383|  2.24k|  resync_len = get_resync_len_b(fcode_backward, fcode_forward);
 1384|   121k|  for (y = 0; y < dec->mb_height; y++) {
  ------------------
  |  Branch (1384:15): [True: 119k, False: 2.24k]
  ------------------
 1385|       |    /* Initialize Pred Motion Vector */
 1386|   119k|    dec->p_fmv = dec->p_bmv = zeromv;
 1387|  4.70M|    for (x = 0; x < dec->mb_width; x++) {
  ------------------
  |  Branch (1387:17): [True: 4.58M, False: 119k]
  ------------------
 1388|  4.58M|      MACROBLOCK *mb = &dec->mbs[y * dec->mb_width + x];
 1389|  4.58M|      MACROBLOCK *last_mb = &dec->last_mbs[y * dec->mb_width + x];
 1390|  4.58M|      int intra_dc_threshold; /* fake variable */
 1391|       |
 1392|  4.58M|      mv =
 1393|  4.58M|      mb->b_mvs[0] = mb->b_mvs[1] = mb->b_mvs[2] = mb->b_mvs[3] =
 1394|  4.58M|      mb->mvs[0] = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = zeromv;
 1395|  4.58M|      mb->quant = quant;
 1396|       |
 1397|       |      /*
 1398|       |       * skip if the co-located P_VOP macroblock is not coded
 1399|       |       * if not codec in co-located S_VOP macroblock is _not_
 1400|       |       * automatically skipped
 1401|       |       */
 1402|       |
 1403|  4.58M|      if (last_mb->mode == MODE_NOT_CODED) {
  ------------------
  |  |   39|  4.58M|#define MODE_NOT_CODED	16
  ------------------
  |  Branch (1403:11): [True: 0, False: 4.58M]
  ------------------
 1404|      0|        mb->cbp = 0;
 1405|      0|        mb->mode = MODE_FORWARD;
  ------------------
  |  |   47|      0|#define MODE_FORWARD		3
  ------------------
 1406|      0|        decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 1, 1);
 1407|      0|        continue;
 1408|      0|      }
 1409|       |
 1410|  4.58M|      if (check_resync_marker(bs, resync_len)) {
  ------------------
  |  Branch (1410:11): [True: 3.84k, False: 4.58M]
  ------------------
 1411|  3.84k|        int bound = read_video_packet_header(bs, dec, resync_len, &quant,
 1412|  3.84k|                           &fcode_forward, &fcode_backward, &intra_dc_threshold);
 1413|       |
 1414|  3.84k|		bound = MAX(0, bound-1); /* valid bound must always be >0 */
  ------------------
  |  |   35|  3.84k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (35:20): [True: 1.04k, False: 2.80k]
  |  |  ------------------
  ------------------
 1415|  3.84k|        x = bound % dec->mb_width;
 1416|  3.84k|        y = MIN((bound / dec->mb_width), (dec->mb_height-1));
  ------------------
  |  |   34|  3.84k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 3.33k, False: 517]
  |  |  ------------------
  ------------------
 1417|       |        /* reset predicted macroblocks */
 1418|  3.84k|        dec->p_fmv = dec->p_bmv = zeromv;
 1419|       |        /* update resync len with new fcodes */
 1420|  3.84k|        resync_len = get_resync_len_b(fcode_backward, fcode_forward);
 1421|  3.84k|		continue; /* re-init loop */
 1422|  3.84k|	  }
 1423|       |
 1424|  4.58M|      if (!BitstreamGetBit(bs)) { /* modb=='0' */
  ------------------
  |  Branch (1424:11): [True: 4.19M, False: 387k]
  ------------------
 1425|  4.19M|        const uint8_t modb2 = BitstreamGetBit(bs);
 1426|       |
 1427|  4.19M|        mb->mode = get_mbtype(bs);
 1428|       |
 1429|  4.19M|        if (!modb2)   /* modb=='00' */
  ------------------
  |  Branch (1429:13): [True: 4.02M, False: 166k]
  ------------------
 1430|  4.02M|          mb->cbp = BitstreamGetBits(bs, 6);
 1431|   166k|        else
 1432|   166k|          mb->cbp = 0;
 1433|       |
 1434|  4.19M|        if (mb->mode && mb->cbp) {
  ------------------
  |  Branch (1434:13): [True: 4.06M, False: 129k]
  |  Branch (1434:25): [True: 95.4k, False: 3.96M]
  ------------------
 1435|  95.4k|          quant += get_dbquant(bs);
 1436|  95.4k|          if (quant > 31)
  ------------------
  |  Branch (1436:15): [True: 5.62k, False: 89.8k]
  ------------------
 1437|  5.62k|            quant = 31;
 1438|  89.8k|          else if (quant < 1)
  ------------------
  |  Branch (1438:20): [True: 5.55k, False: 84.2k]
  ------------------
 1439|  5.55k|            quant = 1;
 1440|  95.4k|        }
 1441|  4.19M|        mb->quant = quant;
 1442|       |
 1443|  4.19M|        if (dec->interlacing) {
  ------------------
  |  Branch (1443:13): [True: 3.31M, False: 879k]
  ------------------
 1444|  3.31M|          if (mb->cbp) {
  ------------------
  |  Branch (1444:15): [True: 93.4k, False: 3.22M]
  ------------------
 1445|  93.4k|            mb->field_dct = BitstreamGetBit(bs);
 1446|  93.4k|            DPRINTF(XVID_DEBUG_MB,"decp: field_dct: %i\n", mb->field_dct);
  ------------------
  |  |  200|  93.4k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1447|  93.4k|          }
 1448|       |
 1449|  3.31M|          if (mb->mode) {
  ------------------
  |  Branch (1449:15): [True: 3.22M, False: 92.1k]
  ------------------
 1450|  3.22M|            mb->field_pred = BitstreamGetBit(bs);
 1451|  3.22M|            DPRINTF(XVID_DEBUG_MB, "decp: field_pred: %i\n", mb->field_pred);
  ------------------
  |  |  200|  3.22M|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1452|       |
 1453|  3.22M|            if (mb->field_pred) {
  ------------------
  |  Branch (1453:17): [True: 68.6k, False: 3.15M]
  ------------------
 1454|  68.6k|              mb->field_for_top = BitstreamGetBit(bs);
 1455|  68.6k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_top: %i\n", mb->field_for_top);
  ------------------
  |  |  200|  68.6k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1456|  68.6k|              mb->field_for_bot = BitstreamGetBit(bs);
 1457|  68.6k|              DPRINTF(XVID_DEBUG_MB,"decp: field_for_bot: %i\n", mb->field_for_bot);
  ------------------
  |  |  200|  68.6k|#define XVID_DEBUG_MB        (1<< 4)
  ------------------
 1458|  68.6k|            }
 1459|  3.22M|          }
 1460|  3.31M|        }
 1461|       |
 1462|  4.19M|      } else {
 1463|   387k|        mb->mode = MODE_DIRECT_NONE_MV;
  ------------------
  |  |   48|   387k|#define MODE_DIRECT_NONE_MV	4
  ------------------
 1464|   387k|        mb->cbp = 0;
 1465|   387k|      }
 1466|       |
 1467|  4.58M|      switch (mb->mode) {
 1468|   129k|      case MODE_DIRECT:
  ------------------
  |  |   44|   129k|#define MODE_DIRECT			0
  ------------------
  |  Branch (1468:7): [True: 129k, False: 4.45M]
  ------------------
 1469|   129k|        get_b_motion_vector(bs, &mv, 1, zeromv, dec, x, y);
 1470|       |
 1471|   517k|      case MODE_DIRECT_NONE_MV:
  ------------------
  |  |   48|   517k|#define MODE_DIRECT_NONE_MV	4
  ------------------
  |  Branch (1471:7): [True: 387k, False: 4.19M]
  ------------------
 1472|  2.58M|        for (i = 0; i < 4; i++) {
  ------------------
  |  Branch (1472:21): [True: 2.06M, False: 517k]
  ------------------
 1473|  2.06M|          mb->mvs[i].x = last_mb->mvs[i].x*dec->time_bp/dec->time_pp + mv.x;
 1474|  2.06M|          mb->mvs[i].y = last_mb->mvs[i].y*dec->time_bp/dec->time_pp + mv.y;
 1475|       |
 1476|  2.06M|          mb->b_mvs[i].x = (mv.x)
  ------------------
  |  Branch (1476:28): [True: 216k, False: 1.85M]
  ------------------
 1477|  2.06M|            ?  mb->mvs[i].x - last_mb->mvs[i].x
 1478|  2.06M|            : last_mb->mvs[i].x*(dec->time_bp - dec->time_pp)/dec->time_pp;
 1479|  2.06M|          mb->b_mvs[i].y = (mv.y)
  ------------------
  |  Branch (1479:28): [True: 171k, False: 1.89M]
  ------------------
 1480|  2.06M|            ? mb->mvs[i].y - last_mb->mvs[i].y
 1481|  2.06M|            : last_mb->mvs[i].y*(dec->time_bp - dec->time_pp)/dec->time_pp;
 1482|  2.06M|        }
 1483|       |
 1484|   517k|        decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0],
 1485|   517k|                        mb, x, y, bs, 1);
 1486|   517k|        break;
 1487|       |
 1488|   102k|      case MODE_INTERPOLATE:
  ------------------
  |  |   45|   102k|#define MODE_INTERPOLATE	1
  ------------------
  |  Branch (1488:7): [True: 102k, False: 4.47M]
  ------------------
 1489|   102k|        get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv, dec, x, y);
 1490|   102k|        dec->p_fmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1491|       |
 1492|   102k|        get_b_motion_vector(bs, &mb->b_mvs[0], fcode_backward, dec->p_bmv, dec, x, y);
 1493|   102k|        dec->p_bmv = mb->b_mvs[1] = mb->b_mvs[2] = mb->b_mvs[3] = mb->b_mvs[0];
 1494|       |
 1495|   102k|        decoder_bf_interpolate_mbinter(dec, dec->refn[1], dec->refn[0],
 1496|   102k|                      mb, x, y, bs, 0);
 1497|   102k|        break;
 1498|       |
 1499|  44.1k|      case MODE_BACKWARD:
  ------------------
  |  |   46|  44.1k|#define MODE_BACKWARD		2
  ------------------
  |  Branch (1499:7): [True: 44.1k, False: 4.53M]
  ------------------
 1500|  44.1k|        get_b_motion_vector(bs, &mb->mvs[0], fcode_backward, dec->p_bmv, dec, x, y);
 1501|  44.1k|        dec->p_bmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1502|       |
 1503|  44.1k|        decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 0, 1);
 1504|  44.1k|        break;
 1505|       |
 1506|  24.9k|      case MODE_FORWARD:
  ------------------
  |  |   47|  24.9k|#define MODE_FORWARD		3
  ------------------
  |  Branch (1506:7): [True: 24.9k, False: 4.55M]
  ------------------
 1507|  24.9k|        get_b_motion_vector(bs, &mb->mvs[0], fcode_forward, dec->p_fmv, dec, x, y);
 1508|  24.9k|        dec->p_fmv = mb->mvs[1] = mb->mvs[2] = mb->mvs[3] = mb->mvs[0];
 1509|       |
 1510|  24.9k|        decoder_mbinter(dec, mb, x, y, mb->cbp, bs, 0, 1, 1);
 1511|  24.9k|        break;
 1512|       |
 1513|  3.89M|      default:
  ------------------
  |  Branch (1513:7): [True: 3.89M, False: 689k]
  ------------------
 1514|  3.89M|        DPRINTF(XVID_DEBUG_ERROR,"Not supported B-frame mb_type = %i\n", mb->mode);
  ------------------
  |  |  196|  3.89M|#define XVID_DEBUG_ERROR     (1<< 0)
  ------------------
 1515|  4.58M|      }
 1516|  4.58M|    } /* End of for */
 1517|   119k|  }
 1518|  2.24k|}
decoder.c:get_resync_len_b:
 1348|  6.08k|                                     const int fcode_forward) {
 1349|  6.08k|  int resync_len = ((fcode_forward>fcode_backward) ? fcode_forward : fcode_backward) - 1;
  ------------------
  |  Branch (1349:21): [True: 1.35k, False: 4.73k]
  ------------------
 1350|  6.08k|  if (resync_len < 1) resync_len = 1;
  ------------------
  |  Branch (1350:7): [True: 2.71k, False: 3.37k]
  ------------------
 1351|  6.08k|  return resync_len;
 1352|  6.08k|}
decoder.c:get_mbtype:
 1337|  4.19M|{
 1338|  4.19M|  int32_t mb_type;
 1339|       |
 1340|  20.0M|  for (mb_type = 0; mb_type <= 3; mb_type++)
  ------------------
  |  Branch (1340:21): [True: 16.1M, False: 3.89M]
  ------------------
 1341|  16.1M|    if (BitstreamGetBit(bs))
  ------------------
  |  Branch (1341:9): [True: 301k, False: 15.8M]
  ------------------
 1342|   301k|      return (mb_type);
 1343|       |
 1344|  3.89M|  return -1;
 1345|  4.19M|}
decoder.c:get_dbquant:
 1318|  95.4k|{
 1319|  95.4k|  if (!BitstreamGetBit(bs))   /*  '0' */
  ------------------
  |  Branch (1319:7): [True: 60.1k, False: 35.2k]
  ------------------
 1320|  60.1k|    return (0);
 1321|  35.2k|  else if (!BitstreamGetBit(bs))  /* '10' */
  ------------------
  |  Branch (1321:12): [True: 18.2k, False: 17.0k]
  ------------------
 1322|  18.2k|    return (-2);
 1323|  17.0k|  else              /* '11' */
 1324|  17.0k|    return (2);
 1325|  95.4k|}
decoder.c:get_b_motion_vector:
 1127|   404k|{
 1128|   404k|  const int scale_fac = 1 << (fcode - 1);
 1129|   404k|  const int high = (32 * scale_fac) - 1;
 1130|   404k|  const int low = ((-32) * scale_fac);
 1131|   404k|  const int range = (64 * scale_fac);
 1132|       |
 1133|   404k|  int mv_x = get_mv(bs, fcode);
 1134|   404k|  int mv_y = get_mv(bs, fcode);
 1135|       |
 1136|   404k|  mv_x += pmv.x;
 1137|   404k|  mv_y += pmv.y;
 1138|       |
 1139|   404k|  if (mv_x < low)
  ------------------
  |  Branch (1139:7): [True: 62.9k, False: 341k]
  ------------------
 1140|  62.9k|    mv_x += range;
 1141|   341k|  else if (mv_x > high)
  ------------------
  |  Branch (1141:12): [True: 105k, False: 236k]
  ------------------
 1142|   105k|    mv_x -= range;
 1143|       |
 1144|   404k|  if (mv_y < low)
  ------------------
  |  Branch (1144:7): [True: 59.0k, False: 345k]
  ------------------
 1145|  59.0k|    mv_y += range;
 1146|   345k|  else if (mv_y > high)
  ------------------
  |  Branch (1146:12): [True: 108k, False: 237k]
  ------------------
 1147|   108k|    mv_y -= range;
 1148|       |
 1149|   404k|  mv->x = mv_x;
 1150|   404k|  mv->y = mv_y;
 1151|   404k|}
decoder.c:decoder_bf_interpolate_mbinter:
 1163|   620k|{
 1164|   620k|  uint32_t stride = dec->edged_width;
 1165|   620k|  uint32_t stride2 = stride / 2;
 1166|   620k|  int uv_dx, uv_dy;
 1167|   620k|  int b_uv_dx, b_uv_dy;
 1168|   620k|  uint8_t *pY_Cur, *pU_Cur, *pV_Cur;
 1169|   620k|  const uint32_t cbp = pMB->cbp;
 1170|       |
 1171|   620k|  pY_Cur = dec->cur.y + (y_pos << 4) * stride + (x_pos << 4);
 1172|   620k|  pU_Cur = dec->cur.u + (y_pos << 3) * stride2 + (x_pos << 3);
 1173|   620k|  pV_Cur = dec->cur.v + (y_pos << 3) * stride2 + (x_pos << 3);
 1174|       |
 1175|   620k|  validate_vector(pMB->mvs, x_pos, y_pos, dec);
 1176|   620k|  validate_vector(pMB->b_mvs, x_pos, y_pos, dec);
 1177|       |
 1178|   620k|  if (!direct) {
  ------------------
  |  Branch (1178:7): [True: 102k, False: 517k]
  ------------------
 1179|   102k|    uv_dx = pMB->mvs[0].x;
 1180|   102k|    uv_dy = pMB->mvs[0].y;
 1181|   102k|    b_uv_dx = pMB->b_mvs[0].x;
 1182|   102k|    b_uv_dy = pMB->b_mvs[0].y;
 1183|       |
 1184|   102k|    if (dec->quarterpel) {
  ------------------
  |  Branch (1184:9): [True: 95.6k, False: 7.34k]
  ------------------
 1185|  95.6k|			if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|  95.6k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (1185:8): [True: 549, False: 95.0k]
  ------------------
 1186|    549|				uv_dx = (uv_dx>>1) | (uv_dx&1);
 1187|    549|				uv_dy = (uv_dy>>1) | (uv_dy&1);
 1188|    549|				b_uv_dx = (b_uv_dx>>1) | (b_uv_dx&1);
 1189|    549|				b_uv_dy = (b_uv_dy>>1) | (b_uv_dy&1);
 1190|    549|			}
 1191|  95.0k|			else {
 1192|  95.0k|        uv_dx /= 2;
 1193|  95.0k|        uv_dy /= 2;
 1194|  95.0k|        b_uv_dx /= 2;
 1195|  95.0k|        b_uv_dy /= 2;
 1196|  95.0k|      }
 1197|  95.6k|    }
 1198|       |
 1199|   102k|    uv_dx = (uv_dx >> 1) + roundtab_79[uv_dx & 0x3];
 1200|   102k|    uv_dy = (uv_dy >> 1) + roundtab_79[uv_dy & 0x3];
 1201|   102k|    b_uv_dx = (b_uv_dx >> 1) + roundtab_79[b_uv_dx & 0x3];
 1202|   102k|    b_uv_dy = (b_uv_dy >> 1) + roundtab_79[b_uv_dy & 0x3];
 1203|       |
 1204|   517k|  } else {
 1205|   517k|	  if (dec->quarterpel) { /* for qpel the /2 shall be done before summation. We've done it right in the encoder in the past. */
  ------------------
  |  Branch (1205:8): [True: 387k, False: 130k]
  ------------------
 1206|       |							 /* TODO: figure out if we ever did it wrong on the encoder side. If yes, add some workaround */
 1207|   387k|		if (dec->bs_version <= BS_VERSION_BUGGY_CHROMA_ROUNDING) {
  ------------------
  |  |  470|   387k|#define BS_VERSION_BUGGY_CHROMA_ROUNDING 1
  ------------------
  |  Branch (1207:7): [True: 1.77k, False: 385k]
  ------------------
 1208|  1.77k|			int z;
 1209|  1.77k|			uv_dx = 0; uv_dy = 0;
 1210|  1.77k|			b_uv_dx = 0; b_uv_dy = 0;
 1211|  8.86k|			for (z = 0; z < 4; z++) {
  ------------------
  |  Branch (1211:16): [True: 7.08k, False: 1.77k]
  ------------------
 1212|  7.08k|			  uv_dx += ((pMB->mvs[z].x>>1) | (pMB->mvs[z].x&1));
 1213|  7.08k|			  uv_dy += ((pMB->mvs[z].y>>1) | (pMB->mvs[z].y&1));
 1214|  7.08k|			  b_uv_dx += ((pMB->b_mvs[z].x>>1) | (pMB->b_mvs[z].x&1));
 1215|  7.08k|			  b_uv_dy += ((pMB->b_mvs[z].y>>1) | (pMB->b_mvs[z].y&1));
 1216|  7.08k|			}
 1217|  1.77k|		}
 1218|   385k|		else {
 1219|   385k|			uv_dx = (pMB->mvs[0].x / 2) + (pMB->mvs[1].x / 2) + (pMB->mvs[2].x / 2) + (pMB->mvs[3].x / 2);
 1220|   385k|			uv_dy = (pMB->mvs[0].y / 2) + (pMB->mvs[1].y / 2) + (pMB->mvs[2].y / 2) + (pMB->mvs[3].y / 2);
 1221|   385k|			b_uv_dx = (pMB->b_mvs[0].x / 2) + (pMB->b_mvs[1].x / 2) + (pMB->b_mvs[2].x / 2) + (pMB->b_mvs[3].x / 2);
 1222|   385k|			b_uv_dy = (pMB->b_mvs[0].y / 2) + (pMB->b_mvs[1].y / 2) + (pMB->b_mvs[2].y / 2) + (pMB->b_mvs[3].y / 2);
 1223|   385k|		} 
 1224|   387k|	} else {
 1225|   130k|      uv_dx = pMB->mvs[0].x + pMB->mvs[1].x + pMB->mvs[2].x + pMB->mvs[3].x;
 1226|   130k|      uv_dy = pMB->mvs[0].y + pMB->mvs[1].y + pMB->mvs[2].y + pMB->mvs[3].y;
 1227|   130k|      b_uv_dx = pMB->b_mvs[0].x + pMB->b_mvs[1].x + pMB->b_mvs[2].x + pMB->b_mvs[3].x;
 1228|   130k|      b_uv_dy = pMB->b_mvs[0].y + pMB->b_mvs[1].y + pMB->b_mvs[2].y + pMB->b_mvs[3].y;
 1229|   130k|    }
 1230|       |
 1231|   517k|    uv_dx = (uv_dx >> 3) + roundtab_76[uv_dx & 0xf];
 1232|   517k|    uv_dy = (uv_dy >> 3) + roundtab_76[uv_dy & 0xf];
 1233|   517k|    b_uv_dx = (b_uv_dx >> 3) + roundtab_76[b_uv_dx & 0xf];
 1234|   517k|    b_uv_dy = (b_uv_dy >> 3) + roundtab_76[b_uv_dy & 0xf];
 1235|   517k|  }
 1236|       |
 1237|   620k|  start_timer();
 1238|   620k|  if(dec->quarterpel) {
  ------------------
  |  Branch (1238:6): [True: 482k, False: 137k]
  ------------------
 1239|   482k|    if(!direct) {
  ------------------
  |  Branch (1239:8): [True: 95.6k, False: 387k]
  ------------------
 1240|  95.6k|      interpolate16x16_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1241|  95.6k|                    dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1242|  95.6k|                    pMB->mvs[0].x, pMB->mvs[0].y, stride, 0);
 1243|   387k|    } else {
 1244|   387k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1245|   387k|                    dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1246|   387k|                    pMB->mvs[0].x, pMB->mvs[0].y, stride, 0);
 1247|   387k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1248|   387k|                    dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos,
 1249|   387k|                    pMB->mvs[1].x, pMB->mvs[1].y, stride, 0);
 1250|   387k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1251|   387k|                    dec->qtmp.y + 128, 16*x_pos, 16*y_pos + 8,
 1252|   387k|                    pMB->mvs[2].x, pMB->mvs[2].y, stride, 0);
 1253|   387k|      interpolate8x8_quarterpel(dec->cur.y, forward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1254|   387k|                    dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos + 8,
 1255|   387k|                    pMB->mvs[3].x, pMB->mvs[3].y, stride, 0);
 1256|   387k|    }
 1257|   482k|  } else {
 1258|   137k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos, 16 * y_pos,
 1259|   137k|              pMB->mvs[0].x, pMB->mvs[0].y, stride, 0);
 1260|   137k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos + 8, 16 * y_pos,
 1261|   137k|              pMB->mvs[1].x, pMB->mvs[1].y, stride, 0);
 1262|   137k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos, 16 * y_pos + 8,
 1263|   137k|              pMB->mvs[2].x, pMB->mvs[2].y, stride, 0);
 1264|   137k|    interpolate8x8_switch(dec->cur.y, forward.y, 16 * x_pos + 8, 16 * y_pos + 8,
 1265|   137k|              pMB->mvs[3].x, pMB->mvs[3].y, stride, 0);
 1266|   137k|  }
 1267|       |
 1268|   620k|  interpolate8x8_switch(dec->cur.u, forward.u, 8 * x_pos, 8 * y_pos, uv_dx,
 1269|   620k|            uv_dy, stride2, 0);
 1270|   620k|  interpolate8x8_switch(dec->cur.v, forward.v, 8 * x_pos, 8 * y_pos, uv_dx,
 1271|   620k|            uv_dy, stride2, 0);
 1272|       |
 1273|       |
 1274|   620k|  if(dec->quarterpel) {
  ------------------
  |  Branch (1274:6): [True: 482k, False: 137k]
  ------------------
 1275|   482k|    if(!direct) {
  ------------------
  |  Branch (1275:8): [True: 95.6k, False: 387k]
  ------------------
 1276|  95.6k|      interpolate16x16_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1277|  95.6k|          dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1278|  95.6k|          pMB->b_mvs[0].x, pMB->b_mvs[0].y, stride, 0);
 1279|   387k|    } else {
 1280|   387k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1281|   387k|          dec->qtmp.y + 128, 16*x_pos, 16*y_pos,
 1282|   387k|          pMB->b_mvs[0].x, pMB->b_mvs[0].y, stride, 0);
 1283|   387k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1284|   387k|          dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos,
 1285|   387k|          pMB->b_mvs[1].x, pMB->b_mvs[1].y, stride, 0);
 1286|   387k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1287|   387k|          dec->qtmp.y + 128, 16*x_pos, 16*y_pos + 8,
 1288|   387k|          pMB->b_mvs[2].x, pMB->b_mvs[2].y, stride, 0);
 1289|   387k|      interpolate8x8_add_quarterpel(dec->cur.y, backward.y, dec->qtmp.y, dec->qtmp.y + 64,
 1290|   387k|          dec->qtmp.y + 128, 16*x_pos + 8, 16*y_pos + 8,
 1291|   387k|          pMB->b_mvs[3].x, pMB->b_mvs[3].y, stride, 0);
 1292|   387k|    }
 1293|   482k|  } else {
 1294|   137k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos, 16 * y_pos,
 1295|   137k|        pMB->b_mvs[0].x, pMB->b_mvs[0].y, stride, 0);
 1296|   137k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos + 8,
 1297|   137k|        16 * y_pos, pMB->b_mvs[1].x, pMB->b_mvs[1].y, stride, 0);
 1298|   137k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos,
 1299|   137k|        16 * y_pos + 8, pMB->b_mvs[2].x, pMB->b_mvs[2].y, stride, 0);
 1300|   137k|    interpolate8x8_add_switch(dec->cur.y, backward.y, 16 * x_pos + 8,
 1301|   137k|        16 * y_pos + 8, pMB->b_mvs[3].x, pMB->b_mvs[3].y, stride, 0);
 1302|   137k|  }
 1303|       |
 1304|   620k|  interpolate8x8_add_switch(dec->cur.u, backward.u, 8 * x_pos, 8 * y_pos,
 1305|   620k|      b_uv_dx, b_uv_dy, stride2, 0);
 1306|   620k|  interpolate8x8_add_switch(dec->cur.v, backward.v, 8 * x_pos, 8 * y_pos,
 1307|   620k|      b_uv_dx, b_uv_dy, stride2, 0);
 1308|       |
 1309|   620k|  stop_comp_timer();
 1310|       |
 1311|   620k|  if (cbp)
  ------------------
  |  Branch (1311:7): [True: 66.8k, False: 553k]
  ------------------
 1312|  66.8k|    decoder_mb_decode(dec, cbp, bs, pY_Cur, pU_Cur, pV_Cur, pMB);
 1313|   620k|}

decoder.c:coding2type:
   64|  6.05k|{
   65|  6.05k|	return coding_type + 1;
   66|  6.05k|}
decoder.c:get_dc_scaler:
  233|   272M|{
  234|   272M|	if (quant < 5)
  ------------------
  |  Branch (234:6): [True: 69.1M, False: 203M]
  ------------------
  235|  69.1M|		return 8;
  236|       |
  237|   203M|	if (quant < 25 && !lum)
  ------------------
  |  Branch (237:6): [True: 110M, False: 93.3M]
  |  Branch (237:20): [True: 36.7M, False: 73.4M]
  ------------------
  238|  36.7M|		return (quant + 13) / 2;
  239|       |
  240|   166M|	if (quant < 9)
  ------------------
  |  Branch (240:6): [True: 27.3M, False: 139M]
  ------------------
  241|  27.3M|		return 2 * quant;
  242|       |
  243|   139M|	if (quant < 25)
  ------------------
  |  Branch (243:6): [True: 46.0M, False: 93.3M]
  ------------------
  244|  46.0M|		return quant + 8;
  245|       |
  246|  93.3M|	if (lum)
  ------------------
  |  Branch (246:6): [True: 62.2M, False: 31.1M]
  ------------------
  247|  62.2M|		return 2 * quant - 16;
  248|  31.1M|	else
  249|  31.1M|		return quant - 6;
  250|  93.3M|}

yv12_to_bgr_c:
   98|  3.85k|				 int width, int height, int vflip)	\
   99|  3.85k|{	\
  100|  3.85k|	int fixed_width = (width + 1) & ~1;				\
  101|  3.85k|	int x_dif = x_stride - (SIZE)*fixed_width;		\
  102|  3.85k|	int y_dif = y_stride - fixed_width;				\
  103|  3.85k|	int uv_dif = uv_stride - (fixed_width / 2);		\
  104|  3.85k|	int x, y;										\
  105|  3.85k|	if ((x_ptr == NULL) || (x_dif < 0)) return;		\
  ------------------
  |  Branch (105:6): [True: 0, False: 3.85k]
  |  Branch (105:25): [True: 0, False: 3.85k]
  ------------------
  106|  3.85k|	if (vflip) {								\
  ------------------
  |  Branch (106:6): [True: 0, False: 3.85k]
  ------------------
  107|      0|		x_ptr += (height - 1) * x_stride;			\
  108|      0|		x_dif = -(SIZE)*fixed_width - x_stride;		\
  109|      0|		x_stride = -x_stride;						\
  110|      0|	}												\
  111|  1.01M|	for (y = 0; y < height; y+=(VPIXELS)) {			\
  ------------------
  |  Branch (111:14): [True: 1.01M, False: 3.85k]
  ------------------
  112|  1.01M|		FUNC##_ROW(SIZE,C1,C2,C3,C4);				\
  113|   583M|		for (x = 0; x < fixed_width; x+=(PIXELS)) {	\
  ------------------
  |  Branch (113:15): [True: 582M, False: 1.01M]
  ------------------
  114|   582M|			FUNC(SIZE,C1,C2,C3,C4);				\
  ------------------
  |  |  450|   582M|MAKE_COLORSPACE(yv12_to_bgr_c,     3,2,2, YV12_TO_RGB,    2,1,0,0)
  |  |  ------------------
  |  |  |  |  405|   582M|	int rgb_y;												\
  |  |  |  |  406|   582M|	int b_u0 = B_U_tab[ u_ptr[0] ];							\
  |  |  |  |  407|   582M|	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];	\
  |  |  |  |  408|   582M|	int r_v0 = R_V_tab[ v_ptr[0] ];							\
  |  |  |  |  409|   582M|	WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)						\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   582M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   582M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.57M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.57M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   582M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 29.0M, False: 553M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 548M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   582M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   582M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   582M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   582M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.57M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.57M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   582M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 29.0M, False: 553M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 548M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   582M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   582M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  410|   582M|	WRITE_RGB(SIZE, 1, 0, C1,C2,C3,C4)
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|   582M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|   582M|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.57M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.57M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|   582M|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 29.0M, False: 553M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 548M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|   582M|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|   582M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|   582M|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|   582M|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.57M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.57M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|   582M|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 29.0M, False: 553M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 548M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|   582M|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  1.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 344M, False: 238M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 4.58M, False: 577M]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 4.58M, False: 233M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|   582M|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  115|   582M|			x_ptr += (PIXELS)*(SIZE);				\
  116|   582M|			y_ptr += (PIXELS);						\
  117|   582M|			u_ptr += (PIXELS)/2;					\
  118|   582M|			v_ptr += (PIXELS)/2;					\
  119|   582M|		}											\
  120|  1.01M|		x_ptr += x_dif + (VPIXELS-1)*x_stride;		\
  121|  1.01M|		y_ptr += y_dif + (VPIXELS-1)*y_stride;		\
  122|  1.01M|		u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  123|  1.01M|		v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  124|  1.01M|	}												\
  125|  3.85k|}
yv12_to_bgri_c:
   98|  1.81k|				 int width, int height, int vflip)	\
   99|  1.81k|{	\
  100|  1.81k|	int fixed_width = (width + 1) & ~1;				\
  101|  1.81k|	int x_dif = x_stride - (SIZE)*fixed_width;		\
  102|  1.81k|	int y_dif = y_stride - fixed_width;				\
  103|  1.81k|	int uv_dif = uv_stride - (fixed_width / 2);		\
  104|  1.81k|	int x, y;										\
  105|  1.81k|	if ((x_ptr == NULL) || (x_dif < 0)) return;		\
  ------------------
  |  Branch (105:6): [True: 0, False: 1.81k]
  |  Branch (105:25): [True: 0, False: 1.81k]
  ------------------
  106|  1.81k|	if (vflip) {								\
  ------------------
  |  Branch (106:6): [True: 0, False: 1.81k]
  ------------------
  107|      0|		x_ptr += (height - 1) * x_stride;			\
  108|      0|		x_dif = -(SIZE)*fixed_width - x_stride;		\
  109|      0|		x_stride = -x_stride;						\
  110|      0|	}												\
  111|  1.88M|	for (y = 0; y < height; y+=(VPIXELS)) {			\
  ------------------
  |  Branch (111:14): [True: 1.88M, False: 1.81k]
  ------------------
  112|  1.88M|		FUNC##_ROW(SIZE,C1,C2,C3,C4);				\
  113|  1.08G|		for (x = 0; x < fixed_width; x+=(PIXELS)) {	\
  ------------------
  |  Branch (113:15): [True: 1.08G, False: 1.88M]
  ------------------
  114|  1.08G|			FUNC(SIZE,C1,C2,C3,C4);				\
  ------------------
  |  |  461|  1.08G|MAKE_COLORSPACE(yv12_to_bgri_c,    3,2,4, YV12_TO_RGBI,   2,1,0, 0)
  |  |  ------------------
  |  |  |  |  414|  1.08G|	int rgb_y;												\
  |  |  |  |  415|  1.08G|	int b_u0 = B_U_tab[ u_ptr[0] ];							\
  |  |  |  |  416|  1.08G|	int g_uv0 = G_U_tab[ u_ptr[0] ] + G_V_tab[ v_ptr[0] ];	\
  |  |  |  |  417|  1.08G|	int r_v0 = R_V_tab[ v_ptr[0] ];							\
  |  |  |  |  418|  1.08G|    int b_u1 = B_U_tab[ u_ptr[uv_stride] ];					\
  |  |  |  |  419|  1.08G|	int g_uv1 = G_U_tab[ u_ptr[uv_stride] ] + G_V_tab[ v_ptr[uv_stride] ];	\
  |  |  |  |  420|  1.08G|	int r_v1 = R_V_tab[ v_ptr[uv_stride] ];					\
  |  |  |  |  421|  1.08G|	WRITE_RGB(SIZE, 0, 0, C1,C2,C3,C4)		\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|  1.08G|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 400k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 400k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|  1.08G|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 837k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 837k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|  1.08G|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 654M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 457k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 457k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 407k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 407k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 875k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 875k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 654M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 464k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 464k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  422|  1.08G|	WRITE_RGB(SIZE, 1, 1, C1,C2,C3,C4)		\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|  1.08G|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 410k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 410k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|  1.08G|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 830k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 830k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|  1.08G|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 470k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 470k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 404k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 404k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 836k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 836k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 654M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 463k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 463k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  423|  1.08G|	WRITE_RGB(SIZE, 2, 0, C1,C2,C3,C4)		\
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|  1.08G|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 438k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 438k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|  1.08G|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 857k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 857k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|  1.08G|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 654M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 502k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 502k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 430k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 430k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 850k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 850k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 654M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 493k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 493k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  |  |  424|  1.08G|	WRITE_RGB(SIZE, 3, 1, C1,C2,C3,C4)
  |  |  |  |  ------------------
  |  |  |  |  |  |  391|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 0] ];						\
  |  |  |  |  |  |  392|  1.08G|	x_ptr[(ROW)*x_stride+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 445k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 445k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  393|  1.08G|	x_ptr[(ROW)*x_stride+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 863k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 863k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  394|  1.08G|	x_ptr[(ROW)*x_stride+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 654M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 507k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 507k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  395|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(C4)] = 0;									\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (395:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  396|  1.08G|	rgb_y = RGB_Y_tab[ y_ptr[(ROW)*y_stride + 1] ];									\
  |  |  |  |  |  |  397|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C3)] = MAX(0, MIN(255, (rgb_y + b_u##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 655M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 420k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 420k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  398|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C2)] = MAX(0, MIN(255, (rgb_y - g_uv##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 18.6M, False: 1.06G]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 859k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 859k, False: 1.06G]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  399|  1.08G|	x_ptr[(ROW)*x_stride+(SIZE)+(C1)] = MAX(0, MIN(255, (rgb_y + r_v##UV_ROW) >> SCALEBITS_OUT));	\
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  258|  2.16G|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  |  |  Branch (258:20): [True: 654M, False: 429M]
  |  |  |  |  |  |  |  |  |  Branch (258:25): [True: 484k, False: 1.08G]
  |  |  |  |  |  |  |  |  |  Branch (258:33): [True: 484k, False: 428M]
  |  |  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  400|  1.08G|	if ((SIZE)>3) x_ptr[(ROW)*x_stride+(SIZE)+(C4)] = 0;
  |  |  |  |  |  |  ------------------
  |  |  |  |  |  |  |  Branch (400:6): [Folded - Ignored]
  |  |  |  |  |  |  ------------------
  |  |  |  |  ------------------
  |  |  ------------------
  ------------------
  115|  1.08G|			x_ptr += (PIXELS)*(SIZE);				\
  116|  1.08G|			y_ptr += (PIXELS);						\
  117|  1.08G|			u_ptr += (PIXELS)/2;					\
  118|  1.08G|			v_ptr += (PIXELS)/2;					\
  119|  1.08G|		}											\
  120|  1.88M|		x_ptr += x_dif + (VPIXELS-1)*x_stride;		\
  121|  1.88M|		y_ptr += y_dif + (VPIXELS-1)*y_stride;		\
  122|  1.88M|		u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  123|  1.88M|		v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride;	\
  124|  1.88M|	}												\
  125|  1.81k|}
colorspace_init:
  528|      2|{
  529|      2|	int32_t i;
  530|       |
  531|    514|	for (i = 0; i < 256; i++) {
  ------------------
  |  Branch (531:14): [True: 512, False: 2]
  ------------------
  532|    512|		RGB_Y_tab[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		RGB_Y_tab[i] = FIX_OUT(RGB_Y_OUT) * (i - Y_ADD_OUT);
  ------------------
  |  |  321|    512|#define Y_ADD_OUT		16
  ------------------
  533|    512|		B_U_tab[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		B_U_tab[i] = FIX_OUT(B_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  325|    512|#define U_ADD_OUT		128
  ------------------
  534|    512|		G_U_tab[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		G_U_tab[i] = FIX_OUT(G_U_OUT) * (i - U_ADD_OUT);
  ------------------
  |  |  325|    512|#define U_ADD_OUT		128
  ------------------
  535|    512|		G_V_tab[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		G_V_tab[i] = FIX_OUT(G_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  328|    512|#define V_ADD_OUT		128
  ------------------
  536|    512|		R_V_tab[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  331|    512|#define FIX_OUT(x)		((uint16_t) ((x) * (1L<<SCALEBITS_OUT) + 0.5))
  |  |  ------------------
  |  |  |  |  330|    512|#define SCALEBITS_OUT		13
  |  |  ------------------
  ------------------
              		R_V_tab[i] = FIX_OUT(R_V_OUT) * (i - V_ADD_OUT);
  ------------------
  |  |  328|    512|#define V_ADD_OUT		128
  ------------------
  537|    512|	}
  538|      2|}

image_printf:
  587|  4.09k|{
  588|  4.09k|	va_list args;
  589|  4.09k|	char buf[FONT_BUF_SZ];
  590|  4.09k|	int i;
  591|       |
  592|  4.09k|	va_start(args, fmt);
  593|  4.09k|	vsprintf(buf, fmt, args);
  594|  4.09k|	va_end(args);
  595|       |
  596|   130k|	for (i = 0; i < buf[i]; i++) {
  ------------------
  |  Branch (596:14): [True: 126k, False: 4.09k]
  ------------------
  597|   126k|		const char * font;
  598|       |
  599|   126k|		if (buf[i] >= '!' && buf[i] <= '@')
  ------------------
  |  Branch (599:7): [True: 111k, False: 14.6k]
  |  Branch (599:24): [True: 16.2k, False: 95.2k]
  ------------------
  600|  16.2k|			font = ascii33[buf[i]-'!'];
  601|   109k|		else if (buf[i] >= 'A' && buf[i] <= 'Z')
  ------------------
  |  Branch (601:12): [True: 95.2k, False: 14.6k]
  |  Branch (601:29): [True: 0, False: 95.2k]
  ------------------
  602|      0|			font = ascii65[buf[i]-'A'];
  603|   109k|		else if (buf[i] >= '[' && buf[i] <= '`')
  ------------------
  |  Branch (603:12): [True: 95.2k, False: 14.6k]
  |  Branch (603:29): [True: 0, False: 95.2k]
  ------------------
  604|      0|			font = ascii91[buf[i]-'['];
  605|   109k|		else if (buf[i] >= 'a' && buf[i] <= 'z')
  ------------------
  |  Branch (605:12): [True: 95.2k, False: 14.6k]
  |  Branch (605:29): [True: 95.2k, False: 0]
  ------------------
  606|  95.2k|			font = ascii65[buf[i]-'a'];
  607|  14.6k|		else
  608|  14.6k|			continue;
  609|       |
  610|   111k|		draw_num(img, edged_width, height, font, x + i*FONT_ZOOM*(FONT_WIDTH+1), y);
  ------------------
  |  |  562|   111k|#define FONT_ZOOM	4
  ------------------
              		draw_num(img, edged_width, height, font, x + i*FONT_ZOOM*(FONT_WIDTH+1), y);
  ------------------
  |  |   32|   111k|#define FONT_WIDTH	4
  ------------------
  611|   111k|	}
  612|  4.09k|}
font.c:draw_num:
  567|   111k|{
  568|   111k|	int i, j;
  569|       |
  570|  1.19M|	for (j = 0; j < FONT_ZOOM * FONT_HEIGHT && y+j < height; j++)
  ------------------
  |  |  562|  1.19M|#define FONT_ZOOM	4
  ------------------
              	for (j = 0; j < FONT_ZOOM * FONT_HEIGHT && y+j < height; j++)
  ------------------
  |  |   33|  2.39M|#define FONT_HEIGHT	6
  ------------------
  |  Branch (570:14): [True: 1.15M, False: 41.1k]
  |  Branch (570:45): [True: 1.08M, False: 70.2k]
  ------------------
  571|  8.40M|		for (i = 0; i < FONT_ZOOM * FONT_WIDTH && x+i < stride; i++)
  ------------------
  |  |  562|  8.40M|#define FONT_ZOOM	4
  ------------------
              		for (i = 0; i < FONT_ZOOM * FONT_WIDTH && x+i < stride; i++)
  ------------------
  |  |   32|  16.8M|#define FONT_WIDTH	4
  ------------------
  |  Branch (571:15): [True: 7.95M, False: 451k]
  |  Branch (571:45): [True: 7.31M, False: 636k]
  ------------------
  572|  7.31M|			if (font[(j/FONT_ZOOM)*FONT_WIDTH + (i/FONT_ZOOM)])
  ------------------
  |  |  562|  7.31M|#define FONT_ZOOM	4
  ------------------
              			if (font[(j/FONT_ZOOM)*FONT_WIDTH + (i/FONT_ZOOM)])
  ------------------
  |  |   32|  7.31M|#define FONT_WIDTH	4
  ------------------
              			if (font[(j/FONT_ZOOM)*FONT_WIDTH + (i/FONT_ZOOM)])
  ------------------
  |  |  562|  7.31M|#define FONT_ZOOM	4
  ------------------
  |  Branch (572:8): [True: 3.99M, False: 3.32M]
  ------------------
  573|  3.99M|			{
  574|  3.99M|				int offset = (y+j)*stride + (x+i);
  575|  3.99M|				int offset2 =((y+j)/2)*(stride/2) + ((x+i)/2);
  576|  3.99M|				img->y[offset] = 255;
  577|  3.99M|				img->u[offset2] = 127;
  578|  3.99M|				img->v[offset2] = 127;
  579|  3.99M|			}
  580|   111k|}

image_create:
   49|   139k|{
   50|   139k|	const uint32_t edged_width2 = edged_width / 2;
   51|   139k|	const uint32_t edged_height2 = edged_height / 2;
   52|       |
   53|   139k|	image->y =
   54|   139k|		xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE);
  ------------------
  |  |   41|   139k|#define SAFETY	64
  ------------------
              		xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE);
  ------------------
  |  |  131|   139k|#    define CACHE_LINE  64
  ------------------
   55|   139k|	if (image->y == NULL) {
  ------------------
  |  Branch (55:6): [True: 0, False: 139k]
  ------------------
   56|      0|		return -1;
   57|      0|	}
   58|   139k|	memset(image->y, 0, edged_width * (edged_height + 1) + SAFETY);
  ------------------
  |  |   41|   139k|#define SAFETY	64
  ------------------
   59|       |
   60|   139k|	image->u = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |   41|   139k|#define SAFETY	64
  ------------------
              	image->u = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |  131|   139k|#    define CACHE_LINE  64
  ------------------
   61|   139k|	if (image->u == NULL) {
  ------------------
  |  Branch (61:6): [True: 0, False: 139k]
  ------------------
   62|      0|		xvid_free(image->y);
   63|      0|		image->y = NULL;
   64|      0|		return -1;
   65|      0|	}
   66|   139k|	memset(image->u, 0, edged_width2 * edged_height2 + SAFETY);
  ------------------
  |  |   41|   139k|#define SAFETY	64
  ------------------
   67|       |
   68|   139k|	image->v = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |   41|   139k|#define SAFETY	64
  ------------------
              	image->v = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE);
  ------------------
  |  |  131|   139k|#    define CACHE_LINE  64
  ------------------
   69|   139k|	if (image->v == NULL) {
  ------------------
  |  Branch (69:6): [True: 0, False: 139k]
  ------------------
   70|      0|		xvid_free(image->u);
   71|      0|		image->u = NULL;
   72|      0|		xvid_free(image->y);
   73|      0|		image->y = NULL;
   74|      0|		return -1;
   75|      0|	}
   76|   139k|	memset(image->v, 0, edged_width2 * edged_height2 + SAFETY);
  ------------------
  |  |   41|   139k|#define SAFETY	64
  ------------------
   77|       |
   78|   139k|	image->y += EDGE_SIZE * edged_width + EDGE_SIZE;
  ------------------
  |  |   36|   139k|#define EDGE_SIZE  64
  ------------------
              	image->y += EDGE_SIZE * edged_width + EDGE_SIZE;
  ------------------
  |  |   36|   139k|#define EDGE_SIZE  64
  ------------------
   79|   139k|	image->u += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	image->u += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
   80|   139k|	image->v += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	image->v += EDGE_SIZE2 * edged_width2 + EDGE_SIZE2;
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
   81|       |
   82|   139k|	return 0;
   83|   139k|}
image_destroy:
   91|   201k|{
   92|   201k|	const uint32_t edged_width2 = edged_width / 2;
   93|       |
   94|   201k|	if (image->y) {
  ------------------
  |  Branch (94:6): [True: 139k, False: 61.7k]
  ------------------
   95|   139k|		xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE));
  ------------------
  |  |   36|   139k|#define EDGE_SIZE  64
  ------------------
              		xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE));
  ------------------
  |  |   36|   139k|#define EDGE_SIZE  64
  ------------------
   96|   139k|		image->y = NULL;
   97|   139k|	}
   98|   201k|	if (image->u) {
  ------------------
  |  Branch (98:6): [True: 139k, False: 61.7k]
  ------------------
   99|   139k|		xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  100|   139k|		image->u = NULL;
  101|   139k|	}
  102|   201k|	if (image->v) {
  ------------------
  |  Branch (102:6): [True: 139k, False: 61.7k]
  ------------------
  103|   139k|		xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2));
  ------------------
  |  |   42|   139k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   139k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  104|   139k|		image->v = NULL;
  105|   139k|	}
  106|   201k|}
image_swap:
  112|  21.0k|{
  113|  21.0k|    SWAP(uint8_t*, image1->y, image2->y);
  ------------------
  |  |  264|  21.0k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
  114|  21.0k|    SWAP(uint8_t*, image1->u, image2->u);
  ------------------
  |  |  264|  21.0k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
  115|  21.0k|    SWAP(uint8_t*, image1->v, image2->v);
  ------------------
  |  |  264|  21.0k|#define SWAP(_T_,A,B)    { _T_ tmp = A; A = B; B = tmp; }
  ------------------
  116|  21.0k|}
image_copy:
  124|     42|{
  125|     42|	memcpy(image1->y, image2->y, edged_width * height);
  126|     42|	memcpy(image1->u, image2->u, edged_width * height / 4);
  127|     42|	memcpy(image1->v, image2->v, edged_width * height / 4);
  128|     42|}
image_setedges:
  142|  7.93k|{
  143|  7.93k|	const uint32_t edged_width2 = edged_width / 2;
  144|  7.93k|	uint32_t width2;
  145|  7.93k|	uint32_t i;
  146|  7.93k|	uint8_t *dst;
  147|  7.93k|	uint8_t *src;
  148|       |
  149|  7.93k|	dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
  ------------------
  |  |   36|  7.93k|#define EDGE_SIZE  64
  ------------------
              	dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width);
  ------------------
  |  |   36|  7.93k|#define EDGE_SIZE  64
  ------------------
  150|  7.93k|	src = image->y;
  151|       |
  152|       |	/* According to the Standard Clause 7.6.4, padding is done starting at 16
  153|       |	 * pixel width and height multiples. This was not respected in old xvids */
  154|  7.93k|	if ((bs_version >= SETEDGES_BUG_BEFORE &&
  ------------------
  |  |  131|  15.8k|#define SETEDGES_BUG_BEFORE		18
  ------------------
  |  Branch (154:7): [True: 7.73k, False: 206]
  ------------------
  155|  7.93k|		bs_version <  SETEDGES_BUG_AFTER) || 
  ------------------
  |  |  132|  7.73k|#define SETEDGES_BUG_AFTER		57
  ------------------
  |  Branch (155:3): [True: 5, False: 7.72k]
  ------------------
  156|  7.93k|		bs_version >= SETEDGES_BUG_REFIXED) {
  ------------------
  |  |  133|  7.93k|#define SETEDGES_BUG_REFIXED		63
  ------------------
  |  Branch (156:3): [True: 7.72k, False: 207]
  ------------------
  157|  7.73k|		width  = (width+15)&~15;
  158|  7.73k|		height = (height+15)&~15;
  159|  7.73k|	}
  160|       |
  161|  7.93k|	width2 = MAX(1, width/2);
  ------------------
  |  |  258|  7.93k|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 36, False: 7.90k]
  |  |  ------------------
  ------------------
  162|       |
  163|   515k|	for (i = 0; i < EDGE_SIZE; i++) {
  ------------------
  |  |   36|   515k|#define EDGE_SIZE  64
  ------------------
  |  Branch (163:14): [True: 508k, False: 7.93k]
  ------------------
  164|   508k|		memset(dst, *src, EDGE_SIZE);
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  165|   508k|		memcpy(dst + EDGE_SIZE, src, width);
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  166|   508k|		memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  167|   508k|			   EDGE_SIZE);
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  168|   508k|		dst += edged_width;
  169|   508k|	}
  170|       |
  171|  10.2M|	for (i = 0; i < height; i++) {
  ------------------
  |  Branch (171:14): [True: 10.2M, False: 7.93k]
  ------------------
  172|  10.2M|		memset(dst, *src, EDGE_SIZE);
  ------------------
  |  |   36|  10.2M|#define EDGE_SIZE  64
  ------------------
  173|  10.2M|		memset(dst + edged_width - EDGE_SIZE, src[width - 1], EDGE_SIZE);
  ------------------
  |  |   36|  10.2M|#define EDGE_SIZE  64
  ------------------
              		memset(dst + edged_width - EDGE_SIZE, src[width - 1], EDGE_SIZE);
  ------------------
  |  |   36|  10.2M|#define EDGE_SIZE  64
  ------------------
  174|  10.2M|		dst += edged_width;
  175|  10.2M|		src += edged_width;
  176|  10.2M|	}
  177|       |
  178|  7.93k|	src -= edged_width;
  179|   515k|	for (i = 0; i < EDGE_SIZE; i++) {
  ------------------
  |  |   36|   515k|#define EDGE_SIZE  64
  ------------------
  |  Branch (179:14): [True: 508k, False: 7.93k]
  ------------------
  180|   508k|		memset(dst, *src, EDGE_SIZE);
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  181|   508k|		memcpy(dst + EDGE_SIZE, src, width);
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  182|   508k|		memset(dst + edged_width - EDGE_SIZE, *(src + width - 1),
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  183|   508k|				   EDGE_SIZE);
  ------------------
  |  |   36|   508k|#define EDGE_SIZE  64
  ------------------
  184|   508k|		dst += edged_width;
  185|   508k|	}
  186|       |
  187|       |
  188|       |	/* U */
  189|  7.93k|	dst = image->u - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  7.93k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  7.93k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	dst = image->u - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  7.93k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  7.93k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  190|  7.93k|	src = image->u;
  191|       |
  192|   261k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   261k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   261k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (192:14): [True: 254k, False: 7.93k]
  ------------------
  193|   254k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  194|   254k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  195|   254k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  196|   254k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  197|   254k|		dst += edged_width2;
  198|   254k|	}
  199|       |
  200|  5.13M|	for (i = 0; i < height / 2; i++) {
  ------------------
  |  Branch (200:14): [True: 5.12M, False: 7.93k]
  ------------------
  201|  5.12M|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|  5.12M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.12M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  202|  5.12M|		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.12M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.12M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.12M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.12M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  203|  5.12M|		dst += edged_width2;
  204|  5.12M|		src += edged_width2;
  205|  5.12M|	}
  206|  7.93k|	src -= edged_width2;
  207|   261k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   261k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   261k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (207:14): [True: 254k, False: 7.93k]
  ------------------
  208|   254k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  209|   254k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  210|   254k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  211|   254k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  212|   254k|		dst += edged_width2;
  213|   254k|	}
  214|       |
  215|       |
  216|       |	/* V */
  217|  7.93k|	dst = image->v - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  7.93k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  7.93k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              	dst = image->v - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2);
  ------------------
  |  |   42|  7.93k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  7.93k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  218|  7.93k|	src = image->v;
  219|       |
  220|   261k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   261k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   261k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (220:14): [True: 254k, False: 7.93k]
  ------------------
  221|   254k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  222|   254k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  223|   254k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  224|   254k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  225|   254k|		dst += edged_width2;
  226|   254k|	}
  227|       |
  228|  5.13M|	for (i = 0; i < height / 2; i++) {
  ------------------
  |  Branch (228:14): [True: 5.12M, False: 7.93k]
  ------------------
  229|  5.12M|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|  5.12M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.12M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  230|  5.12M|		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.12M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.12M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
              		memset(dst + edged_width2 - EDGE_SIZE2, src[width2 - 1], EDGE_SIZE2);
  ------------------
  |  |   42|  5.12M|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|  5.12M|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  231|  5.12M|		dst += edged_width2;
  232|  5.12M|		src += edged_width2;
  233|  5.12M|	}
  234|  7.93k|	src -= edged_width2;
  235|   261k|	for (i = 0; i < EDGE_SIZE2; i++) {
  ------------------
  |  |   42|   261k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   261k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  |  Branch (235:14): [True: 254k, False: 7.93k]
  ------------------
  236|   254k|		memset(dst, *src, EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  237|   254k|		memcpy(dst + EDGE_SIZE2, src, width2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  238|   254k|		memset(dst + edged_width2 - EDGE_SIZE2, *(src + width2 - 1),
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  239|   254k|			   EDGE_SIZE2);
  ------------------
  |  |   42|   254k|#define EDGE_SIZE2  (EDGE_SIZE/2)
  |  |  ------------------
  |  |  |  |   36|   254k|#define EDGE_SIZE  64
  |  |  ------------------
  ------------------
  240|   254k|		dst += edged_width2;
  241|   254k|	}
  242|  7.93k|}
image_output:
  629|  6.05k|{
  630|  6.05k|	const int edged_width2 = edged_width/2;
  631|  6.05k|	int height2 = height/2;
  632|       |
  633|       |/*
  634|       |	if (interlacing)
  635|       |		image_printf(image, edged_width, height, 5,100, "[i]=%i,%i",width,height);
  636|       |	image_dump_yuvpgm(image, edged_width, width, height, "\\decode.pgm");
  637|       |*/
  638|       |
  639|  6.05k|	switch (csp & ~XVID_CSP_VFLIP) {
  ------------------
  |  |  127|  6.05k|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  |  Branch (639:10): [True: 0, False: 6.05k]
  ------------------
  640|      0|	case XVID_CSP_RGB555:
  ------------------
  |  |  122|      0|#define XVID_CSP_RGB555   (1<<10) /* 16-bit rgb555 packed */
  ------------------
  |  Branch (640:2): [True: 0, False: 6.05k]
  ------------------
  641|      0|		safe_packed_conv(
  642|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  643|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  644|      0|			interlacing?yv12_to_rgb555i  :yv12_to_rgb555,
  ------------------
  |  Branch (644:4): [True: 0, False: 0]
  ------------------
  645|      0|			interlacing?yv12_to_rgb555i_c:yv12_to_rgb555_c, 2, interlacing);
  ------------------
  |  Branch (645:4): [True: 0, False: 0]
  ------------------
  646|      0|		return 0;
  647|       |
  648|      0|	case XVID_CSP_RGB565:
  ------------------
  |  |  123|      0|#define XVID_CSP_RGB565   (1<<11) /* 16-bit rgb565 packed */
  ------------------
  |  Branch (648:2): [True: 0, False: 6.05k]
  ------------------
  649|      0|		safe_packed_conv(
  650|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  651|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  652|      0|			interlacing?yv12_to_rgb565i  :yv12_to_rgb565,
  ------------------
  |  Branch (652:4): [True: 0, False: 0]
  ------------------
  653|      0|			interlacing?yv12_to_rgb565i_c:yv12_to_rgb565_c, 2, interlacing);
  ------------------
  |  Branch (653:4): [True: 0, False: 0]
  ------------------
  654|      0|		return 0;
  655|       |
  656|  6.05k|    case XVID_CSP_BGR:
  ------------------
  |  |  121|  6.05k|#define XVID_CSP_BGR      (1<< 9) /* 24-bit bgr packed */
  ------------------
  |  Branch (656:5): [True: 6.05k, False: 0]
  ------------------
  657|  6.05k|		safe_packed_conv(
  658|  6.05k|			dst[0], dst_stride[0], image->y, image->u, image->v,
  659|  6.05k|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|  6.05k|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  660|  6.05k|			interlacing?yv12_to_bgri  :yv12_to_bgr,
  ------------------
  |  Branch (660:4): [True: 1.94k, False: 4.10k]
  ------------------
  661|  6.05k|			interlacing?yv12_to_bgri_c:yv12_to_bgr_c, 3, interlacing);
  ------------------
  |  Branch (661:4): [True: 1.94k, False: 4.10k]
  ------------------
  662|  6.05k|		return 0;
  663|       |
  664|      0|	case XVID_CSP_BGRA:
  ------------------
  |  |  117|      0|#define XVID_CSP_BGRA     (1<< 6) /* 32-bit bgra packed */
  ------------------
  |  Branch (664:2): [True: 0, False: 6.05k]
  ------------------
  665|      0|		safe_packed_conv(
  666|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  667|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  668|      0|			interlacing?yv12_to_bgrai  :yv12_to_bgra,
  ------------------
  |  Branch (668:4): [True: 0, False: 0]
  ------------------
  669|      0|			interlacing?yv12_to_bgrai_c:yv12_to_bgra_c, 4, interlacing);
  ------------------
  |  Branch (669:4): [True: 0, False: 0]
  ------------------
  670|      0|		return 0;
  671|       |
  672|      0|	case XVID_CSP_ABGR:
  ------------------
  |  |  118|      0|#define XVID_CSP_ABGR     (1<< 7) /* 32-bit abgr packed */
  ------------------
  |  Branch (672:2): [True: 0, False: 6.05k]
  ------------------
  673|      0|		safe_packed_conv(
  674|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  675|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  676|      0|			interlacing?yv12_to_abgri  :yv12_to_abgr,
  ------------------
  |  Branch (676:4): [True: 0, False: 0]
  ------------------
  677|      0|			interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4, interlacing);
  ------------------
  |  Branch (677:4): [True: 0, False: 0]
  ------------------
  678|      0|		return 0;
  679|       |
  680|      0|	case XVID_CSP_RGB:
  ------------------
  |  |  116|      0|#define XVID_CSP_RGB      (1<<16) /* 24-bit rgb packed */
  ------------------
  |  Branch (680:2): [True: 0, False: 6.05k]
  ------------------
  681|      0|		safe_packed_conv(
  682|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  683|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  684|      0|			interlacing?yv12_to_rgbi  :yv12_to_rgb,
  ------------------
  |  Branch (684:4): [True: 0, False: 0]
  ------------------
  685|      0|			interlacing?yv12_to_rgbi_c:yv12_to_rgb_c, 3, interlacing);
  ------------------
  |  Branch (685:4): [True: 0, False: 0]
  ------------------
  686|      0|		return 0;
  687|       |
  688|      0|	case XVID_CSP_RGBA:
  ------------------
  |  |  119|      0|#define XVID_CSP_RGBA     (1<< 8) /* 32-bit rgba packed */
  ------------------
  |  Branch (688:2): [True: 0, False: 6.05k]
  ------------------
  689|      0|		safe_packed_conv(
  690|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  691|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  692|      0|			interlacing?yv12_to_rgbai  :yv12_to_rgba,
  ------------------
  |  Branch (692:4): [True: 0, False: 0]
  ------------------
  693|      0|			interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4, interlacing);
  ------------------
  |  Branch (693:4): [True: 0, False: 0]
  ------------------
  694|      0|		return 0;
  695|       |
  696|      0|	case XVID_CSP_ARGB:
  ------------------
  |  |  120|      0|#define XVID_CSP_ARGB     (1<<15) /* 32-bit argb packed */
  ------------------
  |  Branch (696:2): [True: 0, False: 6.05k]
  ------------------
  697|      0|		safe_packed_conv(
  698|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  699|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  700|      0|			interlacing?yv12_to_argbi  :yv12_to_argb,
  ------------------
  |  Branch (700:4): [True: 0, False: 0]
  ------------------
  701|      0|			interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4, interlacing);
  ------------------
  |  Branch (701:4): [True: 0, False: 0]
  ------------------
  702|      0|		return 0;
  703|       |
  704|      0|	case XVID_CSP_YUY2:
  ------------------
  |  |  113|      0|#define XVID_CSP_YUY2     (1<< 3) /* 4:2:2 packed */
  ------------------
  |  Branch (704:2): [True: 0, False: 6.05k]
  ------------------
  705|      0|		safe_packed_conv(
  706|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  707|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  708|      0|			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
  ------------------
  |  Branch (708:4): [True: 0, False: 0]
  ------------------
  709|      0|			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
  ------------------
  |  Branch (709:4): [True: 0, False: 0]
  ------------------
  710|      0|		return 0;
  711|       |
  712|      0|	case XVID_CSP_YVYU:		/* u,v swapped */
  ------------------
  |  |  115|      0|#define XVID_CSP_YVYU     (1<< 5) /* 4:2:2 packed */
  ------------------
  |  Branch (712:2): [True: 0, False: 6.05k]
  ------------------
  713|      0|		safe_packed_conv(
  714|      0|			dst[0], dst_stride[0], image->y, image->v, image->u,
  715|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  716|      0|			interlacing?yv12_to_yuyvi  :yv12_to_yuyv,
  ------------------
  |  Branch (716:4): [True: 0, False: 0]
  ------------------
  717|      0|			interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
  ------------------
  |  Branch (717:4): [True: 0, False: 0]
  ------------------
  718|      0|		return 0;
  719|       |
  720|      0|	case XVID_CSP_UYVY:
  ------------------
  |  |  114|      0|#define XVID_CSP_UYVY     (1<< 4) /* 4:2:2 packed */
  ------------------
  |  Branch (720:2): [True: 0, False: 6.05k]
  ------------------
  721|      0|		safe_packed_conv(
  722|      0|			dst[0], dst_stride[0], image->y, image->u, image->v,
  723|      0|			edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  724|      0|			interlacing?yv12_to_uyvyi  :yv12_to_uyvy,
  ------------------
  |  Branch (724:4): [True: 0, False: 0]
  ------------------
  725|      0|			interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2, interlacing);
  ------------------
  |  Branch (725:4): [True: 0, False: 0]
  ------------------
  726|      0|		return 0;
  727|       |
  728|      0|	case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */
  ------------------
  |  |  111|      0|#define XVID_CSP_I420     (1<< 1) /* 4:2:0 planar */
  ------------------
  |  Branch (728:2): [True: 0, False: 6.05k]
  ------------------
  729|      0|		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
  730|      0|			dst_stride[0], dst_stride[0]/2,
  731|      0|			image->y, image->u, image->v, edged_width, edged_width2,
  732|      0|			width, height, (csp & XVID_CSP_VFLIP));
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  733|      0|		return 0;
  734|       |
  735|      0|	case XVID_CSP_YV12:	/* YCrCb == YVU == U and V plane swapped */
  ------------------
  |  |  112|      0|#define XVID_CSP_YV12     (1<< 2) /* 4:2:0 planar */
  ------------------
  |  Branch (735:2): [True: 0, False: 6.05k]
  ------------------
  736|      0|		yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2,
  737|      0|			dst_stride[0], dst_stride[0]/2,
  738|      0|			image->y, image->v, image->u, edged_width, edged_width2,
  739|      0|			width, height, (csp & XVID_CSP_VFLIP));
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  740|      0|		return 0;
  741|       |
  742|      0|	case XVID_CSP_PLANAR:  /* YCbCr with arbitrary pointers and different strides for Y and UV */
  ------------------
  |  |  109|      0|#define XVID_CSP_PLANAR   (1<< 0) /* 4:2:0 planar (==I420, except for pointers/strides) */
  ------------------
  |  Branch (742:2): [True: 0, False: 6.05k]
  ------------------
  743|      0|		yv12_to_yv12(dst[0], dst[1], dst[2],
  744|      0|			dst_stride[0], dst_stride[1],	/* v: dst_stride[2] not yet supported */
  745|      0|			image->y, image->u, image->v, edged_width, edged_width2,
  746|      0|			width, height, (csp & XVID_CSP_VFLIP));
  ------------------
  |  |  127|      0|#define XVID_CSP_VFLIP    (1<<31) /* vertical flip mask */
  ------------------
  747|      0|		return 0;
  748|       |
  749|      0|	case XVID_CSP_INTERNAL :
  ------------------
  |  |  125|      0|#define XVID_CSP_INTERNAL (1<<13) /* decoder only: 4:2:0 planar, returns ptrs to internal buffers */
  ------------------
  |  Branch (749:2): [True: 0, False: 6.05k]
  ------------------
  750|      0|		dst[0] = image->y;
  751|      0|		dst[1] = image->u;
  752|      0|		dst[2] = image->v;
  753|      0|		dst_stride[0] = edged_width;
  754|      0|		dst_stride[1] = edged_width/2;
  755|      0|		dst_stride[2] = edged_width/2;
  756|      0|		return 0;
  757|       |
  758|      0|	case XVID_CSP_NULL:
  ------------------
  |  |  126|      0|#define XVID_CSP_NULL     (1<<14) /* decoder only: dont output anything */
  ------------------
  |  Branch (758:2): [True: 0, False: 6.05k]
  ------------------
  759|      0|	case XVID_CSP_SLICE:
  ------------------
  |  |  124|      0|#define XVID_CSP_SLICE    (1<<12) /* decoder only: 4:2:0 planar, per slice rendering */
  ------------------
  |  Branch (759:2): [True: 0, False: 6.05k]
  ------------------
  760|      0|		return 0;
  761|       |
  762|  6.05k|	}
  763|       |
  764|      0|	return -1;
  765|  6.05k|}
image.c:safe_packed_conv:
  394|  6.05k|{
  395|  6.05k|	int width_opt, width_c, height_opt;
  396|       |
  397|  6.05k|    if (width<0 || width==1 || height==1) return; /* forget about it */
  ------------------
  |  Branch (397:9): [True: 0, False: 6.05k]
  |  Branch (397:20): [True: 231, False: 5.82k]
  |  Branch (397:32): [True: 150, False: 5.67k]
  ------------------
  398|       |
  399|  5.67k|	if (func_opt != func_c && x_stride < size*((width+15)/16)*16)
  ------------------
  |  Branch (399:6): [True: 0, False: 5.67k]
  |  Branch (399:28): [True: 0, False: 0]
  ------------------
  400|      0|	{
  401|      0|		width_opt = width & (~15);
  402|      0|		width_c = (width - width_opt) & (~1);
  403|      0|	}
  404|  5.67k|	else if (func_opt != func_c && !(width&1) && (size==3))
  ------------------
  |  Branch (404:11): [True: 0, False: 5.67k]
  |  Branch (404:33): [True: 0, False: 0]
  |  Branch (404:47): [True: 0, False: 0]
  ------------------
  405|      0|	{
  406|       |        /* MMX reads 4 bytes per pixel for RGB/BGR */
  407|      0|        width_opt = width - 2;
  408|      0|        width_c = 2;
  409|      0|    }
  410|  5.67k|    else {
  411|       |        /* Enforce the width to be divisable by two. */
  412|  5.67k|		width_opt = width & (~1);
  413|  5.67k|		width_c = 0;
  414|  5.67k|	}
  415|       |
  416|       |    /* packed conversions require height to be divisable by 2
  417|       |       (or even by 4 for interlaced conversion) */
  418|  5.67k|       if (interlacing)
  ------------------
  |  Branch (418:12): [True: 1.81k, False: 3.85k]
  ------------------
  419|  1.81k|               height_opt = height & (~3);
  420|  3.85k|       else
  421|  3.85k|               height_opt = height & (~1);
  422|       |
  423|  5.67k|	func_opt(x_ptr, x_stride,
  424|  5.67k|			y_ptr, u_ptr, v_ptr, y_stride, uv_stride,
  425|  5.67k|			width_opt, height_opt, vflip);
  426|       |
  427|  5.67k|	if (width_c)
  ------------------
  |  Branch (427:6): [True: 0, False: 5.67k]
  ------------------
  428|      0|	{
  429|      0|		func_c(x_ptr + size*width_opt, x_stride,
  430|      0|			y_ptr + width_opt, u_ptr + width_opt/2, v_ptr + width_opt/2,
  431|      0|			y_stride, uv_stride, width_c, height_opt, vflip);
  432|      0|	}
  433|  5.67k|}

decoder.c:image_null:
   43|   201k|{
   44|   201k|	image->y = image->u = image->v = NULL;
   45|   201k|}

interpolate8x8_avg2_c:
   61|  2.99M|{
   62|  2.99M|    uint32_t i;
   63|  2.99M|	const int32_t round = 1 - rounding;
   64|       |
   65|  26.9M|    for(i = 0; i < height; i++) {
  ------------------
  |  Branch (65:16): [True: 23.9M, False: 2.99M]
  ------------------
   66|  23.9M|        dst[0] = (src1[0] + src2[0] + round) >> 1;
   67|  23.9M|        dst[1] = (src1[1] + src2[1] + round) >> 1;
   68|  23.9M|        dst[2] = (src1[2] + src2[2] + round) >> 1;
   69|  23.9M|        dst[3] = (src1[3] + src2[3] + round) >> 1;
   70|  23.9M|        dst[4] = (src1[4] + src2[4] + round) >> 1;
   71|  23.9M|        dst[5] = (src1[5] + src2[5] + round) >> 1;
   72|  23.9M|        dst[6] = (src1[6] + src2[6] + round) >> 1;
   73|  23.9M|        dst[7] = (src1[7] + src2[7] + round) >> 1;
   74|       |
   75|  23.9M|        dst += stride;
   76|  23.9M|        src1 += stride;
   77|  23.9M|        src2 += stride;
   78|  23.9M|    }
   79|  2.99M|}
interpolate8x8_halfpel_add_c:
   83|  2.99M|{
   84|  2.99M|	interpolate8x8_avg2_c(dst, dst, src, stride, 0, 8);
   85|  2.99M|}
interpolate8x8_halfpel_h_c:
  117|   284k|{
  118|   284k|	uintptr_t j;
  ------------------
  |  |  138|   284k|#        define uintptr_t uint64_t
  ------------------
  119|       |
  120|   284k|	if (rounding) {
  ------------------
  |  Branch (120:6): [True: 29.8k, False: 254k]
  ------------------
  121|   268k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (121:15): [True: 238k, False: 29.8k]
  ------------------
  122|   238k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1);
  123|   238k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1);
  124|   238k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] )>>1);
  125|   238k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] )>>1);
  126|   238k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] )>>1);
  127|   238k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] )>>1);
  128|   238k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] )>>1);
  129|   238k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] )>>1);
  130|   238k|		}
  131|   254k|	} else {
  132|  2.29M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (132:15): [True: 2.03M, False: 254k]
  ------------------
  133|  2.03M|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] + 1)>>1);
  134|  2.03M|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] + 1)>>1);
  135|  2.03M|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] + 1)>>1);
  136|  2.03M|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] + 1)>>1);
  137|  2.03M|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] + 1)>>1);
  138|  2.03M|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] + 1)>>1);
  139|  2.03M|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] + 1)>>1);
  140|  2.03M|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] + 1)>>1);
  141|  2.03M|		}
  142|   254k|	}
  143|   284k|}
interpolate8x4_halfpel_h_c:
  152|  16.1k|{
  153|  16.1k|	uintptr_t j;
  ------------------
  |  |  138|  16.1k|#        define uintptr_t uint64_t
  ------------------
  154|       |
  155|  16.1k|	if (rounding) {
  ------------------
  |  Branch (155:6): [True: 9.36k, False: 6.83k]
  ------------------
  156|  46.8k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (156:15): [True: 37.4k, False: 9.36k]
  ------------------
  157|  37.4k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1);
  158|  37.4k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1);
  159|  37.4k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] )>>1);
  160|  37.4k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] )>>1);
  161|  37.4k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] )>>1);
  162|  37.4k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] )>>1);
  163|  37.4k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] )>>1);
  164|  37.4k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] )>>1);
  165|  37.4k|		}
  166|  9.36k|	} else {
  167|  34.1k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (167:15): [True: 27.3k, False: 6.83k]
  ------------------
  168|  27.3k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] + 1)>>1);
  169|  27.3k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] + 1)>>1);
  170|  27.3k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + 3] + 1)>>1);
  171|  27.3k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + 4] + 1)>>1);
  172|  27.3k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + 5] + 1)>>1);
  173|  27.3k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + 6] + 1)>>1);
  174|  27.3k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + 7] + 1)>>1);
  175|  27.3k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + 8] + 1)>>1);
  176|  27.3k|		}
  177|  6.83k|	}
  178|  16.1k|}
interpolate8x8_halfpel_h_add_c:
  187|   117k|{
  188|   117k|	uintptr_t j;
  ------------------
  |  |  138|   117k|#        define uintptr_t uint64_t
  ------------------
  189|       |
  190|   117k|	if (rounding) {
  ------------------
  |  Branch (190:6): [True: 0, False: 117k]
  ------------------
  191|      0|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (191:15): [True: 0, False: 0]
  ------------------
  192|      0|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + 1] )>>1) + dst[j+0] + 1)>>1);
  193|      0|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + 2] )>>1) + dst[j+1] + 1)>>1);
  194|      0|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + 3] )>>1) + dst[j+2] + 1)>>1);
  195|      0|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + 4] )>>1) + dst[j+3] + 1)>>1);
  196|      0|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + 5] )>>1) + dst[j+4] + 1)>>1);
  197|      0|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + 6] )>>1) + dst[j+5] + 1)>>1);
  198|      0|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + 7] )>>1) + dst[j+6] + 1)>>1);
  199|      0|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + 8] )>>1) + dst[j+7] + 1)>>1);
  200|      0|		}
  201|   117k|	} else {
  202|  1.05M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (202:15): [True: 938k, False: 117k]
  ------------------
  203|   938k|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + 1] + 1)>>1) + dst[j+0] + 1)>>1);
  204|   938k|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + 2] + 1)>>1) + dst[j+1] + 1)>>1);
  205|   938k|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + 3] + 1)>>1) + dst[j+2] + 1)>>1);
  206|   938k|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + 4] + 1)>>1) + dst[j+3] + 1)>>1);
  207|   938k|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + 5] + 1)>>1) + dst[j+4] + 1)>>1);
  208|   938k|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + 6] + 1)>>1) + dst[j+5] + 1)>>1);
  209|   938k|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + 7] + 1)>>1) + dst[j+6] + 1)>>1);
  210|   938k|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + 8] + 1)>>1) + dst[j+7] + 1)>>1);
  211|   938k|		}
  212|   117k|	}
  213|   117k|}
interpolate8x8_halfpel_v_c:
  222|   241k|{
  223|   241k|	uintptr_t j;
  ------------------
  |  |  138|   241k|#        define uintptr_t uint64_t
  ------------------
  224|       |
  225|       |
  226|   241k|	if (rounding) {
  ------------------
  |  Branch (226:6): [True: 23.3k, False: 217k]
  ------------------
  227|   210k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (227:15): [True: 186k, False: 23.3k]
  ------------------
  228|   186k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] )>>1);
  229|   186k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] )>>1);
  230|   186k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] )>>1);
  231|   186k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] )>>1);
  232|   186k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] )>>1);
  233|   186k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] )>>1);
  234|   186k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] )>>1);
  235|   186k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] )>>1);
  236|   186k|		}
  237|   217k|	} else {
  238|  1.95M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (238:15): [True: 1.74M, False: 217k]
  ------------------
  239|  1.74M|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] + 1)>>1);
  240|  1.74M|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] + 1)>>1);
  241|  1.74M|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] + 1)>>1);
  242|  1.74M|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] + 1)>>1);
  243|  1.74M|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] + 1)>>1);
  244|  1.74M|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] + 1)>>1);
  245|  1.74M|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] + 1)>>1);
  246|  1.74M|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] + 1)>>1);
  247|  1.74M|		}
  248|   217k|	}
  249|   241k|}
interpolate8x4_halfpel_v_c:
  258|  19.0k|{
  259|  19.0k|	uintptr_t j;
  ------------------
  |  |  138|  19.0k|#        define uintptr_t uint64_t
  ------------------
  260|       |
  261|       |
  262|  19.0k|	if (rounding) {
  ------------------
  |  Branch (262:6): [True: 9.55k, False: 9.48k]
  ------------------
  263|  47.7k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (263:15): [True: 38.2k, False: 9.55k]
  ------------------
  264|  38.2k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] )>>1);
  265|  38.2k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] )>>1);
  266|  38.2k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] )>>1);
  267|  38.2k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] )>>1);
  268|  38.2k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] )>>1);
  269|  38.2k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] )>>1);
  270|  38.2k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] )>>1);
  271|  38.2k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] )>>1);
  272|  38.2k|		}
  273|  9.55k|	} else {
  274|  47.4k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (274:15): [True: 37.9k, False: 9.48k]
  ------------------
  275|  37.9k|				dst[j + 0] = (uint8_t)((src[j + 0] + src[j + stride + 0] + 1)>>1);
  276|  37.9k|				dst[j + 1] = (uint8_t)((src[j + 1] + src[j + stride + 1] + 1)>>1);
  277|  37.9k|				dst[j + 2] = (uint8_t)((src[j + 2] + src[j + stride + 2] + 1)>>1);
  278|  37.9k|				dst[j + 3] = (uint8_t)((src[j + 3] + src[j + stride + 3] + 1)>>1);
  279|  37.9k|				dst[j + 4] = (uint8_t)((src[j + 4] + src[j + stride + 4] + 1)>>1);
  280|  37.9k|				dst[j + 5] = (uint8_t)((src[j + 5] + src[j + stride + 5] + 1)>>1);
  281|  37.9k|				dst[j + 6] = (uint8_t)((src[j + 6] + src[j + stride + 6] + 1)>>1);
  282|  37.9k|				dst[j + 7] = (uint8_t)((src[j + 7] + src[j + stride + 7] + 1)>>1);
  283|  37.9k|		}
  284|  9.48k|	}
  285|  19.0k|}
interpolate8x8_halfpel_v_add_c:
  294|  66.8k|{
  295|  66.8k|	uintptr_t j;
  ------------------
  |  |  138|  66.8k|#        define uintptr_t uint64_t
  ------------------
  296|       |
  297|       |
  298|  66.8k|	if (rounding) {
  ------------------
  |  Branch (298:6): [True: 0, False: 66.8k]
  ------------------
  299|      0|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (299:15): [True: 0, False: 0]
  ------------------
  300|      0|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + stride + 0] )>>1) + dst[j+0] + 1)>>1);
  301|      0|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + stride + 1] )>>1) + dst[j+1] + 1)>>1);
  302|      0|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + stride + 2] )>>1) + dst[j+2] + 1)>>1);
  303|      0|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + stride + 3] )>>1) + dst[j+3] + 1)>>1);
  304|      0|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + stride + 4] )>>1) + dst[j+4] + 1)>>1);
  305|      0|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + stride + 5] )>>1) + dst[j+5] + 1)>>1);
  306|      0|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + stride + 6] )>>1) + dst[j+6] + 1)>>1);
  307|      0|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + stride + 7] )>>1) + dst[j+7] + 1)>>1);
  308|      0|		}
  309|  66.8k|	} else {
  310|   601k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (310:15): [True: 534k, False: 66.8k]
  ------------------
  311|   534k|				dst[j + 0] = (uint8_t)((((src[j + 0] + src[j + stride + 0] + 1)>>1) + dst[j+0] + 1)>>1);
  312|   534k|				dst[j + 1] = (uint8_t)((((src[j + 1] + src[j + stride + 1] + 1)>>1) + dst[j+1] + 1)>>1);
  313|   534k|				dst[j + 2] = (uint8_t)((((src[j + 2] + src[j + stride + 2] + 1)>>1) + dst[j+2] + 1)>>1);
  314|   534k|				dst[j + 3] = (uint8_t)((((src[j + 3] + src[j + stride + 3] + 1)>>1) + dst[j+3] + 1)>>1);
  315|   534k|				dst[j + 4] = (uint8_t)((((src[j + 4] + src[j + stride + 4] + 1)>>1) + dst[j+4] + 1)>>1);
  316|   534k|				dst[j + 5] = (uint8_t)((((src[j + 5] + src[j + stride + 5] + 1)>>1) + dst[j+5] + 1)>>1);
  317|   534k|				dst[j + 6] = (uint8_t)((((src[j + 6] + src[j + stride + 6] + 1)>>1) + dst[j+6] + 1)>>1);
  318|   534k|				dst[j + 7] = (uint8_t)((((src[j + 7] + src[j + stride + 7] + 1)>>1) + dst[j+7] + 1)>>1);
  319|   534k|		}
  320|  66.8k|	}
  321|  66.8k|}
interpolate8x8_halfpel_hv_c:
  330|   135k|{
  331|   135k|	uintptr_t j;
  ------------------
  |  |  138|   135k|#        define uintptr_t uint64_t
  ------------------
  332|       |
  333|   135k|	if (rounding) {
  ------------------
  |  Branch (333:6): [True: 19.1k, False: 116k]
  ------------------
  334|   172k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (334:15): [True: 153k, False: 19.1k]
  ------------------
  335|   153k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2);
  336|   153k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2);
  337|   153k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2);
  338|   153k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2);
  339|   153k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2);
  340|   153k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2);
  341|   153k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2);
  342|   153k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2);
  343|   153k|		}
  344|   116k|	} else {
  345|  1.05M|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (345:15): [True: 933k, False: 116k]
  ------------------
  346|   933k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2);
  347|   933k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2);
  348|   933k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2);
  349|   933k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2);
  350|   933k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2);
  351|   933k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2);
  352|   933k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2);
  353|   933k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2);
  354|   933k|		}
  355|   116k|	}
  356|   135k|}
interpolate8x4_halfpel_hv_c:
  365|  12.4k|{
  366|  12.4k|	uintptr_t j;
  ------------------
  |  |  138|  12.4k|#        define uintptr_t uint64_t
  ------------------
  367|       |
  368|  12.4k|	if (rounding) {
  ------------------
  |  Branch (368:6): [True: 7.80k, False: 4.64k]
  ------------------
  369|  39.0k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (369:15): [True: 31.2k, False: 7.80k]
  ------------------
  370|  31.2k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2);
  371|  31.2k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2);
  372|  31.2k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2);
  373|  31.2k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2);
  374|  31.2k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2);
  375|  31.2k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2);
  376|  31.2k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2);
  377|  31.2k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2);
  378|  31.2k|		}
  379|  7.80k|	} else {
  380|  23.2k|		for (j = 0; j < 4*stride; j+=stride) {
  ------------------
  |  Branch (380:15): [True: 18.5k, False: 4.64k]
  ------------------
  381|  18.5k|				dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2);
  382|  18.5k|				dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2);
  383|  18.5k|				dst[j + 2] = (uint8_t)((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2);
  384|  18.5k|				dst[j + 3] = (uint8_t)((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2);
  385|  18.5k|				dst[j + 4] = (uint8_t)((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2);
  386|  18.5k|				dst[j + 5] = (uint8_t)((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2);
  387|  18.5k|				dst[j + 6] = (uint8_t)((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2);
  388|  18.5k|				dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2);
  389|  18.5k|		}
  390|  4.64k|	}
  391|  12.4k|}
interpolate8x8_halfpel_hv_add_c:
  400|  44.0k|{
  401|  44.0k|	uintptr_t j;
  ------------------
  |  |  138|  44.0k|#        define uintptr_t uint64_t
  ------------------
  402|       |
  403|  44.0k|	if (rounding) {
  ------------------
  |  Branch (403:6): [True: 0, False: 44.0k]
  ------------------
  404|      0|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (404:15): [True: 0, False: 0]
  ------------------
  405|      0|				dst[j + 0] = (uint8_t)((((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2) + dst[j+0])>>1);
  406|      0|				dst[j + 1] = (uint8_t)((((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2) + dst[j+1])>>1);
  407|      0|				dst[j + 2] = (uint8_t)((((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +1)>>2) + dst[j+2])>>1);
  408|      0|				dst[j + 3] = (uint8_t)((((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +1)>>2) + dst[j+3])>>1);
  409|      0|				dst[j + 4] = (uint8_t)((((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +1)>>2) + dst[j+4])>>1);
  410|      0|				dst[j + 5] = (uint8_t)((((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +1)>>2) + dst[j+5])>>1);
  411|      0|				dst[j + 6] = (uint8_t)((((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +1)>>2) + dst[j+6])>>1);
  412|      0|				dst[j + 7] = (uint8_t)((((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2) + dst[j+7])>>1);
  413|      0|		}
  414|  44.0k|	} else {
  415|   396k|		for (j = 0; j < 8*stride; j+=stride) {
  ------------------
  |  Branch (415:15): [True: 352k, False: 44.0k]
  ------------------
  416|   352k|				dst[j + 0] = (uint8_t)((((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2) + dst[j+0] + 1)>>1);
  417|   352k|				dst[j + 1] = (uint8_t)((((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2) + dst[j+1] + 1)>>1);
  418|   352k|				dst[j + 2] = (uint8_t)((((src[j+2] + src[j+3] + src[j+stride+2] + src[j+stride+3] +2)>>2) + dst[j+2] + 1)>>1);
  419|   352k|				dst[j + 3] = (uint8_t)((((src[j+3] + src[j+4] + src[j+stride+3] + src[j+stride+4] +2)>>2) + dst[j+3] + 1)>>1);
  420|   352k|				dst[j + 4] = (uint8_t)((((src[j+4] + src[j+5] + src[j+stride+4] + src[j+stride+5] +2)>>2) + dst[j+4] + 1)>>1);
  421|   352k|				dst[j + 5] = (uint8_t)((((src[j+5] + src[j+6] + src[j+stride+5] + src[j+stride+6] +2)>>2) + dst[j+5] + 1)>>1);
  422|   352k|				dst[j + 6] = (uint8_t)((((src[j+6] + src[j+7] + src[j+stride+6] + src[j+stride+7] +2)>>2) + dst[j+6] + 1)>>1);
  423|   352k|				dst[j + 7] = (uint8_t)((((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +2)>>2) + dst[j+7] + 1)>>1);
  424|   352k|		}
  425|  44.0k|	}
  426|  44.0k|}

decoder.c:interpolate16x16_switch:
  325|   302k|{
  326|   302k|	interpolate8x8_switch(cur, refn, x,   y,   dx, dy, stride, rounding);
  327|   302k|	interpolate8x8_switch(cur, refn, x+8, y,   dx, dy, stride, rounding);
  328|   302k|	interpolate8x8_switch(cur, refn, x,   y+8, dx, dy, stride, rounding);
  329|   302k|	interpolate8x8_switch(cur, refn, x+8, y+8, dx, dy, stride, rounding);
  330|   302k|}
decoder.c:interpolate8x8_switch:
  265|  4.05M|{
  266|       |
  267|  4.05M|	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
  268|  4.05M|	uint8_t * const dst = cur + (int)(y * stride + x);
  269|       |
  270|  4.05M|	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
  271|  3.38M|	case 0:
  ------------------
  |  Branch (271:2): [True: 3.38M, False: 661k]
  ------------------
  272|  3.38M|		transfer8x8_copy(dst, src, stride);
  273|  3.38M|		break;
  274|   241k|	case 1:
  ------------------
  |  Branch (274:2): [True: 241k, False: 3.81M]
  ------------------
  275|   241k|		interpolate8x8_halfpel_v(dst, src, stride, rounding);
  276|   241k|		break;
  277|   284k|	case 2:
  ------------------
  |  Branch (277:2): [True: 284k, False: 3.76M]
  ------------------
  278|   284k|		interpolate8x8_halfpel_h(dst, src, stride, rounding);
  279|   284k|		break;
  280|   135k|	default:
  ------------------
  |  Branch (280:2): [True: 135k, False: 3.91M]
  ------------------
  281|   135k|		interpolate8x8_halfpel_hv(dst, src, stride, rounding);
  282|   135k|		break;
  283|  4.05M|	}
  284|  4.05M|}
decoder.c:interpolate8x4_switch:
  234|  85.1k|{
  235|       |
  236|  85.1k|	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
  237|  85.1k|	uint8_t * const dst = cur + (int)(y * stride + x);
  238|       |
  239|  85.1k|	switch (((dx & 1) << 1) + (dy & 1))	
  240|  85.1k|	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
  241|  37.4k|	case 0:
  ------------------
  |  Branch (241:2): [True: 37.4k, False: 47.6k]
  ------------------
  242|  37.4k|		transfer8x4_copy(dst, src, stride);
  243|  37.4k|		break;
  244|  19.0k|	case 1:
  ------------------
  |  Branch (244:2): [True: 19.0k, False: 66.0k]
  ------------------
  245|  19.0k|		interpolate8x4_halfpel_v(dst, src, stride, rounding);
  246|  19.0k|		break;
  247|  16.1k|	case 2:
  ------------------
  |  Branch (247:2): [True: 16.1k, False: 68.9k]
  ------------------
  248|  16.1k|		interpolate8x4_halfpel_h(dst, src, stride, rounding);
  249|  16.1k|		break;
  250|  12.4k|	default:
  ------------------
  |  Branch (250:2): [True: 12.4k, False: 72.6k]
  ------------------
  251|  12.4k|		interpolate8x4_halfpel_hv(dst, src, stride, rounding);
  252|  12.4k|		break;
  253|  85.1k|	}
  254|  85.1k|}
decoder.c:interpolate8x8_add_switch:
  295|  1.79M|{
  296|       |
  297|  1.79M|	const uint8_t * const src = refn + (int)((y + (dy>>1)) * stride + x + (dx>>1));
  298|  1.79M|	uint8_t * const dst = cur + (int)(y * stride + x);
  299|       |
  300|  1.79M|	switch (((dx & 1) << 1) + (dy & 1))	{ /* ((dx%2)?2:0)+((dy%2)?1:0) */
  301|  1.56M|	case 0:
  ------------------
  |  Branch (301:2): [True: 1.56M, False: 228k]
  ------------------
  302|  1.56M|		interpolate8x8_halfpel_add(dst, src, stride, rounding);
  303|  1.56M|		break;
  304|  66.8k|	case 1:
  ------------------
  |  Branch (304:2): [True: 66.8k, False: 1.72M]
  ------------------
  305|  66.8k|		interpolate8x8_halfpel_v_add(dst, src, stride, rounding);
  306|  66.8k|		break;
  307|   117k|	case 2:
  ------------------
  |  Branch (307:2): [True: 117k, False: 1.67M]
  ------------------
  308|   117k|		interpolate8x8_halfpel_h_add(dst, src, stride, rounding);
  309|   117k|		break;
  310|  44.0k|	default:
  ------------------
  |  Branch (310:2): [True: 44.0k, False: 1.74M]
  ------------------
  311|  44.0k|		interpolate8x8_halfpel_hv_add(dst, src, stride, rounding);
  312|  44.0k|		break;
  313|  1.79M|	}
  314|  1.79M|}

init_postproc:
   51|  10.2k|{
   52|  10.2k|	init_deblock(tbls);
   53|  10.2k|	init_noise(tbls);
   54|  10.2k|}
init_deblock:
  214|  10.2k|{
  215|  10.2k|	int i;
  216|       |
  217|  5.27M|	for(i = -255; i < 256; i++) {
  ------------------
  |  Branch (217:16): [True: 5.26M, False: 10.2k]
  ------------------
  218|  5.26M|		tbls->xvid_thresh_tbl[i + 255] = 0;
  219|  5.26M|		if(ABS(i) < THR1)
  ------------------
  |  |   48|  5.26M|#define ABS(X)    (((X)>0)?(X):-(X)) 
  |  |  ------------------
  |  |  |  Branch (48:20): [True: 2.62M, False: 2.63M]
  |  |  ------------------
  ------------------
              		if(ABS(i) < THR1)
  ------------------
  |  |   34|  5.26M|#define THR1 2
  ------------------
  |  Branch (219:6): [True: 30.8k, False: 5.23M]
  ------------------
  220|  30.8k|			tbls->xvid_thresh_tbl[i + 255] = 1;
  221|  5.26M|		tbls->xvid_abs_tbl[i + 255] = ABS(i);
  ------------------
  |  |   48|  5.26M|#define ABS(X)    (((X)>0)?(X):-(X)) 
  |  |  ------------------
  |  |  |  Branch (48:20): [True: 2.62M, False: 2.63M]
  |  |  ------------------
  ------------------
  222|  5.26M|	}
  223|  10.2k|}
init_noise:
  427|  10.2k|{
  428|  10.2k|	int i, j;
  429|  10.2k|	int patt[4] = { -1,0,1,0 };
  430|       |
  431|  10.2k|	emms();
  432|       |
  433|  10.2k|	srand(123457);
  434|       |
  435|  42.1M|	for(i = 0, j = 0; i < MAX_NOISE; i++, j++)
  ------------------
  |  |   37|  42.1M|#define MAX_NOISE 4096
  ------------------
  |  Branch (435:20): [True: 42.1M, False: 10.2k]
  ------------------
  436|  42.1M|	{
  437|  42.1M|		double x1, x2, w, y1, y2;
  438|       |		
  439|  54.0M|		do {
  440|  54.0M|			x1 = 2.0 * rand() / (float) RAND_MAX - 1.0;
  441|  54.0M|			x2 = 2.0 * rand() / (float) RAND_MAX - 1.0;
  442|  54.0M|			w = x1 * x1 + x2 * x2;
  443|  54.0M|		} while (w >= 1.0);
  ------------------
  |  Branch (443:12): [True: 11.8M, False: 42.1M]
  ------------------
  444|       |		
  445|  42.1M|		w = sqrt((-2.0 * log(w)) / w);
  446|  42.1M|		y1 = x1 * w;
  447|  42.1M|		y2 = x1 * w;
  448|       |
  449|  42.1M|		y1 *= STRENGTH1 / sqrt(3.0);
  ------------------
  |  |  423|  42.1M|#define STRENGTH1 12
  ------------------
  450|  42.1M|		y2 *= STRENGTH2 / sqrt(3.0);
  ------------------
  |  |  424|  42.1M|#define STRENGTH2 8
  ------------------
  451|       |
  452|  42.1M|	    y1 /= 2;
  453|  42.1M|		y2 /= 2;
  454|  42.1M|	    y1 += patt[j%4] * STRENGTH1 * 0.35;
  ------------------
  |  |  423|  42.1M|#define STRENGTH1 12
  ------------------
  455|  42.1M|		y2 += patt[j%4] * STRENGTH2 * 0.35;
  ------------------
  |  |  424|  42.1M|#define STRENGTH2 8
  ------------------
  456|       |
  457|  42.1M|		if (y1 < -128) {
  ------------------
  |  Branch (457:7): [True: 0, False: 42.1M]
  ------------------
  458|      0|			y1=-128;
  459|      0|		}
  460|  42.1M|		else if (y1 > 127) {
  ------------------
  |  Branch (460:12): [True: 0, False: 42.1M]
  ------------------
  461|      0|			y1= 127;
  462|      0|		}
  463|       |
  464|  42.1M|		if (y2 < -128) {
  ------------------
  |  Branch (464:7): [True: 0, False: 42.1M]
  ------------------
  465|      0|			y2=-128;
  466|      0|		}
  467|  42.1M|		else if (y2 > 127) {
  ------------------
  |  Branch (467:12): [True: 0, False: 42.1M]
  ------------------
  468|      0|			y2= 127;
  469|      0|		}
  470|       |
  471|  42.1M|		y1 /= 3.0;
  472|  42.1M|		y2 /= 3.0;
  473|  42.1M|		tbls->xvid_noise1[i] = (int) y1;
  474|  42.1M|		tbls->xvid_noise2[i] = (int) y2;
  475|       |	
  476|  42.1M|		if (RAND_N(6) == 0) {
  ------------------
  |  |  422|  42.1M|#define RAND_N(range) ((int) ((double)range * rand() / (RAND_MAX + 1.0)))
  ------------------
  |  Branch (476:7): [True: 7.09M, False: 35.0M]
  ------------------
  477|  7.09M|			j--;
  478|  7.09M|		}
  479|  42.1M|	}
  480|       |	
  481|  31.6M|	for (i = 0; i < MAX_RES; i++)
  ------------------
  |  |   39|  31.6M|#define MAX_RES (MAX_NOISE - MAX_SHIFT)
  |  |  ------------------
  |  |  |  |   37|  31.6M|#define MAX_NOISE 4096
  |  |  ------------------
  |  |               #define MAX_RES (MAX_NOISE - MAX_SHIFT)
  |  |  ------------------
  |  |  |  |   38|  31.6M|#define MAX_SHIFT 1024
  |  |  ------------------
  ------------------
  |  Branch (481:14): [True: 31.6M, False: 10.2k]
  ------------------
  482|   126M|		for (j = 0; j < 3; j++) {
  ------------------
  |  Branch (482:15): [True: 94.9M, False: 31.6M]
  ------------------
  483|  94.9M|			tbls->xvid_prev_shift[i][j] = tbls->xvid_noise1 + (rand() & (MAX_SHIFT - 1));
  ------------------
  |  |   38|  94.9M|#define MAX_SHIFT 1024
  ------------------
  484|  94.9M|			tbls->xvid_prev_shift[i][3 + j] = tbls->xvid_noise2 + (rand() & (MAX_SHIFT - 1));
  ------------------
  |  |   38|  94.9M|#define MAX_SHIFT 1024
  ------------------
  485|  94.9M|		}
  486|  10.2k|}

xvid_Init_QP:
  412|      2|{
  413|       |#if defined (ARCH_IS_IA32) || defined (ARCH_IS_X86_64)
  414|       |	int i;
  415|       |
  416|       |	for(i=0; i<256; ++i) {
  417|       |		xvid_Expand_mmx[i][0] = i;
  418|       |		xvid_Expand_mmx[i][1] = i;
  419|       |		xvid_Expand_mmx[i][2] = i;
  420|       |		xvid_Expand_mmx[i][3] = i;
  421|       |	}
  422|       |#endif
  423|       |
  424|       |	/* Alternate way of filtering (cf. USE_TABLES flag in qpel_mmx.asm) */
  425|       |
  426|      2|	Init_FIR_Table(xvid_FIR_1_0_0_0,   -1,  0,  0,  0);
  427|      2|	Init_FIR_Table(xvid_FIR_3_1_0_0,    3, -1,  0,  0);
  428|      2|	Init_FIR_Table(xvid_FIR_6_3_1_0,   -6,  3, -1,  0);
  429|      2|	Init_FIR_Table(xvid_FIR_14_3_2_1,  14, -3,  2, -1);
  430|      2|	Init_FIR_Table(xvid_FIR_20_6_3_1,  20, -6,  3, -1);
  431|      2|	Init_FIR_Table(xvid_FIR_20_20_6_3, 20, 20, -6,  3);
  432|      2|	Init_FIR_Table(xvid_FIR_23_19_6_3, 23, 19, -6,  3);
  433|      2|	Init_FIR_Table(xvid_FIR_7_20_20_6, -7, 20, 20, -6);
  434|      2|	Init_FIR_Table(xvid_FIR_6_20_20_6, -6, 20, 20, -6);
  435|      2|	Init_FIR_Table(xvid_FIR_6_20_20_7, -6, 20, 20, -7);
  436|      2|	Init_FIR_Table(xvid_FIR_3_6_20_20,  3, -6, 20, 20);
  437|      2|	Init_FIR_Table(xvid_FIR_3_6_19_23,  3, -6, 19, 23);
  438|      2|	Init_FIR_Table(xvid_FIR_1_3_6_20,  -1,  3, -6, 20);
  439|      2|	Init_FIR_Table(xvid_FIR_1_2_3_14,  -1,  2, -3, 14);
  440|      2|	Init_FIR_Table(xvid_FIR_0_1_3_6,    0, -1,  3, -6);
  441|      2|	Init_FIR_Table(xvid_FIR_0_0_1_3,    0,  0, -1,  3);
  442|      2|	Init_FIR_Table(xvid_FIR_0_0_0_1,    0,  0,  0, -1);
  443|       |
  444|      2|}
qpel.c:H_Pass_16_C:
  617|  27.8k|{
  618|  27.8k|#if (SIZE==16)
  619|   489k|  while(H-->0) {
  ------------------
  |  Branch (619:9): [True: 461k, False: 27.8k]
  ------------------
  620|   461k|    int C;
  621|   461k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|   461k|    CLIP_STORE(Dst[ 0],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.64k, False: 459k]
  |  |  |  Branch (612:28): [True: 5.62k, False: 454k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  623|   461k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|   461k|    CLIP_STORE(Dst[ 1],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.82k, False: 458k]
  |  |  |  Branch (612:28): [True: 6.42k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  625|   461k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|   461k|    CLIP_STORE(Dst[ 2],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.15k, False: 459k]
  |  |  |  Branch (612:28): [True: 5.29k, False: 454k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  627|   461k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|   461k|    CLIP_STORE(Dst[ 3],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.06k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.97k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  629|   461k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|   461k|    CLIP_STORE(Dst[ 4],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.72k, False: 458k]
  |  |  |  Branch (612:28): [True: 6.07k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  631|   461k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|   461k|    CLIP_STORE(Dst[ 5],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.69k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.60k, False: 453k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  633|   461k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|   461k|    CLIP_STORE(Dst[ 6],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.49k, False: 459k]
  |  |  |  Branch (612:28): [True: 5.30k, False: 453k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  635|   461k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|   461k|    CLIP_STORE(Dst[ 7],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.18k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.97k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  637|   461k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|   461k|    CLIP_STORE(Dst[ 8],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.17k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.17k, False: 453k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  639|   461k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|   461k|    CLIP_STORE(Dst[ 9],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.51k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.86k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  641|   461k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|   461k|    CLIP_STORE(Dst[10],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.09k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.60k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  643|   461k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|   461k|    CLIP_STORE(Dst[11],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.77k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.88k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  645|   461k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|   461k|    CLIP_STORE(Dst[12],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.79k, False: 458k]
  |  |  |  Branch (612:28): [True: 6.04k, False: 452k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  647|   461k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|   461k|    CLIP_STORE(Dst[13],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.67k, False: 459k]
  |  |  |  Branch (612:28): [True: 5.54k, False: 454k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  649|   461k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|   461k|    CLIP_STORE(Dst[14],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.66k, False: 458k]
  |  |  |  Branch (612:28): [True: 5.77k, False: 453k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  651|   461k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|   461k|    CLIP_STORE(Dst[15],C);
  ------------------
  |  |  612|   461k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.32k, False: 460k]
  |  |  |  Branch (612:28): [True: 5.47k, False: 454k]
  |  |  ------------------
  |  |  613|   461k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   461k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  653|   461k|    Src += BpS;
  654|   461k|    Dst += BpS;
  655|   461k|  }
  656|       |#else
  657|       |  while(H-->0) {
  658|       |    int C;
  659|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|       |    CLIP_STORE(Dst[0],C);
  661|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|       |    CLIP_STORE(Dst[1],C);
  663|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|       |    CLIP_STORE(Dst[2],C);
  665|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|       |    CLIP_STORE(Dst[3],C);
  667|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|       |    CLIP_STORE(Dst[4],C);
  669|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|       |    CLIP_STORE(Dst[5],C);
  671|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|       |    CLIP_STORE(Dst[6],C);
  673|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|       |    CLIP_STORE(Dst[7],C);
  675|       |    Src += BpS;
  676|       |    Dst += BpS;
  677|       |  }
  678|       |#endif
  679|  27.8k|}
qpel.c:H_Pass_Avrg_16_C:
  689|  32.3k|{
  690|  32.3k|#if (SIZE==16)
  691|   569k|  while(H-->0) {
  ------------------
  |  Branch (691:9): [True: 537k, False: 32.3k]
  ------------------
  692|   537k|    int C;
  693|   537k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|   537k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.51k, False: 536k]
  |  |  |  Branch (683:28): [True: 5.89k, False: 530k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  695|   537k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|   537k|    CLIP_STORE( 1,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.37k, False: 535k]
  |  |  |  Branch (683:28): [True: 5.49k, False: 529k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  697|   537k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|   537k|    CLIP_STORE( 2,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.96k, False: 534k]
  |  |  |  Branch (683:28): [True: 4.82k, False: 529k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  699|   537k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|   537k|    CLIP_STORE( 3,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.30k, False: 535k]
  |  |  |  Branch (683:28): [True: 5.02k, False: 530k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  701|   537k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|   537k|    CLIP_STORE( 4,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.26k, False: 534k]
  |  |  |  Branch (683:28): [True: 5.13k, False: 529k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  703|   537k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|   537k|    CLIP_STORE( 5,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.25k, False: 535k]
  |  |  |  Branch (683:28): [True: 4.60k, False: 530k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  705|   537k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|   537k|    CLIP_STORE( 6,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.13k, False: 534k]
  |  |  |  Branch (683:28): [True: 5.34k, False: 529k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  707|   537k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|   537k|    CLIP_STORE( 7,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.73k, False: 534k]
  |  |  |  Branch (683:28): [True: 4.53k, False: 530k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  709|   537k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|   537k|    CLIP_STORE( 8,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.09k, False: 533k]
  |  |  |  Branch (683:28): [True: 5.75k, False: 527k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  711|   537k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|   537k|    CLIP_STORE( 9,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.99k, False: 534k]
  |  |  |  Branch (683:28): [True: 4.58k, False: 530k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  713|   537k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|   537k|    CLIP_STORE(10,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.79k, False: 533k]
  |  |  |  Branch (683:28): [True: 5.64k, False: 528k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  715|   537k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|   537k|    CLIP_STORE(11,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.53k, False: 535k]
  |  |  |  Branch (683:28): [True: 5.33k, False: 529k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  717|   537k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|   537k|    CLIP_STORE(12,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.49k, False: 533k]
  |  |  |  Branch (683:28): [True: 5.54k, False: 527k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  719|   537k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|   537k|    CLIP_STORE(13,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.73k, False: 535k]
  |  |  |  Branch (683:28): [True: 3.70k, False: 532k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  721|   537k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|   537k|    CLIP_STORE(14,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.31k, False: 533k]
  |  |  |  Branch (683:28): [True: 5.48k, False: 527k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  723|   537k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|   537k|    CLIP_STORE(15,C);
  ------------------
  |  |  683|   537k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.46k, False: 536k]
  |  |  |  Branch (683:28): [True: 3.67k, False: 532k]
  |  |  ------------------
  |  |  684|   537k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   537k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   537k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  725|   537k|    Src += BpS;
  726|   537k|    Dst += BpS;
  727|   537k|  }
  728|       |#else
  729|       |  while(H-->0) {
  730|       |    int C;
  731|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|       |    CLIP_STORE(0,C);
  733|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|       |    CLIP_STORE(1,C);
  735|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|       |    CLIP_STORE(2,C);
  737|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|       |    CLIP_STORE(3,C);
  739|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|       |    CLIP_STORE(4,C);
  741|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|       |    CLIP_STORE(5,C);
  743|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|       |    CLIP_STORE(6,C);
  745|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|       |    CLIP_STORE(7,C);
  747|       |    Src += BpS;
  748|       |    Dst += BpS;
  749|       |  }
  750|       |#endif
  751|  32.3k|}
qpel.c:H_Pass_Avrg_Up_16_C:
  761|  38.2k|{
  762|  38.2k|#if (SIZE==16)
  763|   671k|  while(H-->0) {
  ------------------
  |  Branch (763:9): [True: 632k, False: 38.2k]
  ------------------
  764|   632k|    int C;
  765|   632k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|   632k|    CLIP_STORE(0,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.81k, False: 631k]
  |  |  |  Branch (755:28): [True: 5.12k, False: 625k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  767|   632k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|   632k|    CLIP_STORE( 1,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.36k, False: 628k]
  |  |  |  Branch (755:28): [True: 8.84k, False: 619k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  769|   632k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|   632k|    CLIP_STORE( 2,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.11k, False: 630k]
  |  |  |  Branch (755:28): [True: 5.43k, False: 625k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  771|   632k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|   632k|    CLIP_STORE( 3,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.66k, False: 627k]
  |  |  |  Branch (755:28): [True: 7.17k, False: 620k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  773|   632k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|   632k|    CLIP_STORE( 4,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.31k, False: 630k]
  |  |  |  Branch (755:28): [True: 7.48k, False: 623k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  775|   632k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|   632k|    CLIP_STORE( 5,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.03k, False: 628k]
  |  |  |  Branch (755:28): [True: 7.48k, False: 621k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  777|   632k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|   632k|    CLIP_STORE( 6,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.37k, False: 630k]
  |  |  |  Branch (755:28): [True: 6.22k, False: 624k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  779|   632k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|   632k|    CLIP_STORE( 7,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.69k, False: 629k]
  |  |  |  Branch (755:28): [True: 7.83k, False: 621k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  781|   632k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|   632k|    CLIP_STORE( 8,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.63k, False: 630k]
  |  |  |  Branch (755:28): [True: 6.40k, False: 623k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  783|   632k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|   632k|    CLIP_STORE( 9,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.73k, False: 627k]
  |  |  |  Branch (755:28): [True: 8.74k, False: 618k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  785|   632k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|   632k|    CLIP_STORE(10,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.76k, False: 630k]
  |  |  |  Branch (755:28): [True: 6.57k, False: 623k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  787|   632k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|   632k|    CLIP_STORE(11,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 5.27k, False: 627k]
  |  |  |  Branch (755:28): [True: 8.57k, False: 618k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  789|   632k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|   632k|    CLIP_STORE(12,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.49k, False: 630k]
  |  |  |  Branch (755:28): [True: 8.29k, False: 622k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  791|   632k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|   632k|    CLIP_STORE(13,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.35k, False: 630k]
  |  |  |  Branch (755:28): [True: 8.46k, False: 622k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  793|   632k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|   632k|    CLIP_STORE(14,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.12k, False: 630k]
  |  |  |  Branch (755:28): [True: 8.60k, False: 622k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  795|   632k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|   632k|    CLIP_STORE(15,C);
  ------------------
  |  |  755|   632k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.70k, False: 631k]
  |  |  |  Branch (755:28): [True: 8.24k, False: 622k]
  |  |  ------------------
  |  |  756|   632k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   632k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  145|   632k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  797|   632k|    Src += BpS;
  798|   632k|    Dst += BpS;
  799|   632k|  }
  800|       |#else
  801|       |  while(H-->0) {
  802|       |    int C;
  803|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|       |    CLIP_STORE(0,C);
  805|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|       |    CLIP_STORE(1,C);
  807|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|       |    CLIP_STORE(2,C);
  809|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|       |    CLIP_STORE(3,C);
  811|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|       |    CLIP_STORE(4,C);
  813|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|       |    CLIP_STORE(5,C);
  815|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|       |    CLIP_STORE(6,C);
  817|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|       |    CLIP_STORE(7,C);
  819|       |    Src += BpS;
  820|       |    Dst += BpS;
  821|       |  }
  822|       |#endif
  823|  38.2k|}
qpel.c:V_Pass_16_C:
  837|  24.1k|{
  838|  24.1k|#if (SIZE==16)
  839|   410k|  while(H-->0) {
  ------------------
  |  Branch (839:9): [True: 386k, False: 24.1k]
  ------------------
  840|   386k|    int C;
  841|   386k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|   386k|    CLIP_STORE(Dst[BpS* 0],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 951, False: 385k]
  |  |  |  Branch (832:28): [True: 2.32k, False: 382k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  843|   386k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|   386k|    CLIP_STORE(Dst[BpS* 1],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.62k, False: 383k]
  |  |  |  Branch (832:28): [True: 2.32k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  845|   386k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|   386k|    CLIP_STORE(Dst[BpS* 2],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.42k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.11k, False: 382k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  847|   386k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|   386k|    CLIP_STORE(Dst[BpS* 3],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.05k, False: 383k]
  |  |  |  Branch (832:28): [True: 2.42k, False: 380k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  849|   386k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|   386k|    CLIP_STORE(Dst[BpS* 4],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.69k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.67k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  851|   386k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|   386k|    CLIP_STORE(Dst[BpS* 5],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.91k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.49k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  853|   386k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|   386k|    CLIP_STORE(Dst[BpS* 6],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.81k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.70k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  855|   386k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|   386k|    CLIP_STORE(Dst[BpS* 7],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.44k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.74k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  857|   386k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|   386k|    CLIP_STORE(Dst[BpS* 8],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.59k, False: 384k]
  |  |  |  Branch (832:28): [True: 3.48k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  859|   386k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|   386k|    CLIP_STORE(Dst[BpS* 9],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.99k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.69k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  861|   386k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|   386k|    CLIP_STORE(Dst[BpS*10],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.31k, False: 383k]
  |  |  |  Branch (832:28): [True: 2.58k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  863|   386k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|   386k|    CLIP_STORE(Dst[BpS*11],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.80k, False: 384k]
  |  |  |  Branch (832:28): [True: 3.14k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  865|   386k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|   386k|    CLIP_STORE(Dst[BpS*12],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.70k, False: 384k]
  |  |  |  Branch (832:28): [True: 3.09k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  867|   386k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|   386k|    CLIP_STORE(Dst[BpS*13],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.04k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.36k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  869|   386k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|   386k|    CLIP_STORE(Dst[BpS*14],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.87k, False: 384k]
  |  |  |  Branch (832:28): [True: 2.74k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  871|   386k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|   386k|    CLIP_STORE(Dst[BpS*15],C);
  ------------------
  |  |  832|   386k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.77k, False: 383k]
  |  |  |  Branch (832:28): [True: 2.21k, False: 381k]
  |  |  ------------------
  |  |  833|   386k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  145|   386k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  873|   386k|    Src += 1;
  874|   386k|    Dst += 1;
  875|   386k|  }
  876|       |#else
  877|       |  while(H-->0) {
  878|       |    int C;
  879|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|       |    CLIP_STORE(Dst[BpS*0],C);
  881|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|       |    CLIP_STORE(Dst[BpS*1],C);
  883|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|       |    CLIP_STORE(Dst[BpS*2],C);
  885|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|       |    CLIP_STORE(Dst[BpS*3],C);
  887|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|       |    CLIP_STORE(Dst[BpS*4],C);
  889|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|       |    CLIP_STORE(Dst[BpS*5],C);
  891|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|       |    CLIP_STORE(Dst[BpS*6],C);
  893|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|       |    CLIP_STORE(Dst[BpS*7],C);
  895|       |    Src += 1;
  896|       |    Dst += 1;
  897|       |  }
  898|       |#endif
  899|  24.1k|}
qpel.c:V_Pass_Avrg_16_C:
  909|  24.7k|{
  910|  24.7k|#if (SIZE==16)
  911|   421k|  while(H-->0) {
  ------------------
  |  Branch (911:9): [True: 396k, False: 24.7k]
  ------------------
  912|   396k|    int C;
  913|   396k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|   396k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.26k, False: 395k]
  |  |  |  Branch (903:28): [True: 2.23k, False: 392k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  915|   396k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|   396k|    CLIP_STORE( 1,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.46k, False: 395k]
  |  |  |  Branch (903:28): [True: 2.40k, False: 392k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  917|   396k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|   396k|    CLIP_STORE( 2,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.68k, False: 394k]
  |  |  |  Branch (903:28): [True: 2.29k, False: 392k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  919|   396k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|   396k|    CLIP_STORE( 3,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.06k, False: 394k]
  |  |  |  Branch (903:28): [True: 2.53k, False: 391k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  921|   396k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|   396k|    CLIP_STORE( 4,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.24k, False: 394k]
  |  |  |  Branch (903:28): [True: 2.54k, False: 391k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  923|   396k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|   396k|    CLIP_STORE( 5,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.86k, False: 394k]
  |  |  |  Branch (903:28): [True: 2.57k, False: 392k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  925|   396k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|   396k|    CLIP_STORE( 6,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.46k, False: 394k]
  |  |  |  Branch (903:28): [True: 2.89k, False: 391k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  927|   396k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|   396k|    CLIP_STORE( 7,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.34k, False: 395k]
  |  |  |  Branch (903:28): [True: 2.50k, False: 392k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  929|   396k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|   396k|    CLIP_STORE( 8,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.63k, False: 394k]
  |  |  |  Branch (903:28): [True: 3.46k, False: 391k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  931|   396k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|   396k|    CLIP_STORE( 9,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.64k, False: 394k]
  |  |  |  Branch (903:28): [True: 2.41k, False: 392k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  933|   396k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|   396k|    CLIP_STORE(10,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.71k, False: 393k]
  |  |  |  Branch (903:28): [True: 3.26k, False: 390k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  935|   396k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|   396k|    CLIP_STORE(11,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.74k, False: 394k]
  |  |  |  Branch (903:28): [True: 2.87k, False: 391k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  937|   396k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|   396k|    CLIP_STORE(12,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.52k, False: 391k]
  |  |  |  Branch (903:28): [True: 2.94k, False: 388k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  939|   396k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|   396k|    CLIP_STORE(13,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.73k, False: 394k]
  |  |  |  Branch (903:28): [True: 1.49k, False: 393k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  941|   396k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|   396k|    CLIP_STORE(14,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.34k, False: 392k]
  |  |  |  Branch (903:28): [True: 3.09k, False: 389k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  943|   396k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|   396k|    CLIP_STORE(15,C);
  ------------------
  |  |  903|   396k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.25k, False: 395k]
  |  |  |  Branch (903:28): [True: 1.72k, False: 393k]
  |  |  ------------------
  |  |  904|   396k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   396k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   396k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  945|   396k|    Src += 1;
  946|   396k|    Dst += 1;
  947|   396k|  }
  948|       |#else
  949|       |  while(H-->0) {
  950|       |    int C;
  951|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|       |    CLIP_STORE(0,C);
  953|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|       |    CLIP_STORE(1,C);
  955|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|       |    CLIP_STORE(2,C);
  957|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|       |    CLIP_STORE(3,C);
  959|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|       |    CLIP_STORE(4,C);
  961|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|       |    CLIP_STORE(5,C);
  963|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|       |    CLIP_STORE(6,C);
  965|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|       |    CLIP_STORE(7,C);
  967|       |    Src += 1;
  968|       |    Dst += 1;
  969|       |  }
  970|       |#endif
  971|  24.7k|}
qpel.c:V_Pass_Avrg_Up_16_C:
  981|  29.2k|{
  982|  29.2k|#if (SIZE==16)
  983|   498k|  while(H-->0) {
  ------------------
  |  Branch (983:9): [True: 468k, False: 29.2k]
  ------------------
  984|   468k|    int C;
  985|   468k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|   468k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.29k, False: 467k]
  |  |  |  Branch (975:28): [True: 1.99k, False: 465k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  987|   468k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|   468k|    CLIP_STORE( 1,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 5.07k, False: 463k]
  |  |  |  Branch (975:28): [True: 2.65k, False: 461k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  989|   468k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|   468k|    CLIP_STORE( 2,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.64k, False: 467k]
  |  |  |  Branch (975:28): [True: 1.87k, False: 465k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  991|   468k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|   468k|    CLIP_STORE( 3,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 5.78k, False: 462k]
  |  |  |  Branch (975:28): [True: 2.35k, False: 460k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  993|   468k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|   468k|    CLIP_STORE( 4,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.10k, False: 466k]
  |  |  |  Branch (975:28): [True: 2.77k, False: 463k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  995|   468k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|   468k|    CLIP_STORE( 5,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.51k, False: 466k]
  |  |  |  Branch (975:28): [True: 2.90k, False: 463k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  997|   468k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|   468k|    CLIP_STORE( 6,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.06k, False: 466k]
  |  |  |  Branch (975:28): [True: 2.41k, False: 464k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  999|   468k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|   468k|    CLIP_STORE( 7,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.88k, False: 465k]
  |  |  |  Branch (975:28): [True: 3.14k, False: 462k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1001|   468k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|   468k|    CLIP_STORE( 8,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.29k, False: 467k]
  |  |  |  Branch (975:28): [True: 2.81k, False: 464k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1003|   468k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|   468k|    CLIP_STORE( 9,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.63k, False: 466k]
  |  |  |  Branch (975:28): [True: 3.19k, False: 462k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1005|   468k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|   468k|    CLIP_STORE(10,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.11k, False: 466k]
  |  |  |  Branch (975:28): [True: 2.47k, False: 464k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1007|   468k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|   468k|    CLIP_STORE(11,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.93k, False: 465k]
  |  |  |  Branch (975:28): [True: 3.27k, False: 462k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1009|   468k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|   468k|    CLIP_STORE(12,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.56k, False: 467k]
  |  |  |  Branch (975:28): [True: 2.99k, False: 464k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1011|   468k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|   468k|    CLIP_STORE(13,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.10k, False: 466k]
  |  |  |  Branch (975:28): [True: 3.16k, False: 463k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1013|   468k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|   468k|    CLIP_STORE(14,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.36k, False: 467k]
  |  |  |  Branch (975:28): [True: 3.43k, False: 463k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1015|   468k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|   468k|    CLIP_STORE(15,C);
  ------------------
  |  |  975|   468k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.66k, False: 467k]
  |  |  |  Branch (975:28): [True: 3.21k, False: 463k]
  |  |  ------------------
  |  |  976|   468k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   468k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  145|   468k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1017|   468k|    Src += 1;
 1018|   468k|    Dst += 1;
 1019|   468k|  }
 1020|       |#else
 1021|       |  while(H-->0) {
 1022|       |    int C;
 1023|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|       |    CLIP_STORE(0,C);
 1025|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|       |    CLIP_STORE(1,C);
 1027|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|       |    CLIP_STORE(2,C);
 1029|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|       |    CLIP_STORE(3,C);
 1031|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|       |    CLIP_STORE(4,C);
 1033|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|       |    CLIP_STORE(5,C);
 1035|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|       |    CLIP_STORE(6,C);
 1037|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|       |    CLIP_STORE(7,C);
 1039|       |    Src += 1;
 1040|       |    Dst += 1;
 1041|       |  }
 1042|       |#endif
 1043|  29.2k|}
qpel.c:H_Pass_8_C:
  617|  55.3k|{
  618|       |#if (SIZE==16)
  619|       |  while(H-->0) {
  620|       |    int C;
  621|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|       |    CLIP_STORE(Dst[ 0],C);
  623|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|       |    CLIP_STORE(Dst[ 1],C);
  625|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|       |    CLIP_STORE(Dst[ 2],C);
  627|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|       |    CLIP_STORE(Dst[ 3],C);
  629|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|       |    CLIP_STORE(Dst[ 4],C);
  631|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|       |    CLIP_STORE(Dst[ 5],C);
  633|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|       |    CLIP_STORE(Dst[ 6],C);
  635|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|       |    CLIP_STORE(Dst[ 7],C);
  637|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|       |    CLIP_STORE(Dst[ 8],C);
  639|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|       |    CLIP_STORE(Dst[ 9],C);
  641|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|       |    CLIP_STORE(Dst[10],C);
  643|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|       |    CLIP_STORE(Dst[11],C);
  645|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|       |    CLIP_STORE(Dst[12],C);
  647|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|       |    CLIP_STORE(Dst[13],C);
  649|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|       |    CLIP_STORE(Dst[14],C);
  651|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|       |    CLIP_STORE(Dst[15],C);
  653|       |    Src += BpS;
  654|       |    Dst += BpS;
  655|       |  }
  656|       |#else
  657|   527k|  while(H-->0) {
  ------------------
  |  Branch (657:9): [True: 471k, False: 55.3k]
  ------------------
  658|   471k|    int C;
  659|   471k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|   471k|    CLIP_STORE(Dst[0],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.33k, False: 469k]
  |  |  |  Branch (612:28): [True: 4.52k, False: 464k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  661|   471k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|   471k|    CLIP_STORE(Dst[1],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.72k, False: 468k]
  |  |  |  Branch (612:28): [True: 5.33k, False: 463k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  663|   471k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|   471k|    CLIP_STORE(Dst[2],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.45k, False: 469k]
  |  |  |  Branch (612:28): [True: 4.70k, False: 464k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  665|   471k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|   471k|    CLIP_STORE(Dst[3],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.77k, False: 467k]
  |  |  |  Branch (612:28): [True: 4.82k, False: 463k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  667|   471k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|   471k|    CLIP_STORE(Dst[4],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.55k, False: 469k]
  |  |  |  Branch (612:28): [True: 5.33k, False: 463k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  669|   471k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|   471k|    CLIP_STORE(Dst[5],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.99k, False: 469k]
  |  |  |  Branch (612:28): [True: 5.29k, False: 464k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  671|   471k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|   471k|    CLIP_STORE(Dst[6],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.94k, False: 468k]
  |  |  |  Branch (612:28): [True: 5.28k, False: 463k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  673|   471k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|   471k|    CLIP_STORE(Dst[7],C);
  ------------------
  |  |  612|   471k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.94k, False: 469k]
  |  |  |  Branch (612:28): [True: 4.71k, False: 465k]
  |  |  ------------------
  |  |  613|   471k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   471k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  675|   471k|    Src += BpS;
  676|   471k|    Dst += BpS;
  677|   471k|  }
  678|  55.3k|#endif
  679|  55.3k|}
qpel.c:H_Pass_Avrg_8_C:
  689|  99.4k|{
  690|       |#if (SIZE==16)
  691|       |  while(H-->0) {
  692|       |    int C;
  693|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|       |    CLIP_STORE(0,C);
  695|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|       |    CLIP_STORE( 1,C);
  697|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|       |    CLIP_STORE( 2,C);
  699|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|       |    CLIP_STORE( 3,C);
  701|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|       |    CLIP_STORE( 4,C);
  703|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|       |    CLIP_STORE( 5,C);
  705|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|       |    CLIP_STORE( 6,C);
  707|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|       |    CLIP_STORE( 7,C);
  709|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|       |    CLIP_STORE( 8,C);
  711|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|       |    CLIP_STORE( 9,C);
  713|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|       |    CLIP_STORE(10,C);
  715|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|       |    CLIP_STORE(11,C);
  717|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|       |    CLIP_STORE(12,C);
  719|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|       |    CLIP_STORE(13,C);
  721|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|       |    CLIP_STORE(14,C);
  723|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|       |    CLIP_STORE(15,C);
  725|       |    Src += BpS;
  726|       |    Dst += BpS;
  727|       |  }
  728|       |#else
  729|   954k|  while(H-->0) {
  ------------------
  |  Branch (729:9): [True: 854k, False: 99.4k]
  ------------------
  730|   854k|    int C;
  731|   854k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|   854k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.33k, False: 851k]
  |  |  |  Branch (683:28): [True: 12.2k, False: 839k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  733|   854k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|   854k|    CLIP_STORE(1,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.58k, False: 851k]
  |  |  |  Branch (683:28): [True: 11.5k, False: 839k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  735|   854k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|   854k|    CLIP_STORE(2,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.62k, False: 850k]
  |  |  |  Branch (683:28): [True: 10.9k, False: 839k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  737|   854k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|   854k|    CLIP_STORE(3,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.13k, False: 850k]
  |  |  |  Branch (683:28): [True: 11.5k, False: 839k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  739|   854k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|   854k|    CLIP_STORE(4,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 7.12k, False: 847k]
  |  |  |  Branch (683:28): [True: 11.4k, False: 836k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  741|   854k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|   854k|    CLIP_STORE(5,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.18k, False: 851k]
  |  |  |  Branch (683:28): [True: 9.61k, False: 842k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  743|   854k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|   854k|    CLIP_STORE(6,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 6.64k, False: 848k]
  |  |  |  Branch (683:28): [True: 11.8k, False: 836k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  745|   854k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|   854k|    CLIP_STORE(7,C);
  ------------------
  |  |  683|   854k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.62k, False: 852k]
  |  |  |  Branch (683:28): [True: 10.2k, False: 842k]
  |  |  ------------------
  |  |  684|   854k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   854k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   854k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  747|   854k|    Src += BpS;
  748|   854k|    Dst += BpS;
  749|   854k|  }
  750|  99.4k|#endif
  751|  99.4k|}
qpel.c:H_Pass_Avrg_Up_8_C:
  761|   106k|{
  762|       |#if (SIZE==16)
  763|       |  while(H-->0) {
  764|       |    int C;
  765|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|       |    CLIP_STORE(0,C);
  767|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|       |    CLIP_STORE( 1,C);
  769|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|       |    CLIP_STORE( 2,C);
  771|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|       |    CLIP_STORE( 3,C);
  773|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|       |    CLIP_STORE( 4,C);
  775|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|       |    CLIP_STORE( 5,C);
  777|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|       |    CLIP_STORE( 6,C);
  779|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|       |    CLIP_STORE( 7,C);
  781|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|       |    CLIP_STORE( 8,C);
  783|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|       |    CLIP_STORE( 9,C);
  785|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|       |    CLIP_STORE(10,C);
  787|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|       |    CLIP_STORE(11,C);
  789|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|       |    CLIP_STORE(12,C);
  791|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|       |    CLIP_STORE(13,C);
  793|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|       |    CLIP_STORE(14,C);
  795|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|       |    CLIP_STORE(15,C);
  797|       |    Src += BpS;
  798|       |    Dst += BpS;
  799|       |  }
  800|       |#else
  801|  1.03M|  while(H-->0) {
  ------------------
  |  Branch (801:9): [True: 924k, False: 106k]
  ------------------
  802|   924k|    int C;
  803|   924k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|   924k|    CLIP_STORE(0,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.51k, False: 921k]
  |  |  |  Branch (755:28): [True: 8.42k, False: 913k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  805|   924k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|   924k|    CLIP_STORE(1,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 6.51k, False: 917k]
  |  |  |  Branch (755:28): [True: 11.9k, False: 905k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  807|   924k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|   924k|    CLIP_STORE(2,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.93k, False: 921k]
  |  |  |  Branch (755:28): [True: 8.74k, False: 912k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  809|   924k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|   924k|    CLIP_STORE(3,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 6.13k, False: 918k]
  |  |  |  Branch (755:28): [True: 11.4k, False: 906k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  811|   924k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|   924k|    CLIP_STORE(4,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.15k, False: 919k]
  |  |  |  Branch (755:28): [True: 11.0k, False: 908k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  813|   924k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|   924k|    CLIP_STORE(5,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.80k, False: 920k]
  |  |  |  Branch (755:28): [True: 10.8k, False: 909k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  815|   924k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|   924k|    CLIP_STORE(6,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.73k, False: 920k]
  |  |  |  Branch (755:28): [True: 10.6k, False: 909k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  817|   924k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|   924k|    CLIP_STORE(7,C);
  ------------------
  |  |  755|   924k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.54k, False: 920k]
  |  |  |  Branch (755:28): [True: 10.7k, False: 909k]
  |  |  ------------------
  |  |  756|   924k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   924k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  174|   924k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  819|   924k|    Src += BpS;
  820|   924k|    Dst += BpS;
  821|   924k|  }
  822|   106k|#endif
  823|   106k|}
qpel.c:V_Pass_8_C:
  837|  52.6k|{
  838|       |#if (SIZE==16)
  839|       |  while(H-->0) {
  840|       |    int C;
  841|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|       |    CLIP_STORE(Dst[BpS* 0],C);
  843|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|       |    CLIP_STORE(Dst[BpS* 1],C);
  845|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|       |    CLIP_STORE(Dst[BpS* 2],C);
  847|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|       |    CLIP_STORE(Dst[BpS* 3],C);
  849|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|       |    CLIP_STORE(Dst[BpS* 4],C);
  851|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|       |    CLIP_STORE(Dst[BpS* 5],C);
  853|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|       |    CLIP_STORE(Dst[BpS* 6],C);
  855|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|       |    CLIP_STORE(Dst[BpS* 7],C);
  857|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|       |    CLIP_STORE(Dst[BpS* 8],C);
  859|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|       |    CLIP_STORE(Dst[BpS* 9],C);
  861|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|       |    CLIP_STORE(Dst[BpS*10],C);
  863|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|       |    CLIP_STORE(Dst[BpS*11],C);
  865|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|       |    CLIP_STORE(Dst[BpS*12],C);
  867|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|       |    CLIP_STORE(Dst[BpS*13],C);
  869|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|       |    CLIP_STORE(Dst[BpS*14],C);
  871|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|       |    CLIP_STORE(Dst[BpS*15],C);
  873|       |    Src += 1;
  874|       |    Dst += 1;
  875|       |  }
  876|       |#else
  877|   474k|  while(H-->0) {
  ------------------
  |  Branch (877:9): [True: 421k, False: 52.6k]
  ------------------
  878|   421k|    int C;
  879|   421k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|   421k|    CLIP_STORE(Dst[BpS*0],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.99k, False: 419k]
  |  |  |  Branch (832:28): [True: 4.96k, False: 414k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  881|   421k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|   421k|    CLIP_STORE(Dst[BpS*1],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.66k, False: 418k]
  |  |  |  Branch (832:28): [True: 4.07k, False: 414k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  883|   421k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|   421k|    CLIP_STORE(Dst[BpS*2],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.93k, False: 417k]
  |  |  |  Branch (832:28): [True: 4.15k, False: 413k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  885|   421k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|   421k|    CLIP_STORE(Dst[BpS*3],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.08k, False: 418k]
  |  |  |  Branch (832:28): [True: 3.85k, False: 414k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  887|   421k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|   421k|    CLIP_STORE(Dst[BpS*4],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.28k, False: 418k]
  |  |  |  Branch (832:28): [True: 5.17k, False: 413k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  889|   421k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|   421k|    CLIP_STORE(Dst[BpS*5],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.06k, False: 419k]
  |  |  |  Branch (832:28): [True: 4.29k, False: 415k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  891|   421k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|   421k|    CLIP_STORE(Dst[BpS*6],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.44k, False: 418k]
  |  |  |  Branch (832:28): [True: 4.41k, False: 413k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  893|   421k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|   421k|    CLIP_STORE(Dst[BpS*7],C);
  ------------------
  |  |  832|   421k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.92k, False: 419k]
  |  |  |  Branch (832:28): [True: 4.10k, False: 415k]
  |  |  ------------------
  |  |  833|   421k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  174|   421k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  895|   421k|    Src += 1;
  896|   421k|    Dst += 1;
  897|   421k|  }
  898|  52.6k|#endif
  899|  52.6k|}
qpel.c:V_Pass_Avrg_8_C:
  909|  53.8k|{
  910|       |#if (SIZE==16)
  911|       |  while(H-->0) {
  912|       |    int C;
  913|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|       |    CLIP_STORE(0,C);
  915|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|       |    CLIP_STORE( 1,C);
  917|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|       |    CLIP_STORE( 2,C);
  919|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|       |    CLIP_STORE( 3,C);
  921|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|       |    CLIP_STORE( 4,C);
  923|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|       |    CLIP_STORE( 5,C);
  925|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|       |    CLIP_STORE( 6,C);
  927|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|       |    CLIP_STORE( 7,C);
  929|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|       |    CLIP_STORE( 8,C);
  931|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|       |    CLIP_STORE( 9,C);
  933|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|       |    CLIP_STORE(10,C);
  935|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|       |    CLIP_STORE(11,C);
  937|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|       |    CLIP_STORE(12,C);
  939|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|       |    CLIP_STORE(13,C);
  941|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|       |    CLIP_STORE(14,C);
  943|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|       |    CLIP_STORE(15,C);
  945|       |    Src += 1;
  946|       |    Dst += 1;
  947|       |  }
  948|       |#else
  949|   484k|  while(H-->0) {
  ------------------
  |  Branch (949:9): [True: 430k, False: 53.8k]
  ------------------
  950|   430k|    int C;
  951|   430k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|   430k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.12k, False: 429k]
  |  |  |  Branch (903:28): [True: 2.72k, False: 427k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  953|   430k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|   430k|    CLIP_STORE(1,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.46k, False: 429k]
  |  |  |  Branch (903:28): [True: 2.91k, False: 426k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  955|   430k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|   430k|    CLIP_STORE(2,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.41k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.58k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  957|   430k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|   430k|    CLIP_STORE(3,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.55k, False: 429k]
  |  |  |  Branch (903:28): [True: 2.88k, False: 426k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  959|   430k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|   430k|    CLIP_STORE(4,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.78k, False: 428k]
  |  |  |  Branch (903:28): [True: 2.97k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  961|   430k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|   430k|    CLIP_STORE(5,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.13k, False: 429k]
  |  |  |  Branch (903:28): [True: 3.22k, False: 426k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  963|   430k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|   430k|    CLIP_STORE(6,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.70k, False: 429k]
  |  |  |  Branch (903:28): [True: 3.44k, False: 425k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  965|   430k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|   430k|    CLIP_STORE(7,C);
  ------------------
  |  |  903|   430k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 940, False: 429k]
  |  |  |  Branch (903:28): [True: 3.56k, False: 426k]
  |  |  ------------------
  |  |  904|   430k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   430k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   430k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
  967|   430k|    Src += 1;
  968|   430k|    Dst += 1;
  969|   430k|  }
  970|  53.8k|#endif
  971|  53.8k|}
qpel.c:V_Pass_Avrg_Up_8_C:
  981|  67.8k|{
  982|       |#if (SIZE==16)
  983|       |  while(H-->0) {
  984|       |    int C;
  985|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|       |    CLIP_STORE(0,C);
  987|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|       |    CLIP_STORE( 1,C);
  989|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|       |    CLIP_STORE( 2,C);
  991|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|       |    CLIP_STORE( 3,C);
  993|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|       |    CLIP_STORE( 4,C);
  995|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|       |    CLIP_STORE( 5,C);
  997|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|       |    CLIP_STORE( 6,C);
  999|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|       |    CLIP_STORE( 7,C);
 1001|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|       |    CLIP_STORE( 8,C);
 1003|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|       |    CLIP_STORE( 9,C);
 1005|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|       |    CLIP_STORE(10,C);
 1007|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|       |    CLIP_STORE(11,C);
 1009|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|       |    CLIP_STORE(12,C);
 1011|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|       |    CLIP_STORE(13,C);
 1013|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|       |    CLIP_STORE(14,C);
 1015|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|       |    CLIP_STORE(15,C);
 1017|       |    Src += 1;
 1018|       |    Dst += 1;
 1019|       |  }
 1020|       |#else
 1021|   610k|  while(H-->0) {
  ------------------
  |  Branch (1021:9): [True: 542k, False: 67.8k]
  ------------------
 1022|   542k|    int C;
 1023|   542k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|   542k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.14k, False: 540k]
  |  |  |  Branch (975:28): [True: 2.60k, False: 538k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1025|   542k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|   542k|    CLIP_STORE(1,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.36k, False: 540k]
  |  |  |  Branch (975:28): [True: 3.14k, False: 537k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1027|   542k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|   542k|    CLIP_STORE(2,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.37k, False: 541k]
  |  |  |  Branch (975:28): [True: 2.62k, False: 538k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1029|   542k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|   542k|    CLIP_STORE(3,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.49k, False: 540k]
  |  |  |  Branch (975:28): [True: 3.39k, False: 537k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1031|   542k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|   542k|    CLIP_STORE(4,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.41k, False: 541k]
  |  |  |  Branch (975:28): [True: 2.91k, False: 538k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1033|   542k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|   542k|    CLIP_STORE(5,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.71k, False: 541k]
  |  |  |  Branch (975:28): [True: 2.64k, False: 538k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1035|   542k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|   542k|    CLIP_STORE(6,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.10k, False: 541k]
  |  |  |  Branch (975:28): [True: 2.91k, False: 538k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1037|   542k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|   542k|    CLIP_STORE(7,C);
  ------------------
  |  |  975|   542k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.59k, False: 541k]
  |  |  |  Branch (975:28): [True: 3.00k, False: 538k]
  |  |  ------------------
  |  |  976|   542k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   542k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  174|   542k|#define STORE(d,s)  (d) = (s)
  |  |  ------------------
  ------------------
 1039|   542k|    Src += 1;
 1040|   542k|    Dst += 1;
 1041|   542k|  }
 1042|  67.8k|#endif
 1043|  67.8k|}
qpel.c:H_Pass_16_Add_C:
  617|  7.74k|{
  618|  7.74k|#if (SIZE==16)
  619|   131k|  while(H-->0) {
  ------------------
  |  Branch (619:9): [True: 123k, False: 7.74k]
  ------------------
  620|   123k|    int C;
  621|   123k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|   123k|    CLIP_STORE(Dst[ 0],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.00k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.27k, False: 118k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  623|   123k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|   123k|    CLIP_STORE(Dst[ 1],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.54k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.92k, False: 117k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  625|   123k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|   123k|    CLIP_STORE(Dst[ 2],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.49k, False: 121k]
  |  |  |  Branch (612:28): [True: 2.87k, False: 118k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  627|   123k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|   123k|    CLIP_STORE(Dst[ 3],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.68k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.39k, False: 117k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  629|   123k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|   123k|    CLIP_STORE(Dst[ 4],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.43k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.13k, False: 118k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  631|   123k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|   123k|    CLIP_STORE(Dst[ 5],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.46k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.30k, False: 118k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  633|   123k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|   123k|    CLIP_STORE(Dst[ 6],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.96k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.13k, False: 117k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  635|   123k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|   123k|    CLIP_STORE(Dst[ 7],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.97k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.44k, False: 117k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  637|   123k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|   123k|    CLIP_STORE(Dst[ 8],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.83k, False: 121k]
  |  |  |  Branch (612:28): [True: 2.80k, False: 118k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  639|   123k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|   123k|    CLIP_STORE(Dst[ 9],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.70k, False: 120k]
  |  |  |  Branch (612:28): [True: 4.20k, False: 116k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  641|   123k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|   123k|    CLIP_STORE(Dst[10],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.05k, False: 120k]
  |  |  |  Branch (612:28): [True: 3.07k, False: 117k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  643|   123k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|   123k|    CLIP_STORE(Dst[11],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 4.09k, False: 119k]
  |  |  |  Branch (612:28): [True: 3.48k, False: 116k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  645|   123k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|   123k|    CLIP_STORE(Dst[12],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.14k, False: 120k]
  |  |  |  Branch (612:28): [True: 3.50k, False: 117k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  647|   123k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|   123k|    CLIP_STORE(Dst[13],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.41k, False: 121k]
  |  |  |  Branch (612:28): [True: 3.43k, False: 118k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  649|   123k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|   123k|    CLIP_STORE(Dst[14],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.21k, False: 120k]
  |  |  |  Branch (612:28): [True: 3.84k, False: 116k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  651|   123k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|   123k|    CLIP_STORE(Dst[15],C);
  ------------------
  |  |  612|   123k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.90k, False: 122k]
  |  |  |  Branch (612:28): [True: 3.54k, False: 118k]
  |  |  ------------------
  |  |  613|   123k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   123k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  653|   123k|    Src += BpS;
  654|   123k|    Dst += BpS;
  655|   123k|  }
  656|       |#else
  657|       |  while(H-->0) {
  658|       |    int C;
  659|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|       |    CLIP_STORE(Dst[0],C);
  661|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|       |    CLIP_STORE(Dst[1],C);
  663|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|       |    CLIP_STORE(Dst[2],C);
  665|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|       |    CLIP_STORE(Dst[3],C);
  667|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|       |    CLIP_STORE(Dst[4],C);
  669|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|       |    CLIP_STORE(Dst[5],C);
  671|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|       |    CLIP_STORE(Dst[6],C);
  673|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|       |    CLIP_STORE(Dst[7],C);
  675|       |    Src += BpS;
  676|       |    Dst += BpS;
  677|       |  }
  678|       |#endif
  679|  7.74k|}
qpel.c:H_Pass_Avrg_16_Add_C:
  689|  9.27k|{
  690|  9.27k|#if (SIZE==16)
  691|   157k|  while(H-->0) {
  ------------------
  |  Branch (691:9): [True: 148k, False: 9.27k]
  ------------------
  692|   148k|    int C;
  693|   148k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|   148k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.25k, False: 147k]
  |  |  |  Branch (683:28): [True: 4.02k, False: 143k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  695|   148k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|   148k|    CLIP_STORE( 1,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.87k, False: 146k]
  |  |  |  Branch (683:28): [True: 3.53k, False: 143k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  697|   148k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|   148k|    CLIP_STORE( 2,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.13k, False: 146k]
  |  |  |  Branch (683:28): [True: 3.27k, False: 143k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  699|   148k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|   148k|    CLIP_STORE( 3,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.09k, False: 146k]
  |  |  |  Branch (683:28): [True: 3.65k, False: 142k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  701|   148k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|   148k|    CLIP_STORE( 4,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.29k, False: 146k]
  |  |  |  Branch (683:28): [True: 3.32k, False: 142k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  703|   148k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|   148k|    CLIP_STORE( 5,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.10k, False: 146k]
  |  |  |  Branch (683:28): [True: 2.64k, False: 143k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  705|   148k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|   148k|    CLIP_STORE( 6,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.09k, False: 146k]
  |  |  |  Branch (683:28): [True: 3.66k, False: 142k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  707|   148k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|   148k|    CLIP_STORE( 7,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.68k, False: 145k]
  |  |  |  Branch (683:28): [True: 2.81k, False: 142k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  709|   148k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|   148k|    CLIP_STORE( 8,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.83k, False: 144k]
  |  |  |  Branch (683:28): [True: 2.89k, False: 141k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  711|   148k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|   148k|    CLIP_STORE( 9,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.22k, False: 146k]
  |  |  |  Branch (683:28): [True: 2.69k, False: 143k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  713|   148k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|   148k|    CLIP_STORE(10,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.74k, False: 144k]
  |  |  |  Branch (683:28): [True: 2.95k, False: 141k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  715|   148k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|   148k|    CLIP_STORE(11,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.70k, False: 146k]
  |  |  |  Branch (683:28): [True: 2.78k, False: 143k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  717|   148k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|   148k|    CLIP_STORE(12,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 4.00k, False: 144k]
  |  |  |  Branch (683:28): [True: 2.81k, False: 141k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  719|   148k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|   148k|    CLIP_STORE(13,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.70k, False: 146k]
  |  |  |  Branch (683:28): [True: 2.28k, False: 144k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  721|   148k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|   148k|    CLIP_STORE(14,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 3.77k, False: 144k]
  |  |  |  Branch (683:28): [True: 2.83k, False: 141k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  723|   148k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|   148k|    CLIP_STORE(15,C);
  ------------------
  |  |  683|   148k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.02k, False: 146k]
  |  |  |  Branch (683:28): [True: 2.17k, False: 144k]
  |  |  ------------------
  |  |  684|   148k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   148k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   148k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  725|   148k|    Src += BpS;
  726|   148k|    Dst += BpS;
  727|   148k|  }
  728|       |#else
  729|       |  while(H-->0) {
  730|       |    int C;
  731|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|       |    CLIP_STORE(0,C);
  733|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|       |    CLIP_STORE(1,C);
  735|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|       |    CLIP_STORE(2,C);
  737|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|       |    CLIP_STORE(3,C);
  739|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|       |    CLIP_STORE(4,C);
  741|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|       |    CLIP_STORE(5,C);
  743|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|       |    CLIP_STORE(6,C);
  745|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|       |    CLIP_STORE(7,C);
  747|       |    Src += BpS;
  748|       |    Dst += BpS;
  749|       |  }
  750|       |#endif
  751|  9.27k|}
qpel.c:H_Pass_Avrg_Up_16_Add_C:
  761|  10.4k|{
  762|  10.4k|#if (SIZE==16)
  763|   177k|  while(H-->0) {
  ------------------
  |  Branch (763:9): [True: 167k, False: 10.4k]
  ------------------
  764|   167k|    int C;
  765|   167k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|   167k|    CLIP_STORE(0,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.69k, False: 165k]
  |  |  |  Branch (755:28): [True: 2.64k, False: 162k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  767|   167k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|   167k|    CLIP_STORE( 1,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.44k, False: 164k]
  |  |  |  Branch (755:28): [True: 5.01k, False: 159k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  769|   167k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|   167k|    CLIP_STORE( 2,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.22k, False: 165k]
  |  |  |  Branch (755:28): [True: 2.88k, False: 162k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  771|   167k|    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|   167k|    CLIP_STORE( 3,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.12k, False: 163k]
  |  |  |  Branch (755:28): [True: 3.50k, False: 159k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  773|   167k|    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|   167k|    CLIP_STORE( 4,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.84k, False: 165k]
  |  |  |  Branch (755:28): [True: 4.31k, False: 161k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  775|   167k|    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|   167k|    CLIP_STORE( 5,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 3.41k, False: 163k]
  |  |  |  Branch (755:28): [True: 4.28k, False: 159k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  777|   167k|    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|   167k|    CLIP_STORE( 6,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.04k, False: 165k]
  |  |  |  Branch (755:28): [True: 2.97k, False: 162k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  779|   167k|    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|   167k|    CLIP_STORE( 7,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.71k, False: 164k]
  |  |  |  Branch (755:28): [True: 4.56k, False: 159k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  781|   167k|    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|   167k|    CLIP_STORE( 8,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.93k, False: 164k]
  |  |  |  Branch (755:28): [True: 2.97k, False: 161k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  783|   167k|    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|   167k|    CLIP_STORE( 9,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.74k, False: 162k]
  |  |  |  Branch (755:28): [True: 3.78k, False: 158k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  785|   167k|    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|   167k|    CLIP_STORE(10,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.88k, False: 164k]
  |  |  |  Branch (755:28): [True: 3.04k, False: 161k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  787|   167k|    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|   167k|    CLIP_STORE(11,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.50k, False: 162k]
  |  |  |  Branch (755:28): [True: 3.41k, False: 159k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  789|   167k|    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|   167k|    CLIP_STORE(12,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.43k, False: 164k]
  |  |  |  Branch (755:28): [True: 3.44k, False: 161k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  791|   167k|    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|   167k|    CLIP_STORE(13,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.86k, False: 165k]
  |  |  |  Branch (755:28): [True: 3.80k, False: 161k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  793|   167k|    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|   167k|    CLIP_STORE(14,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.80k, False: 165k]
  |  |  |  Branch (755:28): [True: 3.25k, False: 162k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  795|   167k|    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|   167k|    CLIP_STORE(15,C);
  ------------------
  |  |  755|   167k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.48k, False: 164k]
  |  |  |  Branch (755:28): [True: 3.65k, False: 161k]
  |  |  ------------------
  |  |  756|   167k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   167k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  156|   167k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  797|   167k|    Src += BpS;
  798|   167k|    Dst += BpS;
  799|   167k|  }
  800|       |#else
  801|       |  while(H-->0) {
  802|       |    int C;
  803|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|       |    CLIP_STORE(0,C);
  805|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|       |    CLIP_STORE(1,C);
  807|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|       |    CLIP_STORE(2,C);
  809|       |    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|       |    CLIP_STORE(3,C);
  811|       |    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|       |    CLIP_STORE(4,C);
  813|       |    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|       |    CLIP_STORE(5,C);
  815|       |    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|       |    CLIP_STORE(6,C);
  817|       |    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|       |    CLIP_STORE(7,C);
  819|       |    Src += BpS;
  820|       |    Dst += BpS;
  821|       |  }
  822|       |#endif
  823|  10.4k|}
qpel.c:V_Pass_16_Add_C:
  837|  9.70k|{
  838|  9.70k|#if (SIZE==16)
  839|   165k|  while(H-->0) {
  ------------------
  |  Branch (839:9): [True: 155k, False: 9.70k]
  ------------------
  840|   155k|    int C;
  841|   155k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|   155k|    CLIP_STORE(Dst[BpS* 0],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.06k, False: 153k]
  |  |  |  Branch (832:28): [True: 1.97k, False: 151k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  843|   155k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|   155k|    CLIP_STORE(Dst[BpS* 1],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.28k, False: 151k]
  |  |  |  Branch (832:28): [True: 2.55k, False: 148k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  845|   155k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|   155k|    CLIP_STORE(Dst[BpS* 2],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.00k, False: 153k]
  |  |  |  Branch (832:28): [True: 2.22k, False: 151k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  847|   155k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|   155k|    CLIP_STORE(Dst[BpS* 3],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.57k, False: 150k]
  |  |  |  Branch (832:28): [True: 2.47k, False: 148k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  849|   155k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|   155k|    CLIP_STORE(Dst[BpS* 4],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 5.37k, False: 149k]
  |  |  |  Branch (832:28): [True: 2.73k, False: 147k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  851|   155k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|   155k|    CLIP_STORE(Dst[BpS* 5],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.61k, False: 152k]
  |  |  |  Branch (832:28): [True: 2.38k, False: 150k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  853|   155k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|   155k|    CLIP_STORE(Dst[BpS* 6],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 6.47k, False: 148k]
  |  |  |  Branch (832:28): [True: 2.46k, False: 146k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  855|   155k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|   155k|    CLIP_STORE(Dst[BpS* 7],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.12k, False: 153k]
  |  |  |  Branch (832:28): [True: 2.48k, False: 150k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  857|   155k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|   155k|    CLIP_STORE(Dst[BpS* 8],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.84k, False: 152k]
  |  |  |  Branch (832:28): [True: 2.50k, False: 149k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  859|   155k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|   155k|    CLIP_STORE(Dst[BpS* 9],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.30k, False: 152k]
  |  |  |  Branch (832:28): [True: 2.50k, False: 149k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  861|   155k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|   155k|    CLIP_STORE(Dst[BpS*10],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 5.18k, False: 150k]
  |  |  |  Branch (832:28): [True: 2.49k, False: 147k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  863|   155k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|   155k|    CLIP_STORE(Dst[BpS*11],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.70k, False: 152k]
  |  |  |  Branch (832:28): [True: 2.84k, False: 149k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  865|   155k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|   155k|    CLIP_STORE(Dst[BpS*12],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 4.50k, False: 150k]
  |  |  |  Branch (832:28): [True: 2.52k, False: 148k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  867|   155k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|   155k|    CLIP_STORE(Dst[BpS*13],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.06k, False: 153k]
  |  |  |  Branch (832:28): [True: 2.18k, False: 151k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  869|   155k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|   155k|    CLIP_STORE(Dst[BpS*14],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.69k, False: 152k]
  |  |  |  Branch (832:28): [True: 3.44k, False: 149k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  871|   155k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|   155k|    CLIP_STORE(Dst[BpS*15],C);
  ------------------
  |  |  832|   155k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 1.73k, False: 153k]
  |  |  |  Branch (832:28): [True: 3.31k, False: 150k]
  |  |  ------------------
  |  |  833|   155k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  156|   155k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  873|   155k|    Src += 1;
  874|   155k|    Dst += 1;
  875|   155k|  }
  876|       |#else
  877|       |  while(H-->0) {
  878|       |    int C;
  879|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|       |    CLIP_STORE(Dst[BpS*0],C);
  881|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|       |    CLIP_STORE(Dst[BpS*1],C);
  883|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|       |    CLIP_STORE(Dst[BpS*2],C);
  885|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|       |    CLIP_STORE(Dst[BpS*3],C);
  887|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|       |    CLIP_STORE(Dst[BpS*4],C);
  889|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|       |    CLIP_STORE(Dst[BpS*5],C);
  891|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|       |    CLIP_STORE(Dst[BpS*6],C);
  893|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|       |    CLIP_STORE(Dst[BpS*7],C);
  895|       |    Src += 1;
  896|       |    Dst += 1;
  897|       |  }
  898|       |#endif
  899|  9.70k|}
qpel.c:V_Pass_Avrg_16_Add_C:
  909|  11.8k|{
  910|  11.8k|#if (SIZE==16)
  911|   201k|  while(H-->0) {
  ------------------
  |  Branch (911:9): [True: 189k, False: 11.8k]
  ------------------
  912|   189k|    int C;
  913|   189k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|   189k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.84k, False: 188k]
  |  |  |  Branch (903:28): [True: 3.70k, False: 184k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  915|   189k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|   189k|    CLIP_STORE( 1,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.77k, False: 187k]
  |  |  |  Branch (903:28): [True: 3.60k, False: 183k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  917|   189k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|   189k|    CLIP_STORE( 2,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.45k, False: 187k]
  |  |  |  Branch (903:28): [True: 3.89k, False: 183k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  919|   189k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|   189k|    CLIP_STORE( 3,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.15k, False: 186k]
  |  |  |  Branch (903:28): [True: 3.79k, False: 183k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  921|   189k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|   189k|    CLIP_STORE( 4,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.12k, False: 185k]
  |  |  |  Branch (903:28): [True: 4.29k, False: 181k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  923|   189k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|   189k|    CLIP_STORE( 5,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.53k, False: 187k]
  |  |  |  Branch (903:28): [True: 3.30k, False: 184k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  925|   189k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|   189k|    CLIP_STORE( 6,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.02k, False: 185k]
  |  |  |  Branch (903:28): [True: 3.96k, False: 181k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  927|   189k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|   189k|    CLIP_STORE( 7,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.17k, False: 187k]
  |  |  |  Branch (903:28): [True: 3.37k, False: 184k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  929|   189k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|   189k|    CLIP_STORE( 8,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.81k, False: 187k]
  |  |  |  Branch (903:28): [True: 4.40k, False: 182k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  931|   189k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|   189k|    CLIP_STORE( 9,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.41k, False: 187k]
  |  |  |  Branch (903:28): [True: 4.01k, False: 183k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  933|   189k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|   189k|    CLIP_STORE(10,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.06k, False: 186k]
  |  |  |  Branch (903:28): [True: 4.66k, False: 182k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  935|   189k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|   189k|    CLIP_STORE(11,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.63k, False: 186k]
  |  |  |  Branch (903:28): [True: 4.99k, False: 181k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  937|   189k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|   189k|    CLIP_STORE(12,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 6.74k, False: 183k]
  |  |  |  Branch (903:28): [True: 4.71k, False: 178k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  939|   189k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|   189k|    CLIP_STORE(13,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.01k, False: 186k]
  |  |  |  Branch (903:28): [True: 3.15k, False: 183k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  941|   189k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|   189k|    CLIP_STORE(14,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 6.41k, False: 183k]
  |  |  |  Branch (903:28): [True: 4.58k, False: 178k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  943|   189k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|   189k|    CLIP_STORE(15,C);
  ------------------
  |  |  903|   189k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 1.68k, False: 188k]
  |  |  |  Branch (903:28): [True: 2.75k, False: 185k]
  |  |  ------------------
  |  |  904|   189k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   189k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   189k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  945|   189k|    Src += 1;
  946|   189k|    Dst += 1;
  947|   189k|  }
  948|       |#else
  949|       |  while(H-->0) {
  950|       |    int C;
  951|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|       |    CLIP_STORE(0,C);
  953|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|       |    CLIP_STORE(1,C);
  955|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|       |    CLIP_STORE(2,C);
  957|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|       |    CLIP_STORE(3,C);
  959|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|       |    CLIP_STORE(4,C);
  961|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|       |    CLIP_STORE(5,C);
  963|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|       |    CLIP_STORE(6,C);
  965|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|       |    CLIP_STORE(7,C);
  967|       |    Src += 1;
  968|       |    Dst += 1;
  969|       |  }
  970|       |#endif
  971|  11.8k|}
qpel.c:V_Pass_Avrg_Up_16_Add_C:
  981|  14.5k|{
  982|  14.5k|#if (SIZE==16)
  983|   247k|  while(H-->0) {
  ------------------
  |  Branch (983:9): [True: 232k, False: 14.5k]
  ------------------
  984|   232k|    int C;
  985|   232k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|   232k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.34k, False: 230k]
  |  |  |  Branch (975:28): [True: 4.56k, False: 226k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  987|   232k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|   232k|    CLIP_STORE( 1,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 9.01k, False: 223k]
  |  |  |  Branch (975:28): [True: 5.59k, False: 218k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  989|   232k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|   232k|    CLIP_STORE( 2,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.96k, False: 230k]
  |  |  |  Branch (975:28): [True: 4.31k, False: 225k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  991|   232k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|   232k|    CLIP_STORE( 3,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 9.27k, False: 223k]
  |  |  |  Branch (975:28): [True: 5.75k, False: 217k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  993|   232k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|   232k|    CLIP_STORE( 4,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.89k, False: 230k]
  |  |  |  Branch (975:28): [True: 5.63k, False: 224k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  995|   232k|    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|   232k|    CLIP_STORE( 5,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.47k, False: 228k]
  |  |  |  Branch (975:28): [True: 6.38k, False: 222k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  997|   232k|    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|   232k|    CLIP_STORE( 6,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.46k, False: 229k]
  |  |  |  Branch (975:28): [True: 5.26k, False: 224k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  999|   232k|    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|   232k|    CLIP_STORE( 7,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 5.63k, False: 227k]
  |  |  |  Branch (975:28): [True: 6.03k, False: 221k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1001|   232k|    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|   232k|    CLIP_STORE( 8,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.22k, False: 229k]
  |  |  |  Branch (975:28): [True: 5.13k, False: 224k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1003|   232k|    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|   232k|    CLIP_STORE( 9,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.55k, False: 228k]
  |  |  |  Branch (975:28): [True: 6.68k, False: 221k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1005|   232k|    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|   232k|    CLIP_STORE(10,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.75k, False: 229k]
  |  |  |  Branch (975:28): [True: 5.86k, False: 223k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1007|   232k|    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|   232k|    CLIP_STORE(11,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 5.29k, False: 227k]
  |  |  |  Branch (975:28): [True: 6.89k, False: 220k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1009|   232k|    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|   232k|    CLIP_STORE(12,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 2.93k, False: 230k]
  |  |  |  Branch (975:28): [True: 6.45k, False: 223k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1011|   232k|    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|   232k|    CLIP_STORE(13,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.57k, False: 229k]
  |  |  |  Branch (975:28): [True: 6.20k, False: 223k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1013|   232k|    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|   232k|    CLIP_STORE(14,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.79k, False: 229k]
  |  |  |  Branch (975:28): [True: 6.48k, False: 222k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1015|   232k|    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|   232k|    CLIP_STORE(15,C);
  ------------------
  |  |  975|   232k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 1.86k, False: 231k]
  |  |  |  Branch (975:28): [True: 6.71k, False: 224k]
  |  |  ------------------
  |  |  976|   232k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   232k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  156|   232k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1017|   232k|    Src += 1;
 1018|   232k|    Dst += 1;
 1019|   232k|  }
 1020|       |#else
 1021|       |  while(H-->0) {
 1022|       |    int C;
 1023|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|       |    CLIP_STORE(0,C);
 1025|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|       |    CLIP_STORE(1,C);
 1027|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|       |    CLIP_STORE(2,C);
 1029|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|       |    CLIP_STORE(3,C);
 1031|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|       |    CLIP_STORE(4,C);
 1033|       |    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|       |    CLIP_STORE(5,C);
 1035|       |    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|       |    CLIP_STORE(6,C);
 1037|       |    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|       |    CLIP_STORE(7,C);
 1039|       |    Src += 1;
 1040|       |    Dst += 1;
 1041|       |  }
 1042|       |#endif
 1043|  14.5k|}
qpel.c:H_Pass_8_Add_C:
  617|  23.4k|{
  618|       |#if (SIZE==16)
  619|       |  while(H-->0) {
  620|       |    int C;
  621|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  622|       |    CLIP_STORE(Dst[ 0],C);
  623|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  624|       |    CLIP_STORE(Dst[ 1],C);
  625|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  626|       |    CLIP_STORE(Dst[ 2],C);
  627|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  628|       |    CLIP_STORE(Dst[ 3],C);
  629|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  630|       |    CLIP_STORE(Dst[ 4],C);
  631|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  632|       |    CLIP_STORE(Dst[ 5],C);
  633|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  634|       |    CLIP_STORE(Dst[ 6],C);
  635|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  636|       |    CLIP_STORE(Dst[ 7],C);
  637|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  638|       |    CLIP_STORE(Dst[ 8],C);
  639|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  640|       |    CLIP_STORE(Dst[ 9],C);
  641|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  642|       |    CLIP_STORE(Dst[10],C);
  643|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  644|       |    CLIP_STORE(Dst[11],C);
  645|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  646|       |    CLIP_STORE(Dst[12],C);
  647|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  648|       |    CLIP_STORE(Dst[13],C);
  649|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  650|       |    CLIP_STORE(Dst[14],C);
  651|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  652|       |    CLIP_STORE(Dst[15],C);
  653|       |    Src += BpS;
  654|       |    Dst += BpS;
  655|       |  }
  656|       |#else
  657|   210k|  while(H-->0) {
  ------------------
  |  Branch (657:9): [True: 187k, False: 23.4k]
  ------------------
  658|   187k|    int C;
  659|   187k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  660|   187k|    CLIP_STORE(Dst[0],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.56k, False: 185k]
  |  |  |  Branch (612:28): [True: 4.05k, False: 181k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  661|   187k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  662|   187k|    CLIP_STORE(Dst[1],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.82k, False: 184k]
  |  |  |  Branch (612:28): [True: 5.75k, False: 178k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  663|   187k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  664|   187k|    CLIP_STORE(Dst[2],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.56k, False: 185k]
  |  |  |  Branch (612:28): [True: 3.57k, False: 182k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  665|   187k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  666|   187k|    CLIP_STORE(Dst[3],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 3.38k, False: 184k]
  |  |  |  Branch (612:28): [True: 4.95k, False: 179k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  667|   187k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  668|   187k|    CLIP_STORE(Dst[4],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.30k, False: 185k]
  |  |  |  Branch (612:28): [True: 4.81k, False: 180k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  669|   187k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  670|   187k|    CLIP_STORE(Dst[5],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.26k, False: 185k]
  |  |  |  Branch (612:28): [True: 4.67k, False: 180k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  671|   187k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  672|   187k|    CLIP_STORE(Dst[6],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 2.33k, False: 185k]
  |  |  |  Branch (612:28): [True: 4.61k, False: 180k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  673|   187k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  674|   187k|    CLIP_STORE(Dst[7],C);
  ------------------
  |  |  612|   187k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (612:7): [True: 1.99k, False: 185k]
  |  |  |  Branch (612:28): [True: 4.77k, False: 180k]
  |  |  ------------------
  |  |  613|   187k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   187k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  675|   187k|    Src += BpS;
  676|   187k|    Dst += BpS;
  677|   187k|  }
  678|  23.4k|#endif
  679|  23.4k|}
qpel.c:H_Pass_Avrg_8_Add_C:
  689|  37.2k|{
  690|       |#if (SIZE==16)
  691|       |  while(H-->0) {
  692|       |    int C;
  693|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  694|       |    CLIP_STORE(0,C);
  695|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  696|       |    CLIP_STORE( 1,C);
  697|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  698|       |    CLIP_STORE( 2,C);
  699|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  700|       |    CLIP_STORE( 3,C);
  701|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  702|       |    CLIP_STORE( 4,C);
  703|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  704|       |    CLIP_STORE( 5,C);
  705|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  706|       |    CLIP_STORE( 6,C);
  707|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  708|       |    CLIP_STORE( 7,C);
  709|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  710|       |    CLIP_STORE( 8,C);
  711|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  712|       |    CLIP_STORE( 9,C);
  713|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  714|       |    CLIP_STORE(10,C);
  715|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  716|       |    CLIP_STORE(11,C);
  717|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  718|       |    CLIP_STORE(12,C);
  719|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  720|       |    CLIP_STORE(13,C);
  721|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  722|       |    CLIP_STORE(14,C);
  723|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  724|       |    CLIP_STORE(15,C);
  725|       |    Src += BpS;
  726|       |    Dst += BpS;
  727|       |  }
  728|       |#else
  729|   335k|  while(H-->0) {
  ------------------
  |  Branch (729:9): [True: 297k, False: 37.2k]
  ------------------
  730|   297k|    int C;
  731|   297k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  732|   297k|    CLIP_STORE(0,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.41k, False: 295k]
  |  |  |  Branch (683:28): [True: 8.57k, False: 286k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  733|   297k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  734|   297k|    CLIP_STORE(1,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.76k, False: 295k]
  |  |  |  Branch (683:28): [True: 8.89k, False: 286k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  735|   297k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  736|   297k|    CLIP_STORE(2,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.99k, False: 294k]
  |  |  |  Branch (683:28): [True: 8.31k, False: 286k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  737|   297k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  738|   297k|    CLIP_STORE(3,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.83k, False: 294k]
  |  |  |  Branch (683:28): [True: 8.31k, False: 286k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  739|   297k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  740|   297k|    CLIP_STORE(4,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 6.22k, False: 291k]
  |  |  |  Branch (683:28): [True: 8.93k, False: 282k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  741|   297k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  742|   297k|    CLIP_STORE(5,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 2.50k, False: 295k]
  |  |  |  Branch (683:28): [True: 5.74k, False: 289k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  743|   297k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  744|   297k|    CLIP_STORE(6,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 6.77k, False: 291k]
  |  |  |  Branch (683:28): [True: 9.13k, False: 281k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  745|   297k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  746|   297k|    CLIP_STORE(7,C);
  ------------------
  |  |  683|   297k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (683:7): [True: 1.76k, False: 296k]
  |  |  |  Branch (683:28): [True: 5.82k, False: 290k]
  |  |  ------------------
  |  |  684|   297k|  C = (C+Src[i]+1-RND) >> 1;  \
  |  |  685|   297k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   297k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  747|   297k|    Src += BpS;
  748|   297k|    Dst += BpS;
  749|   297k|  }
  750|  37.2k|#endif
  751|  37.2k|}
qpel.c:H_Pass_Avrg_Up_8_Add_C:
  761|  33.1k|{
  762|       |#if (SIZE==16)
  763|       |  while(H-->0) {
  764|       |    int C;
  765|       |    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  766|       |    CLIP_STORE(0,C);
  767|       |    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  768|       |    CLIP_STORE( 1,C);
  769|       |    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  770|       |    CLIP_STORE( 2,C);
  771|       |    C = 16-RND - (Src[0]+Src[7 ]) + 3*(Src[ 1]+Src[ 6])-6*(Src[ 2]+Src[ 5]) + 20*(Src[ 3]+Src[ 4]);
  772|       |    CLIP_STORE( 3,C);
  773|       |    C = 16-RND - (Src[1]+Src[8 ]) + 3*(Src[ 2]+Src[ 7])-6*(Src[ 3]+Src[ 6]) + 20*(Src[ 4]+Src[ 5]);
  774|       |    CLIP_STORE( 4,C);
  775|       |    C = 16-RND - (Src[2]+Src[9 ]) + 3*(Src[ 3]+Src[ 8])-6*(Src[ 4]+Src[ 7]) + 20*(Src[ 5]+Src[ 6]);
  776|       |    CLIP_STORE( 5,C);
  777|       |    C = 16-RND - (Src[3]+Src[10]) + 3*(Src[ 4]+Src[ 9])-6*(Src[ 5]+Src[ 8]) + 20*(Src[ 6]+Src[ 7]);
  778|       |    CLIP_STORE( 6,C);
  779|       |    C = 16-RND - (Src[4]+Src[11]) + 3*(Src[ 5]+Src[10])-6*(Src[ 6]+Src[ 9]) + 20*(Src[ 7]+Src[ 8]);
  780|       |    CLIP_STORE( 7,C);
  781|       |    C = 16-RND - (Src[5]+Src[12]) + 3*(Src[ 6]+Src[11])-6*(Src[ 7]+Src[10]) + 20*(Src[ 8]+Src[ 9]);
  782|       |    CLIP_STORE( 8,C);
  783|       |    C = 16-RND - (Src[6]+Src[13]) + 3*(Src[ 7]+Src[12])-6*(Src[ 8]+Src[11]) + 20*(Src[ 9]+Src[10]);
  784|       |    CLIP_STORE( 9,C);
  785|       |    C = 16-RND - (Src[7]+Src[14]) + 3*(Src[ 8]+Src[13])-6*(Src[ 9]+Src[12]) + 20*(Src[10]+Src[11]);
  786|       |    CLIP_STORE(10,C);
  787|       |    C = 16-RND - (Src[8]+Src[15]) + 3*(Src[ 9]+Src[14])-6*(Src[10]+Src[13]) + 20*(Src[11]+Src[12]);
  788|       |    CLIP_STORE(11,C);
  789|       |    C = 16-RND - (Src[9]+Src[16]) + 3*(Src[10]+Src[15])-6*(Src[11]+Src[14]) + 20*(Src[12]+Src[13]);
  790|       |    CLIP_STORE(12,C);
  791|       |    C = 16-RND - Src[10] +3*Src[11] -6*(Src[12]+Src[15]) + 20*(Src[13]+Src[14]) +2*Src[16];
  792|       |    CLIP_STORE(13,C);
  793|       |    C = 16-RND - Src[11] +3*(Src[12]-Src[16]) -6*Src[13] + 20*Src[14] + 19*Src[15];
  794|       |    CLIP_STORE(14,C);
  795|       |    C = 16-RND - Src[12] +3*Src[13] -7*Src[14] + 23*Src[15] + 14*Src[16];
  796|       |    CLIP_STORE(15,C);
  797|       |    Src += BpS;
  798|       |    Dst += BpS;
  799|       |  }
  800|       |#else
  801|   298k|  while(H-->0) {
  ------------------
  |  Branch (801:9): [True: 265k, False: 33.1k]
  ------------------
  802|   265k|    int C;
  803|   265k|    C = 16-RND +14*Src[0] +23*Src[1] - 7*Src[2] + 3*Src[3] -   Src[4];
  804|   265k|    CLIP_STORE(0,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.56k, False: 263k]
  |  |  |  Branch (755:28): [True: 4.88k, False: 258k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  805|   265k|    C = 16-RND - 3*(Src[0]-Src[4]) +19*Src[1] +20*Src[2] - 6*Src[3] - Src[5];
  806|   265k|    CLIP_STORE(1,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.44k, False: 260k]
  |  |  |  Branch (755:28): [True: 7.19k, False: 253k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  807|   265k|    C = 16-RND + 2*Src[0] - 6*(Src[1]+Src[4]) +20*(Src[2]+Src[3]) + 3*Src[5] - Src[6];
  808|   265k|    CLIP_STORE(2,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 1.68k, False: 263k]
  |  |  |  Branch (755:28): [True: 4.66k, False: 258k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  809|   265k|    C = 16-RND - (Src[0]+Src[7]) + 3*(Src[1]+Src[6])-6*(Src[2]+Src[5]) + 20*(Src[3]+Src[4]);
  810|   265k|    CLIP_STORE(3,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 4.98k, False: 260k]
  |  |  |  Branch (755:28): [True: 6.71k, False: 253k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  811|   265k|    C = 16-RND - (Src[1]+Src[8]) + 3*(Src[2]+Src[7])-6*(Src[3]+Src[6]) + 20*(Src[4]+Src[5]);
  812|   265k|    CLIP_STORE(4,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.07k, False: 263k]
  |  |  |  Branch (755:28): [True: 6.48k, False: 256k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  813|   265k|    C = 16-RND - Src[2] +3*Src[3] -6*(Src[4]+Src[7]) + 20*(Src[5]+Src[6]) +2*Src[8];
  814|   265k|    CLIP_STORE(5,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.23k, False: 262k]
  |  |  |  Branch (755:28): [True: 6.63k, False: 256k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  815|   265k|    C = 16-RND - Src[3] +3*(Src[4]-Src[8]) -6*Src[5] + 20*Src[6] + 19*Src[7];
  816|   265k|    CLIP_STORE(6,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.39k, False: 262k]
  |  |  |  Branch (755:28): [True: 6.26k, False: 256k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  817|   265k|    C = 16-RND - Src[4] +3*Src[5] -7*Src[6] + 23*Src[7] + 14*Src[8];
  818|   265k|    CLIP_STORE(7,C);
  ------------------
  |  |  755|   265k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (755:7): [True: 2.04k, False: 263k]
  |  |  |  Branch (755:28): [True: 6.35k, False: 256k]
  |  |  ------------------
  |  |  756|   265k|  C = (C+Src[i+1]+1-RND) >> 1;  \
  |  |  757|   265k|  STORE(Dst[i], C)
  |  |  ------------------
  |  |  |  |  185|   265k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  819|   265k|    Src += BpS;
  820|   265k|    Dst += BpS;
  821|   265k|  }
  822|  33.1k|#endif
  823|  33.1k|}
qpel.c:V_Pass_8_Add_C:
  837|  41.7k|{
  838|       |#if (SIZE==16)
  839|       |  while(H-->0) {
  840|       |    int C;
  841|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  842|       |    CLIP_STORE(Dst[BpS* 0],C);
  843|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  844|       |    CLIP_STORE(Dst[BpS* 1],C);
  845|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  846|       |    CLIP_STORE(Dst[BpS* 2],C);
  847|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  848|       |    CLIP_STORE(Dst[BpS* 3],C);
  849|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  850|       |    CLIP_STORE(Dst[BpS* 4],C);
  851|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  852|       |    CLIP_STORE(Dst[BpS* 5],C);
  853|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  854|       |    CLIP_STORE(Dst[BpS* 6],C);
  855|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  856|       |    CLIP_STORE(Dst[BpS* 7],C);
  857|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  858|       |    CLIP_STORE(Dst[BpS* 8],C);
  859|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  860|       |    CLIP_STORE(Dst[BpS* 9],C);
  861|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  862|       |    CLIP_STORE(Dst[BpS*10],C);
  863|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  864|       |    CLIP_STORE(Dst[BpS*11],C);
  865|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  866|       |    CLIP_STORE(Dst[BpS*12],C);
  867|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  868|       |    CLIP_STORE(Dst[BpS*13],C);
  869|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  870|       |    CLIP_STORE(Dst[BpS*14],C);
  871|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  872|       |    CLIP_STORE(Dst[BpS*15],C);
  873|       |    Src += 1;
  874|       |    Dst += 1;
  875|       |  }
  876|       |#else
  877|   376k|  while(H-->0) {
  ------------------
  |  Branch (877:9): [True: 334k, False: 41.7k]
  ------------------
  878|   334k|    int C;
  879|   334k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  880|   334k|    CLIP_STORE(Dst[BpS*0],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.77k, False: 331k]
  |  |  |  Branch (832:28): [True: 7.96k, False: 323k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  881|   334k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  882|   334k|    CLIP_STORE(Dst[BpS*1],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 7.46k, False: 326k]
  |  |  |  Branch (832:28): [True: 10.9k, False: 315k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  883|   334k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  884|   334k|    CLIP_STORE(Dst[BpS*2],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.35k, False: 330k]
  |  |  |  Branch (832:28): [True: 7.74k, False: 323k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  885|   334k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  886|   334k|    CLIP_STORE(Dst[BpS*3],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 8.07k, False: 326k]
  |  |  |  Branch (832:28): [True: 10.1k, False: 316k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  887|   334k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  888|   334k|    CLIP_STORE(Dst[BpS*4],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 5.69k, False: 328k]
  |  |  |  Branch (832:28): [True: 10.2k, False: 318k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  889|   334k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  890|   334k|    CLIP_STORE(Dst[BpS*5],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 3.28k, False: 331k]
  |  |  |  Branch (832:28): [True: 9.38k, False: 321k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  891|   334k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  892|   334k|    CLIP_STORE(Dst[BpS*6],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 5.08k, False: 329k]
  |  |  |  Branch (832:28): [True: 10.5k, False: 318k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  893|   334k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  894|   334k|    CLIP_STORE(Dst[BpS*7],C);
  ------------------
  |  |  832|   334k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (832:7): [True: 2.87k, False: 331k]
  |  |  |  Branch (832:28): [True: 9.58k, False: 321k]
  |  |  ------------------
  |  |  833|   334k|  STORE(D, C)
  |  |  ------------------
  |  |  |  |  185|   334k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  895|   334k|    Src += 1;
  896|   334k|    Dst += 1;
  897|   334k|  }
  898|  41.7k|#endif
  899|  41.7k|}
qpel.c:V_Pass_Avrg_8_Add_C:
  909|  47.4k|{
  910|       |#if (SIZE==16)
  911|       |  while(H-->0) {
  912|       |    int C;
  913|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  914|       |    CLIP_STORE(0,C);
  915|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  916|       |    CLIP_STORE( 1,C);
  917|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  918|       |    CLIP_STORE( 2,C);
  919|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  920|       |    CLIP_STORE( 3,C);
  921|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  922|       |    CLIP_STORE( 4,C);
  923|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  924|       |    CLIP_STORE( 5,C);
  925|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  926|       |    CLIP_STORE( 6,C);
  927|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
  928|       |    CLIP_STORE( 7,C);
  929|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
  930|       |    CLIP_STORE( 8,C);
  931|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
  932|       |    CLIP_STORE( 9,C);
  933|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
  934|       |    CLIP_STORE(10,C);
  935|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
  936|       |    CLIP_STORE(11,C);
  937|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
  938|       |    CLIP_STORE(12,C);
  939|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
  940|       |    CLIP_STORE(13,C);
  941|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
  942|       |    CLIP_STORE(14,C);
  943|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
  944|       |    CLIP_STORE(15,C);
  945|       |    Src += 1;
  946|       |    Dst += 1;
  947|       |  }
  948|       |#else
  949|   427k|  while(H-->0) {
  ------------------
  |  Branch (949:9): [True: 379k, False: 47.4k]
  ------------------
  950|   379k|    int C;
  951|   379k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  952|   379k|    CLIP_STORE(0,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.42k, False: 376k]
  |  |  |  Branch (903:28): [True: 9.30k, False: 366k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  953|   379k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  954|   379k|    CLIP_STORE(1,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.94k, False: 374k]
  |  |  |  Branch (903:28): [True: 9.53k, False: 365k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  955|   379k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  956|   379k|    CLIP_STORE(2,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.42k, False: 376k]
  |  |  |  Branch (903:28): [True: 9.57k, False: 366k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  957|   379k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
  958|   379k|    CLIP_STORE(3,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 4.83k, False: 374k]
  |  |  |  Branch (903:28): [True: 10.1k, False: 364k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  959|   379k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
  960|   379k|    CLIP_STORE(4,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 11.3k, False: 368k]
  |  |  |  Branch (903:28): [True: 10.0k, False: 358k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  961|   379k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
  962|   379k|    CLIP_STORE(5,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 3.30k, False: 376k]
  |  |  |  Branch (903:28): [True: 6.22k, False: 370k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  963|   379k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
  964|   379k|    CLIP_STORE(6,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 11.4k, False: 368k]
  |  |  |  Branch (903:28): [True: 11.1k, False: 357k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  965|   379k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
  966|   379k|    CLIP_STORE(7,C);
  ------------------
  |  |  903|   379k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (903:7): [True: 2.18k, False: 377k]
  |  |  |  Branch (903:28): [True: 6.30k, False: 371k]
  |  |  ------------------
  |  |  904|   379k|  C = (C+Src[BpS*i]+1-RND) >> 1;  \
  |  |  905|   379k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   379k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
  967|   379k|    Src += 1;
  968|   379k|    Dst += 1;
  969|   379k|  }
  970|  47.4k|#endif
  971|  47.4k|}
qpel.c:V_Pass_Avrg_Up_8_Add_C:
  981|  55.6k|{
  982|       |#if (SIZE==16)
  983|       |  while(H-->0) {
  984|       |    int C;
  985|       |    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
  986|       |    CLIP_STORE(0,C);
  987|       |    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
  988|       |    CLIP_STORE( 1,C);
  989|       |    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
  990|       |    CLIP_STORE( 2,C);
  991|       |    C = 16-RND - (Src[BpS*0]+Src[BpS*7 ]) + 3*(Src[BpS* 1]+Src[BpS* 6])-6*(Src[BpS* 2]+Src[BpS* 5]) + 20*(Src[BpS* 3]+Src[BpS* 4]);
  992|       |    CLIP_STORE( 3,C);
  993|       |    C = 16-RND - (Src[BpS*1]+Src[BpS*8 ]) + 3*(Src[BpS* 2]+Src[BpS* 7])-6*(Src[BpS* 3]+Src[BpS* 6]) + 20*(Src[BpS* 4]+Src[BpS* 5]);
  994|       |    CLIP_STORE( 4,C);
  995|       |    C = 16-RND - (Src[BpS*2]+Src[BpS*9 ]) + 3*(Src[BpS* 3]+Src[BpS* 8])-6*(Src[BpS* 4]+Src[BpS* 7]) + 20*(Src[BpS* 5]+Src[BpS* 6]);
  996|       |    CLIP_STORE( 5,C);
  997|       |    C = 16-RND - (Src[BpS*3]+Src[BpS*10]) + 3*(Src[BpS* 4]+Src[BpS* 9])-6*(Src[BpS* 5]+Src[BpS* 8]) + 20*(Src[BpS* 6]+Src[BpS* 7]);
  998|       |    CLIP_STORE( 6,C);
  999|       |    C = 16-RND - (Src[BpS*4]+Src[BpS*11]) + 3*(Src[BpS* 5]+Src[BpS*10])-6*(Src[BpS* 6]+Src[BpS* 9]) + 20*(Src[BpS* 7]+Src[BpS* 8]);
 1000|       |    CLIP_STORE( 7,C);
 1001|       |    C = 16-RND - (Src[BpS*5]+Src[BpS*12]) + 3*(Src[BpS* 6]+Src[BpS*11])-6*(Src[BpS* 7]+Src[BpS*10]) + 20*(Src[BpS* 8]+Src[BpS* 9]);
 1002|       |    CLIP_STORE( 8,C);
 1003|       |    C = 16-RND - (Src[BpS*6]+Src[BpS*13]) + 3*(Src[BpS* 7]+Src[BpS*12])-6*(Src[BpS* 8]+Src[BpS*11]) + 20*(Src[BpS* 9]+Src[BpS*10]);
 1004|       |    CLIP_STORE( 9,C);
 1005|       |    C = 16-RND - (Src[BpS*7]+Src[BpS*14]) + 3*(Src[BpS* 8]+Src[BpS*13])-6*(Src[BpS* 9]+Src[BpS*12]) + 20*(Src[BpS*10]+Src[BpS*11]);
 1006|       |    CLIP_STORE(10,C);
 1007|       |    C = 16-RND - (Src[BpS*8]+Src[BpS*15]) + 3*(Src[BpS* 9]+Src[BpS*14])-6*(Src[BpS*10]+Src[BpS*13]) + 20*(Src[BpS*11]+Src[BpS*12]);
 1008|       |    CLIP_STORE(11,C);
 1009|       |    C = 16-RND - (Src[BpS*9]+Src[BpS*16]) + 3*(Src[BpS*10]+Src[BpS*15])-6*(Src[BpS*11]+Src[BpS*14]) + 20*(Src[BpS*12]+Src[BpS*13]);
 1010|       |    CLIP_STORE(12,C);
 1011|       |    C = 16-RND - Src[BpS*10] +3*Src[BpS*11] -6*(Src[BpS*12]+Src[BpS*15]) + 20*(Src[BpS*13]+Src[BpS*14]) +2*Src[BpS*16];
 1012|       |    CLIP_STORE(13,C);
 1013|       |    C = 16-RND - Src[BpS*11] +3*(Src[BpS*12]-Src[BpS*16]) -6*Src[BpS*13] + 20*Src[BpS*14] + 19*Src[BpS*15];
 1014|       |    CLIP_STORE(14,C);
 1015|       |    C = 16-RND - Src[BpS*12] +3*Src[BpS*13] -7*Src[BpS*14] + 23*Src[BpS*15] + 14*Src[BpS*16];
 1016|       |    CLIP_STORE(15,C);
 1017|       |    Src += 1;
 1018|       |    Dst += 1;
 1019|       |  }
 1020|       |#else
 1021|   500k|  while(H-->0) {
  ------------------
  |  Branch (1021:9): [True: 444k, False: 55.6k]
  ------------------
 1022|   444k|    int C;
 1023|   444k|    C = 16-RND +14*Src[BpS*0] +23*Src[BpS*1] - 7*Src[BpS*2] + 3*Src[BpS*3] -   Src[BpS*4];
 1024|   444k|    CLIP_STORE(0,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.02k, False: 441k]
  |  |  |  Branch (975:28): [True: 6.56k, False: 435k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1025|   444k|    C = 16-RND - 3*(Src[BpS*0]-Src[BpS*4]) +19*Src[BpS*1] +20*Src[BpS*2] - 6*Src[BpS*3] - Src[BpS*5];
 1026|   444k|    CLIP_STORE(1,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 10.7k, False: 434k]
  |  |  |  Branch (975:28): [True: 10.1k, False: 424k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1027|   444k|    C = 16-RND + 2*Src[BpS*0] - 6*(Src[BpS*1]+Src[BpS*4]) +20*(Src[BpS*2]+Src[BpS*3]) + 3*Src[BpS*5] - Src[BpS*6];
 1028|   444k|    CLIP_STORE(2,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.59k, False: 441k]
  |  |  |  Branch (975:28): [True: 7.20k, False: 434k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1029|   444k|    C = 16-RND - (Src[BpS*0]+Src[BpS*7]) + 3*(Src[BpS*1]+Src[BpS*6])-6*(Src[BpS*2]+Src[BpS*5]) + 20*(Src[BpS*3]+Src[BpS*4]);
 1030|   444k|    CLIP_STORE(3,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 11.0k, False: 433k]
  |  |  |  Branch (975:28): [True: 10.4k, False: 423k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1031|   444k|    C = 16-RND - (Src[BpS*1]+Src[BpS*8]) + 3*(Src[BpS*2]+Src[BpS*7])-6*(Src[BpS*3]+Src[BpS*6]) + 20*(Src[BpS*4]+Src[BpS*5]);
 1032|   444k|    CLIP_STORE(4,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.65k, False: 440k]
  |  |  |  Branch (975:28): [True: 9.25k, False: 430k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1033|   444k|    C = 16-RND - Src[BpS*2] +3*Src[BpS*3] -6*(Src[BpS*4]+Src[BpS*7]) + 20*(Src[BpS*5]+Src[BpS*6]) +2*Src[BpS*8];
 1034|   444k|    CLIP_STORE(5,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.97k, False: 439k]
  |  |  |  Branch (975:28): [True: 9.58k, False: 430k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1035|   444k|    C = 16-RND - Src[BpS*3] +3*(Src[BpS*4]-Src[BpS*8]) -6*Src[BpS*5] + 20*Src[BpS*6] + 19*Src[BpS*7];
 1036|   444k|    CLIP_STORE(6,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 4.92k, False: 439k]
  |  |  |  Branch (975:28): [True: 9.51k, False: 430k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1037|   444k|    C = 16-RND - Src[BpS*4] +3*Src[BpS*5] -7*Src[BpS*6] + 23*Src[BpS*7] + 14*Src[BpS*8];
 1038|   444k|    CLIP_STORE(7,C);
  ------------------
  |  |  975|   444k|  if (C<0) C = 0; else if (C>(255<<5)) C = 255; else C = C>>5;  \
  |  |  ------------------
  |  |  |  Branch (975:7): [True: 3.70k, False: 441k]
  |  |  |  Branch (975:28): [True: 9.56k, False: 431k]
  |  |  ------------------
  |  |  976|   444k|  C = (C+Src[BpS*i+BpS]+1-RND) >> 1;  \
  |  |  977|   444k|  STORE(Dst[BpS*i], C)
  |  |  ------------------
  |  |  |  |  185|   444k|#define STORE(d,s)  (d) = ( (s)+(d)+1 ) >> 1
  |  |  ------------------
  ------------------
 1039|   444k|    Src += 1;
 1040|   444k|    Dst += 1;
 1041|   444k|  }
 1042|  55.6k|#endif
 1043|  55.6k|}
qpel.c:Init_FIR_Table:
  400|     34|{
  401|     34|	int i;
  402|  8.73k|	for(i=0; i<256; ++i) {
  ------------------
  |  Branch (402:11): [True: 8.70k, False: 34]
  ------------------
  403|  8.70k|		Tab[i][0] = i*A;
  404|  8.70k|		Tab[i][1] = i*B;
  405|  8.70k|		Tab[i][2] = i*C;
  406|  8.70k|		Tab[i][3] = i*D;
  407|  8.70k|	}
  408|     34|}

decoder.c:interpolate16x16_quarterpel:
  119|   191k|{
  120|   191k|	const uint8_t *src;
  121|   191k|	uint8_t *dst;
  122|   191k|	uint8_t *tmp;
  123|   191k|	int32_t quads;
  124|   191k|	const XVID_QP_FUNCS *Ops;
  125|       |
  126|   191k|	int32_t x_int, y_int;
  127|       |
  128|   191k|	const int32_t xRef = (int)x*4 + dx;
  129|   191k|	const int32_t yRef = (int)y*4 + dy;
  130|       |
  131|   191k|	Ops = xvid_QP_Funcs;
  132|   191k|	quads = (dx&3) | ((dy&3)<<2);
  133|       |
  134|   191k|	x_int = xRef >> 2;
  135|   191k|	y_int = yRef >> 2;
  136|       |
  137|   191k|	dst = cur + y * stride + x;
  138|   191k|	src = refn + y_int * (int)stride + x_int;
  139|       |
  140|   191k|	tmp = refh; /* we need at least a 16 x stride scratch block */
  141|       |
  142|   191k|	switch(quads) {
  ------------------
  |  Branch (142:9): [True: 0, False: 191k]
  ------------------
  143|  71.6k|	case 0:
  ------------------
  |  Branch (143:2): [True: 71.6k, False: 120k]
  ------------------
  144|  71.6k|		transfer8x8_copy(dst, src, stride);
  145|  71.6k|		transfer8x8_copy(dst+8, src+8, stride);
  146|  71.6k|		transfer8x8_copy(dst+8*stride, src+8*stride, stride);
  147|  71.6k|		transfer8x8_copy(dst+8*stride+8, src+8*stride+8, stride);
  148|  71.6k|		break;
  149|  13.1k|	case 1:
  ------------------
  |  Branch (149:2): [True: 13.1k, False: 178k]
  ------------------
  150|  13.1k|		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
  151|  13.1k|		break;
  152|  12.1k|	case 2:
  ------------------
  |  Branch (152:2): [True: 12.1k, False: 179k]
  ------------------
  153|  12.1k|		Ops->H_Pass(dst, src, 16, stride, rounding);
  154|  12.1k|		break;
  155|  16.6k|	case 3:
  ------------------
  |  Branch (155:2): [True: 16.6k, False: 175k]
  ------------------
  156|  16.6k|		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  157|  16.6k|		break;
  158|  12.6k|	case 4:
  ------------------
  |  Branch (158:2): [True: 12.6k, False: 179k]
  ------------------
  159|  12.6k|		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
  160|  12.6k|		break;
  161|  4.76k|	case 5:
  ------------------
  |  Branch (161:2): [True: 4.76k, False: 187k]
  ------------------
  162|  4.76k|		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  163|  4.76k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  164|  4.76k|		break;
  165|  2.81k|	case 6:
  ------------------
  |  Branch (165:2): [True: 2.81k, False: 188k]
  ------------------
  166|  2.81k|		Ops->H_Pass(tmp, src,	  17, stride, rounding);
  167|  2.81k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  168|  2.81k|		break;
  169|  4.51k|	case 7:
  ------------------
  |  Branch (169:2): [True: 4.51k, False: 187k]
  ------------------
  170|  4.51k|		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  171|  4.51k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  172|  4.51k|		break;
  173|  12.9k|	case 8:
  ------------------
  |  Branch (173:2): [True: 12.9k, False: 178k]
  ------------------
  174|  12.9k|		Ops->V_Pass(dst, src, 16, stride, rounding);
  175|  12.9k|		break;
  176|  3.48k|	case 9:
  ------------------
  |  Branch (176:2): [True: 3.48k, False: 188k]
  ------------------
  177|  3.48k|		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  178|  3.48k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  179|  3.48k|		break;
  180|  3.40k|	case 10:
  ------------------
  |  Branch (180:2): [True: 3.40k, False: 188k]
  ------------------
  181|  3.40k|		Ops->H_Pass(tmp, src, 17, stride, rounding);
  182|  3.40k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  183|  3.40k|		break;
  184|  4.32k|	case 11:
  ------------------
  |  Branch (184:2): [True: 4.32k, False: 187k]
  ------------------
  185|  4.32k|		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  186|  4.32k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  187|  4.32k|		break;
  188|  15.6k|	case 12:
  ------------------
  |  Branch (188:2): [True: 15.6k, False: 176k]
  ------------------
  189|  15.6k|		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  190|  15.6k|		break;
  191|  4.36k|	case 13:
  ------------------
  |  Branch (191:2): [True: 4.36k, False: 187k]
  ------------------
  192|  4.36k|		Ops->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  193|  4.36k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  194|  4.36k|		break;
  195|  3.57k|	case 14:
  ------------------
  |  Branch (195:2): [True: 3.57k, False: 188k]
  ------------------
  196|  3.57k|		Ops->H_Pass(tmp, src, 17, stride, rounding);
  197|  3.57k|		Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding);
  198|  3.57k|		break;
  199|  5.66k|	case 15:
  ------------------
  |  Branch (199:2): [True: 5.66k, False: 186k]
  ------------------
  200|  5.66k|		Ops->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  201|  5.66k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  202|  5.66k|		break;
  203|   191k|	}
  204|   191k|}
decoder.c:interpolate8x8_quarterpel:
  412|  1.60M|{
  413|  1.60M|	const uint8_t *src;
  414|  1.60M|	uint8_t *dst;
  415|  1.60M|	uint8_t *tmp;
  416|  1.60M|	int32_t quads;
  417|  1.60M|	const XVID_QP_FUNCS *Ops;
  418|       |
  419|  1.60M|	int32_t x_int, y_int;
  420|       |
  421|  1.60M|	const int32_t xRef = (int)x*4 + dx;
  422|  1.60M|	const int32_t yRef = (int)y*4 + dy;
  423|       |
  424|  1.60M|	Ops = xvid_QP_Funcs;
  425|  1.60M|	quads = (dx&3) | ((dy&3)<<2);
  426|       |
  427|  1.60M|	x_int = xRef >> 2;
  428|  1.60M|	y_int = yRef >> 2;
  429|       |
  430|  1.60M|	dst = cur + y * stride + x;
  431|  1.60M|	src = refn + y_int * (int)stride + x_int;
  432|       |
  433|  1.60M|	tmp = refh; /* we need at least a 16 x stride scratch block */
  434|       |
  435|  1.60M|	switch(quads) {
  ------------------
  |  Branch (435:9): [True: 0, False: 1.60M]
  ------------------
  436|  1.32M|	case 0:
  ------------------
  |  Branch (436:2): [True: 1.32M, False: 275k]
  ------------------
  437|  1.32M|		transfer8x8_copy( dst, src, stride);
  438|  1.32M|		break;
  439|  39.8k|	case 1:
  ------------------
  |  Branch (439:2): [True: 39.8k, False: 1.56M]
  ------------------
  440|  39.8k|		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
  441|  39.8k|		break;
  442|  26.1k|	case 2:
  ------------------
  |  Branch (442:2): [True: 26.1k, False: 1.57M]
  ------------------
  443|  26.1k|		Ops->H_Pass_8(dst, src, 8, stride, rounding);
  444|  26.1k|		break;
  445|  35.1k|	case 3:
  ------------------
  |  Branch (445:2): [True: 35.1k, False: 1.56M]
  ------------------
  446|  35.1k|		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  447|  35.1k|		break;
  448|  23.6k|	case 4:
  ------------------
  |  Branch (448:2): [True: 23.6k, False: 1.57M]
  ------------------
  449|  23.6k|		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
  450|  23.6k|		break;
  451|  8.85k|	case 5:
  ------------------
  |  Branch (451:2): [True: 8.85k, False: 1.59M]
  ------------------
  452|  8.85k|		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  453|  8.85k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  454|  8.85k|		break;
  455|  4.01k|	case 6:
  ------------------
  |  Branch (455:2): [True: 4.01k, False: 1.59M]
  ------------------
  456|  4.01k|		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
  457|  4.01k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  458|  4.01k|		break;
  459|  17.3k|	case 7:
  ------------------
  |  Branch (459:2): [True: 17.3k, False: 1.58M]
  ------------------
  460|  17.3k|		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  461|  17.3k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  462|  17.3k|		break;
  463|  31.1k|	case 8:
  ------------------
  |  Branch (463:2): [True: 31.1k, False: 1.57M]
  ------------------
  464|  31.1k|		Ops->V_Pass_8(dst, src, 8, stride, rounding);
  465|  31.1k|		break;
  466|  7.40k|	case 9:
  ------------------
  |  Branch (466:2): [True: 7.40k, False: 1.59M]
  ------------------
  467|  7.40k|		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  468|  7.40k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  469|  7.40k|		break;
  470|  6.90k|	case 10:
  ------------------
  |  Branch (470:2): [True: 6.90k, False: 1.59M]
  ------------------
  471|  6.90k|		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
  472|  6.90k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  473|  6.90k|		break;
  474|  7.27k|	case 11:
  ------------------
  |  Branch (474:2): [True: 7.27k, False: 1.59M]
  ------------------
  475|  7.27k|		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  476|  7.27k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  477|  7.27k|		break;
  478|  31.9k|	case 12:
  ------------------
  |  Branch (478:2): [True: 31.9k, False: 1.57M]
  ------------------
  479|  31.9k|		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  480|  31.9k|		break;
  481|  16.4k|	case 13:
  ------------------
  |  Branch (481:2): [True: 16.4k, False: 1.58M]
  ------------------
  482|  16.4k|		Ops->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  483|  16.4k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  484|  16.4k|		break;
  485|  6.00k|	case 14:
  ------------------
  |  Branch (485:2): [True: 6.00k, False: 1.59M]
  ------------------
  486|  6.00k|		Ops->H_Pass_8(tmp, src, 9, stride, rounding);
  487|  6.00k|		Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding);
  488|  6.00k|		break;
  489|  13.3k|	case 15:
  ------------------
  |  Branch (489:2): [True: 13.3k, False: 1.58M]
  ------------------
  490|  13.3k|		Ops->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  491|  13.3k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  492|  13.3k|		break;
  493|  1.60M|	}
  494|  1.60M|}
decoder.c:interpolate16x16_add_quarterpel:
  216|  95.6k|{
  217|  95.6k|	const uint8_t *src;
  218|  95.6k|	uint8_t *dst;
  219|  95.6k|	uint8_t *tmp;
  220|  95.6k|	int32_t quads;
  221|  95.6k|	const XVID_QP_FUNCS *Ops;
  222|  95.6k|	const XVID_QP_FUNCS *Ops_Copy;
  223|       |
  224|  95.6k|	int32_t x_int, y_int;
  225|       |
  226|  95.6k|	const int32_t xRef = (int)x*4 + dx;
  227|  95.6k|	const int32_t yRef = (int)y*4 + dy;
  228|       |
  229|  95.6k|	Ops = xvid_QP_Add_Funcs;
  230|  95.6k|	Ops_Copy = xvid_QP_Funcs;
  231|  95.6k|	quads = (dx&3) | ((dy&3)<<2);
  232|       |
  233|  95.6k|	x_int = xRef >> 2;
  234|  95.6k|	y_int = yRef >> 2;
  235|       |
  236|  95.6k|	dst = cur + y * stride + x;
  237|  95.6k|	src = refn + y_int * (int)stride + x_int;
  238|       |
  239|  95.6k|	tmp = refh; /* we need at least a 16 x stride scratch block */
  240|       |
  241|  95.6k|	switch(quads) {
  ------------------
  |  Branch (241:9): [True: 0, False: 95.6k]
  ------------------
  242|  31.9k|	case 0:
  ------------------
  |  Branch (242:2): [True: 31.9k, False: 63.6k]
  ------------------
  243|       |		/* NB: there is no halfpel involved ! the name's function can be
  244|       |		 *     misleading */
  245|  31.9k|		interpolate8x8_halfpel_add(dst, src, stride, rounding);
  246|  31.9k|		interpolate8x8_halfpel_add(dst+8, src+8, stride, rounding);
  247|  31.9k|		interpolate8x8_halfpel_add(dst+8*stride, src+8*stride, stride, rounding);
  248|  31.9k|		interpolate8x8_halfpel_add(dst+8*stride+8, src+8*stride+8, stride, rounding);
  249|  31.9k|		break;
  250|  9.27k|	case 1:
  ------------------
  |  Branch (250:2): [True: 9.27k, False: 86.3k]
  ------------------
  251|  9.27k|		Ops->H_Pass_Avrg(dst, src, 16, stride, rounding);
  252|  9.27k|		break;
  253|  7.74k|	case 2:
  ------------------
  |  Branch (253:2): [True: 7.74k, False: 87.8k]
  ------------------
  254|  7.74k|		Ops->H_Pass(dst, src, 16, stride, rounding);
  255|  7.74k|		break;
  256|  10.4k|	case 3:
  ------------------
  |  Branch (256:2): [True: 10.4k, False: 85.1k]
  ------------------
  257|  10.4k|		Ops->H_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  258|  10.4k|		break;
  259|  5.22k|	case 4:
  ------------------
  |  Branch (259:2): [True: 5.22k, False: 90.3k]
  ------------------
  260|  5.22k|		Ops->V_Pass_Avrg(dst, src, 16, stride, rounding);
  261|  5.22k|		break;
  262|  2.32k|	case 5:
  ------------------
  |  Branch (262:2): [True: 2.32k, False: 93.2k]
  ------------------
  263|  2.32k|		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  264|  2.32k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  265|  2.32k|		break;
  266|  2.06k|	case 6:
  ------------------
  |  Branch (266:2): [True: 2.06k, False: 93.5k]
  ------------------
  267|  2.06k|		Ops_Copy->H_Pass(tmp, src,	  17, stride, rounding);
  268|  2.06k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  269|  2.06k|		break;
  270|  2.26k|	case 7:
  ------------------
  |  Branch (270:2): [True: 2.26k, False: 93.3k]
  ------------------
  271|  2.26k|		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  272|  2.26k|		Ops->V_Pass_Avrg(dst, tmp, 16, stride, rounding);
  273|  2.26k|		break;
  274|  4.13k|	case 8:
  ------------------
  |  Branch (274:2): [True: 4.13k, False: 91.4k]
  ------------------
  275|  4.13k|		Ops->V_Pass(dst, src, 16, stride, rounding);
  276|  4.13k|		break;
  277|  1.81k|	case 9:
  ------------------
  |  Branch (277:2): [True: 1.81k, False: 93.7k]
  ------------------
  278|  1.81k|		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  279|  1.81k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  280|  1.81k|		break;
  281|  1.90k|	case 10:
  ------------------
  |  Branch (281:2): [True: 1.90k, False: 93.7k]
  ------------------
  282|  1.90k|		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
  283|  1.90k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  284|  1.90k|		break;
  285|  1.85k|	case 11:
  ------------------
  |  Branch (285:2): [True: 1.85k, False: 93.7k]
  ------------------
  286|  1.85k|		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  287|  1.85k|		Ops->V_Pass(dst, tmp, 16, stride, rounding);
  288|  1.85k|		break;
  289|  7.14k|	case 12:
  ------------------
  |  Branch (289:2): [True: 7.14k, False: 88.4k]
  ------------------
  290|  7.14k|		Ops->V_Pass_Avrg_Up(dst, src, 16, stride, rounding);
  291|  7.14k|		break;
  292|  2.47k|	case 13:
  ------------------
  |  Branch (292:2): [True: 2.47k, False: 93.1k]
  ------------------
  293|  2.47k|		Ops_Copy->H_Pass_Avrg(tmp, src, 17, stride, rounding);
  294|  2.47k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  295|  2.47k|		break;
  296|  1.96k|	case 14:
  ------------------
  |  Branch (296:2): [True: 1.96k, False: 93.6k]
  ------------------
  297|  1.96k|		Ops_Copy->H_Pass(tmp, src, 17, stride, rounding);
  298|  1.96k|		Ops->V_Pass_Avrg_Up( dst, tmp, 16, stride, rounding);
  299|  1.96k|		break;
  300|  2.96k|	case 15:
  ------------------
  |  Branch (300:2): [True: 2.96k, False: 92.6k]
  ------------------
  301|  2.96k|		Ops_Copy->H_Pass_Avrg_Up(tmp, src, 17, stride, rounding);
  302|  2.96k|		Ops->V_Pass_Avrg_Up(dst, tmp, 16, stride, rounding);
  303|  2.96k|		break;
  304|  95.6k|	}
  305|  95.6k|}
decoder.c:interpolate8x8_add_quarterpel:
  506|  1.54M|{
  507|  1.54M|	const uint8_t *src;
  508|  1.54M|	uint8_t *dst;
  509|  1.54M|	uint8_t *tmp;
  510|  1.54M|	int32_t quads;
  511|  1.54M|	const XVID_QP_FUNCS *Ops;
  512|  1.54M|	const XVID_QP_FUNCS *Ops_Copy;
  513|       |
  514|  1.54M|	int32_t x_int, y_int;
  515|       |
  516|  1.54M|	const int32_t xRef = (int)x*4 + dx;
  517|  1.54M|	const int32_t yRef = (int)y*4 + dy;
  518|       |
  519|  1.54M|	Ops = xvid_QP_Add_Funcs;
  520|  1.54M|	Ops_Copy = xvid_QP_Funcs;
  521|  1.54M|	quads = (dx&3) | ((dy&3)<<2);
  522|       |
  523|  1.54M|	x_int = xRef >> 2;
  524|  1.54M|	y_int = yRef >> 2;
  525|       |
  526|  1.54M|	dst = cur + y * stride + x;
  527|  1.54M|	src = refn + y_int * (int)stride + x_int;
  528|       |
  529|  1.54M|	tmp = refh; /* we need at least a 16 x stride scratch block */
  530|       |
  531|  1.54M|	switch(quads) {
  ------------------
  |  Branch (531:9): [True: 0, False: 1.54M]
  ------------------
  532|  1.30M|	case 0:
  ------------------
  |  Branch (532:2): [True: 1.30M, False: 238k]
  ------------------
  533|       |		/* Misleading function name, there is no halfpel involved
  534|       |		 * just dst and src averaging with rounding=0 */
  535|  1.30M|		interpolate8x8_halfpel_add(dst, src, stride, rounding);
  536|  1.30M|		break;
  537|  37.2k|	case 1:
  ------------------
  |  Branch (537:2): [True: 37.2k, False: 1.51M]
  ------------------
  538|  37.2k|		Ops->H_Pass_Avrg_8(dst, src, 8, stride, rounding);
  539|  37.2k|		break;
  540|  23.4k|	case 2:
  ------------------
  |  Branch (540:2): [True: 23.4k, False: 1.52M]
  ------------------
  541|  23.4k|		Ops->H_Pass_8(dst, src, 8, stride, rounding);
  542|  23.4k|		break;
  543|  33.1k|	case 3:
  ------------------
  |  Branch (543:2): [True: 33.1k, False: 1.51M]
  ------------------
  544|  33.1k|		Ops->H_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  545|  33.1k|		break;
  546|  21.7k|	case 4:
  ------------------
  |  Branch (546:2): [True: 21.7k, False: 1.52M]
  ------------------
  547|  21.7k|		Ops->V_Pass_Avrg_8(dst, src, 8, stride, rounding);
  548|  21.7k|		break;
  549|  6.63k|	case 5:
  ------------------
  |  Branch (549:2): [True: 6.63k, False: 1.54M]
  ------------------
  550|  6.63k|		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  551|  6.63k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  552|  6.63k|		break;
  553|  3.20k|	case 6:
  ------------------
  |  Branch (553:2): [True: 3.20k, False: 1.54M]
  ------------------
  554|  3.20k|		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
  555|  3.20k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  556|  3.20k|		break;
  557|  15.9k|	case 7:
  ------------------
  |  Branch (557:2): [True: 15.9k, False: 1.53M]
  ------------------
  558|  15.9k|		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  559|  15.9k|		Ops->V_Pass_Avrg_8(dst, tmp, 8, stride, rounding);
  560|  15.9k|		break;
  561|  25.4k|	case 8:
  ------------------
  |  Branch (561:2): [True: 25.4k, False: 1.52M]
  ------------------
  562|  25.4k|		Ops->V_Pass_8(dst, src, 8, stride, rounding);
  563|  25.4k|		break;
  564|  6.11k|	case 9:
  ------------------
  |  Branch (564:2): [True: 6.11k, False: 1.54M]
  ------------------
  565|  6.11k|		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  566|  6.11k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  567|  6.11k|		break;
  568|  4.20k|	case 10:
  ------------------
  |  Branch (568:2): [True: 4.20k, False: 1.54M]
  ------------------
  569|  4.20k|		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
  570|  4.20k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  571|  4.20k|		break;
  572|  5.98k|	case 11:
  ------------------
  |  Branch (572:2): [True: 5.98k, False: 1.54M]
  ------------------
  573|  5.98k|		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  574|  5.98k|		Ops->V_Pass_8(dst, tmp, 8, stride, rounding);
  575|  5.98k|		break;
  576|  25.0k|	case 12:
  ------------------
  |  Branch (576:2): [True: 25.0k, False: 1.52M]
  ------------------
  577|  25.0k|		Ops->V_Pass_Avrg_Up_8(dst, src, 8, stride, rounding);
  578|  25.0k|		break;
  579|  14.0k|	case 13:
  ------------------
  |  Branch (579:2): [True: 14.0k, False: 1.53M]
  ------------------
  580|  14.0k|		Ops_Copy->H_Pass_Avrg_8(tmp, src, 9, stride, rounding);
  581|  14.0k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  582|  14.0k|		break;
  583|  4.88k|	case 14:
  ------------------
  |  Branch (583:2): [True: 4.88k, False: 1.54M]
  ------------------
  584|  4.88k|		Ops_Copy->H_Pass_8(tmp, src, 9, stride, rounding);
  585|  4.88k|		Ops->V_Pass_Avrg_Up_8( dst, tmp, 8, stride, rounding);
  586|  4.88k|		break;
  587|  11.5k|	case 15:
  ------------------
  |  Branch (587:2): [True: 11.5k, False: 1.53M]
  ------------------
  588|  11.5k|		Ops_Copy->H_Pass_Avrg_Up_8(tmp, src, 9, stride, rounding);
  589|  11.5k|		Ops->V_Pass_Avrg_Up_8(dst, tmp, 8, stride, rounding);
  590|  11.5k|		break;
  591|  1.54M|	}
  592|  1.54M|}

init_GMC:
  589|      2|{
  590|      2|      Predict_16x16_func = Predict_16x16_C;
  591|      2|      Predict_8x8_func   = Predict_8x8_C;
  592|       |
  593|       |#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
  594|       |      if ((cpu_flags & XVID_CPU_MMX)   || (cpu_flags & XVID_CPU_MMXEXT)   ||
  595|       |          (cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
  596|       |          (cpu_flags & XVID_CPU_SSE)   || (cpu_flags & XVID_CPU_SSE2) ||
  597|       |          (cpu_flags & XVID_CPU_SSE3)  || (cpu_flags & XVID_CPU_SSE41))
  598|       |	{
  599|       |	   Predict_16x16_func = Predict_16x16_mmx;
  600|       |	   Predict_8x8_func   = Predict_8x8_mmx;
  601|       |
  602|       |           if (cpu_flags & XVID_CPU_SSE41)
  603|       |	     GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse41;
  604|       |	   else if (cpu_flags & XVID_CPU_SSE2)
  605|       |	     GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_sse2;
  606|       |	   else
  607|       |             GMC_Core_Lin_8 = xvid_GMC_Core_Lin_8_mmx;
  608|       |	}
  609|       |#endif
  610|      2|}
generate_GMCparameters:
  620|  2.47k|{
  621|  2.47k|	gmc->sW = width	<< 4;
  622|  2.47k|	gmc->sH = height << 4;
  623|  2.47k|	gmc->accuracy = accuracy;
  624|  2.47k|	gmc->num_wp = nb_pts;
  625|       |
  626|       |	/* reduce the number of points, if possible */
  627|  2.47k|	if (nb_pts<2 || (pts->duv[2].x==0 && pts->duv[2].y==0 && pts->duv[1].x==0 && pts->duv[1].y==0 )) {
  ------------------
  |  Branch (627:6): [True: 1.48k, False: 983]
  |  Branch (627:19): [True: 640, False: 343]
  |  Branch (627:39): [True: 573, False: 67]
  |  Branch (627:59): [True: 333, False: 240]
  |  Branch (627:79): [True: 260, False: 73]
  ------------------
  628|  1.74k|  	if (nb_pts<2 || (pts->duv[1].x==0 && pts->duv[1].y==0)) {
  ------------------
  |  Branch (628:8): [True: 1.48k, False: 260]
  |  Branch (628:21): [True: 260, False: 0]
  |  Branch (628:41): [True: 260, False: 0]
  ------------------
  629|  1.74k|	  	if (nb_pts<1 || (pts->duv[0].x==0 && pts->duv[0].y==0)) {
  ------------------
  |  Branch (629:9): [True: 1.30k, False: 444]
  |  Branch (629:22): [True: 184, False: 260]
  |  Branch (629:42): [True: 131, False: 53]
  ------------------
  630|  1.43k|		    nb_pts = 0;
  631|  1.43k|  		}
  632|    313|	  	else nb_pts = 1;
  633|  1.74k|  	}
  634|      0|	  else nb_pts = 2;
  635|  1.74k|  }
  636|       |
  637|       |	/* now, nb_pts stores the actual number of points required for interpolation */
  638|       |
  639|  2.47k|	if (nb_pts<=1)
  ------------------
  |  Branch (639:6): [True: 1.74k, False: 723]
  ------------------
  640|  1.74k|	{
  641|  1.74k|	if (nb_pts==1) {
  ------------------
  |  Branch (641:6): [True: 313, False: 1.43k]
  ------------------
  642|       |		/* store as 4b fixed point */
  643|    313|		gmc->Uo = pts->duv[0].x << accuracy;
  644|    313|		gmc->Vo = pts->duv[0].y << accuracy;
  645|    313|		gmc->Uco = ((pts->duv[0].x>>1) | (pts->duv[0].x&1)) << accuracy;	 /* DIV2RND() */
  646|    313|		gmc->Vco = ((pts->duv[0].y>>1) | (pts->duv[0].y&1)) << accuracy;	 /* DIV2RND() */
  647|    313|	}
  648|  1.43k|	else {	/* zero points?! */
  649|  1.43k|		gmc->Uo	= gmc->Vo	= 0;
  650|  1.43k|		gmc->Uco = gmc->Vco = 0;
  651|  1.43k|	}
  652|       |
  653|  1.74k|	gmc->predict_16x16	= Predict_1pt_16x16_C;
  654|  1.74k|	gmc->predict_8x8	= Predict_1pt_8x8_C;
  655|  1.74k|	gmc->get_average_mv = get_average_mv_1pt_C;
  656|  1.74k|	}
  657|    723|	else {		/* 2 or 3 points */
  658|    723|	const int rho	 = 3 - accuracy;	/* = {3,2,1,0} for Acc={0,1,2,3} */
  659|    723|	int Alpha = log2bin(width-1);
  660|    723|	int Ws = 1 << Alpha;
  661|       |
  662|    723|	gmc->dU[0] = 16*Ws + RDIV( 8*Ws*pts->duv[1].x, width );	 /* dU/dx */
  ------------------
  |  |   76|    723|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 248, False: 475]
  |  |  ------------------
  ------------------
  663|    723|	gmc->dV[0] =		 RDIV( 8*Ws*pts->duv[1].y, width );	 /* dV/dx */
  ------------------
  |  |   76|    723|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 313, False: 410]
  |  |  ------------------
  ------------------
  664|       |
  665|    723|	if (nb_pts==2) {
  ------------------
  |  Branch (665:6): [True: 280, False: 443]
  ------------------
  666|    280|		gmc->dU[1] = -gmc->dV[0];	/* -Sin */
  667|    280|		gmc->dV[1] =	gmc->dU[0] ;	/* Cos */
  668|    280|	}
  669|    443|	else
  670|    443|	{
  671|    443|		const int Beta = log2bin(height-1);
  672|    443|		const int Hs = 1<<Beta;
  673|    443|		gmc->dU[1] =		 RDIV( 8*Hs*pts->duv[2].x, height );	 /* dU/dy */
  ------------------
  |  |   76|    443|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 200, False: 243]
  |  |  ------------------
  ------------------
  674|    443|		gmc->dV[1] = 16*Hs + RDIV( 8*Hs*pts->duv[2].y, height );	 /* dV/dy */
  ------------------
  |  |   76|    443|#define RDIV(a,b) (((a)>0 ? (a) + ((b)>>1) : (a) - ((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (76:21): [True: 245, False: 198]
  |  |  ------------------
  ------------------
  675|    443|		if (Beta>Alpha) {
  ------------------
  |  Branch (675:7): [True: 189, False: 254]
  ------------------
  676|    189|		gmc->dU[0] <<= (Beta-Alpha);
  677|    189|		gmc->dV[0] <<= (Beta-Alpha);
  678|    189|		Alpha = Beta;
  679|    189|		Ws = Hs;
  680|    189|		}
  681|    254|		else {
  682|    254|		gmc->dU[1] <<= Alpha - Beta;
  683|    254|		gmc->dV[1] <<= Alpha - Beta;
  684|    254|		}
  685|    443|	}
  686|       |	/* upscale to 16b fixed-point */
  687|    723|	gmc->dU[0] <<= (16-Alpha - rho);
  688|    723|	gmc->dU[1] <<= (16-Alpha - rho);
  689|    723|	gmc->dV[0] <<= (16-Alpha - rho);
  690|    723|	gmc->dV[1] <<= (16-Alpha - rho);
  691|       |
  692|    723|	gmc->Uo	= ( pts->duv[0].x	 <<(16+ accuracy)) + (1<<15);
  693|    723|	gmc->Vo	= ( pts->duv[0].y	 <<(16+ accuracy)) + (1<<15);
  694|    723|	gmc->Uco = ((pts->duv[0].x-1)<<(17+ accuracy)) + (1<<17);
  695|    723|	gmc->Vco = ((pts->duv[0].y-1)<<(17+ accuracy)) + (1<<17);
  696|    723|	gmc->Uco = (gmc->Uco + gmc->dU[0] + gmc->dU[1])>>2;
  697|    723|	gmc->Vco = (gmc->Vco + gmc->dV[0] + gmc->dV[1])>>2;
  698|       |
  699|    723|	gmc->predict_16x16	= Predict_16x16_func;
  700|    723|	gmc->predict_8x8	= Predict_8x8_func;
  701|    723|	gmc->get_average_mv = get_average_mv_C;
  702|    723|	}
  703|  2.47k|}
gmc.c:Predict_16x16_C:
   98|   161k|{
   99|   161k|	const int W = This->sW;
  100|   161k|	const int H	= This->sH;
  101|   161k|	const int rho = 3 - This->accuracy;
  102|   161k|	const int Rounder = ( (1<<7) - (rounding<<(2*rho)) ) << 16;
  103|       |
  104|   161k|	const int dUx = This->dU[0];
  105|   161k|	const int dVx = This->dV[0];
  106|   161k|	const int dUy = This->dU[1];
  107|   161k|	const int dVy = This->dV[1];
  108|       |
  109|   161k|	int Uo = This->Uo + 16*(dUy*y + dUx*x);
  110|   161k|	int Vo = This->Vo + 16*(dVy*y + dVx*x);
  111|       |
  112|   161k|	int i, j;
  113|       |
  114|   161k|	dst += 16;
  115|  2.74M|	for (j=16; j>0; --j) {
  ------------------
  |  Branch (115:13): [True: 2.58M, False: 161k]
  ------------------
  116|  2.58M|		int U = Uo, V = Vo;
  117|  2.58M|		Uo += dUy; Vo += dVy;
  118|  43.8M|		for (i=-16; i<0; ++i) {
  ------------------
  |  Branch (118:15): [True: 41.2M, False: 2.58M]
  ------------------
  119|  41.2M|			unsigned int f0, f1, ri = 16, rj = 16;
  120|  41.2M|			int Offset;
  121|  41.2M|			int u = ( U >> 16 ) << rho;
  122|  41.2M|			int v = ( V >> 16 ) << rho;
  123|       |
  124|  41.2M|			U += dUx; V += dVx;
  125|       |
  126|  41.2M|			if (u > 0 && u <= W) { ri = MTab[u&15]; Offset = u>>4;	}
  ------------------
  |  Branch (126:8): [True: 33.4M, False: 7.81M]
  |  Branch (126:17): [True: 31.2M, False: 2.17M]
  ------------------
  127|  9.98M|			else {
  128|  9.98M|				if (u > W) Offset = W>>4;
  ------------------
  |  Branch (128:9): [True: 2.17M, False: 7.81M]
  ------------------
  129|  7.81M|				else Offset = 0;
  130|  9.98M|				ri = MTab[0];
  131|  9.98M|			}
  132|       |
  133|  41.2M|			if (v > 0 && v <= H) { rj = MTab[v&15]; Offset += (v>>4)*srcstride; }
  ------------------
  |  Branch (133:8): [True: 37.5M, False: 3.78M]
  |  Branch (133:17): [True: 37.3M, False: 119k]
  ------------------
  134|  3.89M|			else {
  135|  3.89M|				if (v > H) Offset += (H>>4)*srcstride;
  ------------------
  |  Branch (135:9): [True: 119k, False: 3.78M]
  ------------------
  136|  3.89M|				rj = MTab[0];
  137|  3.89M|			}
  138|       |
  139|  41.2M|			f0	= src[Offset + 0];
  140|  41.2M|			f0 |= src[Offset + 1] << 16;
  141|  41.2M|			f1	= src[Offset + srcstride + 0];
  142|  41.2M|			f1 |= src[Offset + srcstride + 1] << 16;
  143|  41.2M|			f0 = (ri*f0)>>16;
  144|  41.2M|			f1 = (ri*f1) & 0x0fff0000;
  145|  41.2M|			f0 |= f1;
  146|  41.2M|			f0 = (rj*f0 + Rounder) >> 24;
  147|       |
  148|  41.2M|			dst[i] = (uint8_t)f0;
  149|  41.2M|		}
  150|  2.58M|		dst += dststride;
  151|  2.58M|	}
  152|   161k|}
gmc.c:Predict_8x8_C:
  159|   161k|{
  160|   161k|	const int W	 = This->sW >> 1;
  161|   161k|	const int H	 = This->sH >> 1;
  162|   161k|	const int rho = 3-This->accuracy;
  163|   161k|	const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
  164|       |
  165|   161k|	const int32_t dUx = This->dU[0];
  166|   161k|	const int32_t dVx = This->dV[0];
  167|   161k|	const int32_t dUy = This->dU[1];
  168|   161k|	const int32_t dVy = This->dV[1];
  169|       |
  170|   161k|	int32_t Uo = This->Uco + 8*(dUy*y + dUx*x);
  171|   161k|	int32_t Vo = This->Vco + 8*(dVy*y + dVx*x);
  172|       |
  173|   161k|	int i, j;
  174|       |
  175|   161k|	uDst += 8;
  176|   161k|	vDst += 8;
  177|  1.45M|	for (j=8; j>0; --j) {
  ------------------
  |  Branch (177:12): [True: 1.29M, False: 161k]
  ------------------
  178|  1.29M|		int32_t U = Uo, V = Vo;
  179|  1.29M|		Uo += dUy; Vo += dVy;
  180|       |
  181|  11.6M|		for (i=-8; i<0; ++i) {
  ------------------
  |  Branch (181:14): [True: 10.3M, False: 1.29M]
  ------------------
  182|  10.3M|			int Offset;
  183|  10.3M|			uint32_t f0, f1, ri, rj;
  184|  10.3M|			int32_t u, v;
  185|       |
  186|  10.3M|			u = ( U >> 16 ) << rho;
  187|  10.3M|			v = ( V >> 16 ) << rho;
  188|  10.3M|			U += dUx; V += dVx;
  189|       |
  190|  10.3M|			if (u > 0 && u <= W) {
  ------------------
  |  Branch (190:8): [True: 8.37M, False: 1.94M]
  |  Branch (190:17): [True: 7.90M, False: 472k]
  ------------------
  191|  7.90M|				ri = MTab[u&15];
  192|  7.90M|				Offset = u>>4;
  193|  7.90M|			} else {
  194|  2.41M|				if (u>W) Offset = W>>4;
  ------------------
  |  Branch (194:9): [True: 472k, False: 1.94M]
  ------------------
  195|  1.94M|				else Offset = 0;
  196|  2.41M|				ri = MTab[0];
  197|  2.41M|			}
  198|       |
  199|  10.3M|			if (v > 0 && v <= H) {
  ------------------
  |  Branch (199:8): [True: 8.64M, False: 1.67M]
  |  Branch (199:17): [True: 8.62M, False: 25.0k]
  ------------------
  200|  8.62M|				rj = MTab[v&15];
  201|  8.62M|				Offset += (v>>4)*srcstride;
  202|  8.62M|			} else {
  203|  1.70M|				if (v>H) Offset += (H>>4)*srcstride;
  ------------------
  |  Branch (203:9): [True: 25.0k, False: 1.67M]
  ------------------
  204|  1.70M|				rj = MTab[0];
  205|  1.70M|			}
  206|       |
  207|  10.3M|			f0	= uSrc[Offset + 0];
  208|  10.3M|			f0 |= uSrc[Offset + 1] << 16;
  209|  10.3M|			f1	= uSrc[Offset + srcstride + 0];
  210|  10.3M|			f1 |= uSrc[Offset + srcstride + 1] << 16;
  211|  10.3M|			f0 = (ri*f0)>>16;
  212|  10.3M|			f1 = (ri*f1) & 0x0fff0000;
  213|  10.3M|			f0 |= f1;
  214|  10.3M|			f0 = (rj*f0 + Rounder) >> 24;
  215|       |
  216|  10.3M|			uDst[i] = (uint8_t)f0;
  217|       |
  218|  10.3M|			f0	= vSrc[Offset + 0];
  219|  10.3M|			f0 |= vSrc[Offset + 1] << 16;
  220|  10.3M|			f1	= vSrc[Offset + srcstride + 0];
  221|  10.3M|			f1 |= vSrc[Offset + srcstride + 1] << 16;
  222|  10.3M|			f0 = (ri*f0)>>16;
  223|  10.3M|			f1 = (ri*f1) & 0x0fff0000;
  224|  10.3M|			f0 |= f1;
  225|  10.3M|			f0 = (rj*f0 + Rounder) >> 24;
  226|       |
  227|  10.3M|			vDst[i] = (uint8_t)f0;
  228|  10.3M|		}
  229|  1.29M|		uDst += dststride;
  230|  1.29M|		vDst += dststride;
  231|  1.29M|	}
  232|   161k|}
gmc.c:Predict_1pt_16x16_C:
  269|   220k|{
  270|   220k|	const int W	 = This->sW;
  271|   220k|	const int H	 = This->sH;
  272|   220k|	const int rho = 3-MIN(This->accuracy, 3);
  ------------------
  |  |  255|   220k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (255:20): [True: 145k, False: 75.3k]
  |  |  ------------------
  ------------------
  273|   220k|	const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
  274|       |
  275|       |
  276|   220k|	int32_t uo = This->Uo + (x<<8);	 /* ((16*x)<<4) */
  277|   220k|	int32_t vo = This->Vo + (y<<8);
  278|   220k|	uint32_t ri = MTab[uo & 15];
  279|   220k|	uint32_t rj = MTab[vo & 15];
  280|   220k|	int i, j;
  281|       |
  282|   220k|	int32_t Offset;
  283|   220k|	if (vo>=(-16<<4) && vo<=H) Offset = (vo>>4)*srcstride;
  ------------------
  |  Branch (283:6): [True: 219k, False: 803]
  |  Branch (283:22): [True: 219k, False: 679]
  ------------------
  284|  1.48k|	else {
  285|  1.48k|		if (vo>H) Offset = ( H>>4)*srcstride;
  ------------------
  |  Branch (285:7): [True: 679, False: 803]
  ------------------
  286|    803|		else Offset =-16*srcstride;
  287|  1.48k|		rj = MTab[0];
  288|  1.48k|	}
  289|   220k|	if (uo>=(-16<<4) && uo<=W) Offset += (uo>>4);
  ------------------
  |  Branch (289:6): [True: 218k, False: 1.98k]
  |  Branch (289:22): [True: 195k, False: 23.1k]
  ------------------
  290|  25.1k|	else {
  291|  25.1k|		if (uo>W) Offset += (W>>4);
  ------------------
  |  Branch (291:7): [True: 23.1k, False: 1.98k]
  ------------------
  292|  1.98k|		else Offset -= 16;
  293|  25.1k|		ri = MTab[0];
  294|  25.1k|	}
  295|       |
  296|   220k|	Dst += 16;
  297|       |
  298|  3.74M|	for(j=16; j>0; --j, Offset+=srcstride-16)
  ------------------
  |  Branch (298:12): [True: 3.52M, False: 220k]
  ------------------
  299|  3.52M|	{
  300|  59.9M|	for(i=-16; i<0; ++i, ++Offset)
  ------------------
  |  Branch (300:13): [True: 56.4M, False: 3.52M]
  ------------------
  301|  56.4M|	{
  302|  56.4M|		uint32_t f0, f1;
  303|  56.4M|		f0	= Src[ Offset		+0 ];
  304|  56.4M|		f0 |= Src[ Offset		+1 ] << 16;
  305|  56.4M|		f1	= Src[ Offset+srcstride +0 ];
  306|  56.4M|		f1 |= Src[ Offset+srcstride +1 ] << 16;
  307|  56.4M|		f0 = (ri*f0)>>16;
  308|  56.4M|		f1 = (ri*f1) & 0x0fff0000;
  309|  56.4M|		f0 |= f1;
  310|  56.4M|		f0 = ( rj*f0 + Rounder ) >> 24;
  311|  56.4M|		Dst[i] = (uint8_t)f0;
  312|  56.4M|	}
  313|  3.52M|	Dst += dststride;
  314|  3.52M|	}
  315|   220k|}
gmc.c:Predict_1pt_8x8_C:
  322|   220k|{
  323|   220k|	const int W	 = This->sW >> 1;
  324|   220k|	const int H	 = This->sH >> 1;
  325|   220k|	const int rho = 3-This->accuracy;
  326|   220k|	const int32_t Rounder = ( 128 - (rounding<<(2*rho)) ) << 16;
  327|       |
  328|   220k|	int32_t uo = This->Uco + (x<<7);
  329|   220k|	int32_t vo = This->Vco + (y<<7);
  330|   220k|	uint32_t rri = MTab[uo & 15];
  331|   220k|	uint32_t rrj = MTab[vo & 15];
  332|   220k|	int i, j;
  333|       |
  334|   220k|	int32_t Offset;
  335|   220k|	if (vo>=(-8<<4) && vo<=H) Offset = (vo>>4)*srcstride;
  ------------------
  |  Branch (335:6): [True: 219k, False: 606]
  |  Branch (335:21): [True: 218k, False: 1.15k]
  ------------------
  336|  1.76k|	else {
  337|  1.76k|		if (vo>H) Offset = ( H>>4)*srcstride;
  ------------------
  |  Branch (337:7): [True: 1.15k, False: 606]
  ------------------
  338|    606|		else Offset =-8*srcstride;
  339|  1.76k|		rrj = MTab[0];
  340|  1.76k|	}
  341|   220k|	if (uo>=(-8<<4) && uo<=W) Offset += (uo>>4);
  ------------------
  |  Branch (341:6): [True: 218k, False: 2.05k]
  |  Branch (341:21): [True: 195k, False: 23.3k]
  ------------------
  342|  25.3k|	else {
  343|  25.3k|		if (uo>W) Offset += ( W>>4);
  ------------------
  |  Branch (343:7): [True: 23.3k, False: 2.05k]
  ------------------
  344|  2.05k|		else Offset -= 8;
  345|  25.3k|		rri = MTab[0];
  346|  25.3k|	}
  347|       |
  348|   220k|	uDst += 8;
  349|   220k|	vDst += 8;
  350|  1.98M|	for(j=8; j>0; --j, Offset+=srcstride-8)
  ------------------
  |  Branch (350:11): [True: 1.76M, False: 220k]
  ------------------
  351|  1.76M|	{
  352|  15.8M|	for(i=-8; i<0; ++i, Offset++)
  ------------------
  |  Branch (352:12): [True: 14.1M, False: 1.76M]
  ------------------
  353|  14.1M|	{
  354|  14.1M|		uint32_t f0, f1;
  355|  14.1M|		f0	= uSrc[ Offset + 0 ];
  356|  14.1M|		f0 |= uSrc[ Offset + 1 ] << 16;
  357|  14.1M|		f1	= uSrc[ Offset + srcstride + 0 ];
  358|  14.1M|		f1 |= uSrc[ Offset + srcstride + 1 ] << 16;
  359|  14.1M|		f0 = (rri*f0)>>16;
  360|  14.1M|		f1 = (rri*f1) & 0x0fff0000;
  361|  14.1M|		f0 |= f1;
  362|  14.1M|		f0 = ( rrj*f0 + Rounder ) >> 24;
  363|  14.1M|		uDst[i] = (uint8_t)f0;
  364|       |
  365|  14.1M|		f0	= vSrc[ Offset + 0 ];
  366|  14.1M|		f0 |= vSrc[ Offset + 1 ] << 16;
  367|  14.1M|		f1	= vSrc[ Offset + srcstride + 0 ];
  368|  14.1M|		f1 |= vSrc[ Offset + srcstride + 1 ] << 16;
  369|  14.1M|		f0 = (rri*f0)>>16;
  370|  14.1M|		f1 = (rri*f1) & 0x0fff0000;
  371|  14.1M|		f0 |= f1;
  372|  14.1M|		f0 = ( rrj*f0 + Rounder ) >> 24;
  373|  14.1M|		vDst[i] = (uint8_t)f0;
  374|  14.1M|	}
  375|  1.76M|	uDst += dststride;
  376|  1.76M|	vDst += dststride;
  377|  1.76M|	}
  378|   220k|}
gmc.c:get_average_mv_1pt_C:
  383|   220k|{
  384|   220k|	mv->x = RSHIFT(Dsp->Uo<<qpel, 3);
  ------------------
  |  |   77|   220k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 25.2k, False: 195k]
  |  |  ------------------
  ------------------
  385|   220k|	mv->y = RSHIFT(Dsp->Vo<<qpel, 3);
  ------------------
  |  |   77|   220k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 3.63k, False: 216k]
  |  |  ------------------
  ------------------
  386|   220k|}
gmc.c:log2bin:
   49|  1.16k|{
   50|       |/* Changed by Chenm001 */
   51|  1.16k|#if !defined(_MSC_VER) || defined(ARCH_IS_X86_64)
   52|  1.16k|  int n = 0;
   53|       |
   54|  9.30k|  while (value) {
  ------------------
  |  Branch (54:10): [True: 8.13k, False: 1.16k]
  ------------------
   55|  8.13k|	value >>= 1;
   56|  8.13k|	n++;
   57|  8.13k|  }
   58|  1.16k|  return n;
   59|       |#else
   60|       |  __asm {
   61|       |	bsr eax, value
   62|       |	inc eax
   63|       |  }
   64|       |#endif
   65|  1.16k|}
gmc.c:get_average_mv_C:
  237|   161k|{
  238|   161k|	int i, j;
  239|   161k|	int vx = 0, vy = 0;
  240|   161k|	int32_t uo = Dsp->Uo + 16*(Dsp->dU[1]*y + Dsp->dU[0]*x);
  241|   161k|	int32_t vo = Dsp->Vo + 16*(Dsp->dV[1]*y + Dsp->dV[0]*x);
  242|  2.74M|	for (j=16; j>0; --j)
  ------------------
  |  Branch (242:13): [True: 2.58M, False: 161k]
  ------------------
  243|  2.58M|	{
  244|  2.58M|	int32_t U, V;
  245|  2.58M|	U = uo; uo += Dsp->dU[1];
  246|  2.58M|	V = vo; vo += Dsp->dV[1];
  247|  43.8M|	for (i=16; i>0; --i)
  ------------------
  |  Branch (247:13): [True: 41.2M, False: 2.58M]
  ------------------
  248|  41.2M|	{
  249|  41.2M|		int32_t u,v;
  250|  41.2M|		u = U >> 16; U += Dsp->dU[0]; vx += u;
  251|  41.2M|		v = V >> 16; V += Dsp->dV[0]; vy += v;
  252|  41.2M|	}
  253|  2.58M|	}
  254|   161k|	vx -= (256*x+120) << (5+Dsp->accuracy);	/* 120 = 15*16/2 */
  255|   161k|	vy -= (256*y+120) << (5+Dsp->accuracy);
  256|       |
  257|   161k|	mv->x = RSHIFT( vx, 8+Dsp->accuracy - qpel );
  ------------------
  |  |   77|   161k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 19.1k, False: 142k]
  |  |  ------------------
  ------------------
  258|   161k|	mv->y = RSHIFT( vy, 8+Dsp->accuracy - qpel );
  ------------------
  |  |   77|   161k|#define RSHIFT(a,b) ( (a)>0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b))
  |  |  ------------------
  |  |  |  Branch (77:23): [True: 16.9k, False: 144k]
  |  |  ------------------
  ------------------
  259|   161k|}

decoder.c:gmc_sanitize:
   68|   763k|{
   69|   763k|	int length = 1 << (fcode+4);
   70|       |
   71|       |#if 0
   72|       |	if (quarterpel) value *= 2;
   73|       |#endif
   74|       |
   75|   763k|	if (value < -length)
  ------------------
  |  Branch (75:6): [True: 77.7k, False: 685k]
  ------------------
   76|  77.7k|		return -length;
   77|   685k|	else if (value >= length)
  ------------------
  |  Branch (77:11): [True: 39.4k, False: 646k]
  ------------------
   78|  39.4k|		return length-1;
   79|   646k|	else return value;
   80|   763k|}

mbcoding.c:DPRINTF:
  282|   275M|static __inline void DPRINTF(int level, char *format, ...) {}
decoder.c:DPRINTF:
  282|   278M|static __inline void DPRINTF(int level, char *format, ...) {}
bitstream.c:DPRINTF:
  282|  25.3M|static __inline void DPRINTF(int level, char *format, ...) {}
mbprediction.c:DPRINTF:
  282|   275M|static __inline void DPRINTF(int level, char *format, ...) {}

predict_acdc:
   72|   272M|{
   73|   272M|	const int mbpos = (y * mb_width) + x;
   74|   272M|	int16_t *left, *top, *diag, *current;
   75|       |
   76|   272M|	int32_t left_quant = current_quant;
   77|   272M|	int32_t top_quant = current_quant;
   78|       |
   79|   272M|	const int16_t *pLeft = default_acdc_values;
   80|   272M|	const int16_t *pTop = default_acdc_values;
   81|   272M|	const int16_t *pDiag = default_acdc_values;
   82|       |
   83|   272M|	uint32_t index = x + y * mb_width;	/* current macroblock */
   84|   272M|	int *acpred_direction = &pMBs[index].acpred_directions[block];
   85|   272M|	uint32_t i;
   86|       |
   87|   272M|	left = top = diag = current = NULL;
   88|       |
   89|       |	/* grab left,top and diag macroblocks */
   90|       |
   91|       |	/* left macroblock */
   92|       |
   93|   272M|	if (x && mbpos >= bound + 1  &&
  ------------------
  |  Branch (93:6): [True: 269M, False: 3.27M]
  |  Branch (93:11): [True: 269M, False: 283k]
  ------------------
   94|   272M|		(pMBs[index - 1].mode == MODE_INTRA ||
  ------------------
  |  |   37|   538M|#define	MODE_INTRA		3
  ------------------
  |  Branch (94:4): [True: 394k, False: 268M]
  ------------------
   95|   269M|		 pMBs[index - 1].mode == MODE_INTRA_Q)) {
  ------------------
  |  |   38|   268M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (95:4): [True: 86.7k, False: 268M]
  ------------------
   96|       |
   97|   481k|		left = (int16_t*)pMBs[index - 1].pred_values[0];
   98|   481k|		left_quant = pMBs[index - 1].quant;
   99|   481k|	}
  100|       |	/* top macroblock */
  101|       |
  102|   272M|	if (mbpos >= bound + (int)mb_width &&
  ------------------
  |  Branch (102:6): [True: 268M, False: 4.00M]
  ------------------
  103|   272M|		(pMBs[index - mb_width].mode == MODE_INTRA ||
  ------------------
  |  |   37|   537M|#define	MODE_INTRA		3
  ------------------
  |  Branch (103:4): [True: 304k, False: 268M]
  ------------------
  104|   268M|		 pMBs[index - mb_width].mode == MODE_INTRA_Q)) {
  ------------------
  |  |   38|   268M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (104:4): [True: 54.3k, False: 268M]
  ------------------
  105|       |
  106|   359k|		top = (int16_t*)pMBs[index - mb_width].pred_values[0];
  107|   359k|		top_quant = pMBs[index - mb_width].quant;
  108|   359k|	}
  109|       |	/* diag macroblock */
  110|       |
  111|   272M|	if (x && mbpos >= bound + (int)mb_width + 1 &&
  ------------------
  |  Branch (111:6): [True: 269M, False: 3.27M]
  |  Branch (111:11): [True: 265M, False: 3.94M]
  ------------------
  112|   272M|		(pMBs[index - 1 - mb_width].mode == MODE_INTRA ||
  ------------------
  |  |   37|   530M|#define	MODE_INTRA		3
  ------------------
  |  Branch (112:4): [True: 269k, False: 265M]
  ------------------
  113|   265M|		 pMBs[index - 1 - mb_width].mode == MODE_INTRA_Q)) {
  ------------------
  |  |   38|   265M|#define MODE_INTRA_Q	4
  ------------------
  |  Branch (113:4): [True: 40.0k, False: 265M]
  ------------------
  114|       |
  115|   309k|		diag = (int16_t*)pMBs[index - 1 - mb_width].pred_values[0];
  116|   309k|	}
  117|       |
  118|   272M|	current = (int16_t*)pMBs[index].pred_values[0];
  119|       |
  120|       |	/* now grab pLeft, pTop, pDiag _blocks_ */
  121|       |
  122|   272M|	switch (block) {
  ------------------
  |  Branch (122:10): [True: 0, False: 272M]
  ------------------
  123|       |
  124|  45.4M|	case 0:
  ------------------
  |  Branch (124:2): [True: 45.4M, False: 227M]
  ------------------
  125|  45.4M|		if (left)
  ------------------
  |  Branch (125:7): [True: 80.2k, False: 45.3M]
  ------------------
  126|  80.2k|			pLeft = left + MBPRED_SIZE;
  ------------------
  |  |  180|  80.2k|#define MBPRED_SIZE  15
  ------------------
  127|       |
  128|  45.4M|		if (top)
  ------------------
  |  Branch (128:7): [True: 59.8k, False: 45.3M]
  ------------------
  129|  59.8k|			pTop = top + (MBPRED_SIZE << 1);
  ------------------
  |  |  180|  59.8k|#define MBPRED_SIZE  15
  ------------------
  130|       |
  131|  45.4M|		if (diag)
  ------------------
  |  Branch (131:7): [True: 51.6k, False: 45.3M]
  ------------------
  132|  51.6k|			pDiag = diag + 3 * MBPRED_SIZE;
  ------------------
  |  |  180|  51.6k|#define MBPRED_SIZE  15
  ------------------
  133|       |
  134|  45.4M|		break;
  135|       |
  136|  45.4M|	case 1:
  ------------------
  |  Branch (136:2): [True: 45.4M, False: 227M]
  ------------------
  137|  45.4M|		pLeft = current;
  138|  45.4M|		left_quant = current_quant;
  139|       |
  140|  45.4M|		if (top) {
  ------------------
  |  Branch (140:7): [True: 59.8k, False: 45.3M]
  ------------------
  141|  59.8k|			pTop = top + 3 * MBPRED_SIZE;
  ------------------
  |  |  180|  59.8k|#define MBPRED_SIZE  15
  ------------------
  142|  59.8k|			pDiag = top + (MBPRED_SIZE << 1);
  ------------------
  |  |  180|  59.8k|#define MBPRED_SIZE  15
  ------------------
  143|  59.8k|		}
  144|  45.4M|		break;
  145|       |
  146|  45.4M|	case 2:
  ------------------
  |  Branch (146:2): [True: 45.4M, False: 227M]
  ------------------
  147|  45.4M|		if (left) {
  ------------------
  |  Branch (147:7): [True: 80.2k, False: 45.3M]
  ------------------
  148|  80.2k|			pLeft = left + 3 * MBPRED_SIZE;
  ------------------
  |  |  180|  80.2k|#define MBPRED_SIZE  15
  ------------------
  149|  80.2k|			pDiag = left + MBPRED_SIZE;
  ------------------
  |  |  180|  80.2k|#define MBPRED_SIZE  15
  ------------------
  150|  80.2k|		}
  151|       |
  152|  45.4M|		pTop = current;
  153|  45.4M|		top_quant = current_quant;
  154|       |
  155|  45.4M|		break;
  156|       |
  157|  45.4M|	case 3:
  ------------------
  |  Branch (157:2): [True: 45.4M, False: 227M]
  ------------------
  158|  45.4M|		pLeft = current + (MBPRED_SIZE << 1);
  ------------------
  |  |  180|  45.4M|#define MBPRED_SIZE  15
  ------------------
  159|  45.4M|		left_quant = current_quant;
  160|       |
  161|  45.4M|		pTop = current + MBPRED_SIZE;
  ------------------
  |  |  180|  45.4M|#define MBPRED_SIZE  15
  ------------------
  162|  45.4M|		top_quant = current_quant;
  163|       |
  164|  45.4M|		pDiag = current;
  165|       |
  166|  45.4M|		break;
  167|       |
  168|  45.4M|	case 4:
  ------------------
  |  Branch (168:2): [True: 45.4M, False: 227M]
  ------------------
  169|  45.4M|		if (left)
  ------------------
  |  Branch (169:7): [True: 80.2k, False: 45.3M]
  ------------------
  170|  80.2k|			pLeft = left + (MBPRED_SIZE << 2);
  ------------------
  |  |  180|  80.2k|#define MBPRED_SIZE  15
  ------------------
  171|  45.4M|		if (top)
  ------------------
  |  Branch (171:7): [True: 59.8k, False: 45.3M]
  ------------------
  172|  59.8k|			pTop = top + (MBPRED_SIZE << 2);
  ------------------
  |  |  180|  59.8k|#define MBPRED_SIZE  15
  ------------------
  173|  45.4M|		if (diag)
  ------------------
  |  Branch (173:7): [True: 51.6k, False: 45.3M]
  ------------------
  174|  51.6k|			pDiag = diag + (MBPRED_SIZE << 2);
  ------------------
  |  |  180|  51.6k|#define MBPRED_SIZE  15
  ------------------
  175|  45.4M|		break;
  176|       |
  177|  45.4M|	case 5:
  ------------------
  |  Branch (177:2): [True: 45.4M, False: 227M]
  ------------------
  178|  45.4M|		if (left)
  ------------------
  |  Branch (178:7): [True: 80.2k, False: 45.3M]
  ------------------
  179|  80.2k|			pLeft = left + 5 * MBPRED_SIZE;
  ------------------
  |  |  180|  80.2k|#define MBPRED_SIZE  15
  ------------------
  180|  45.4M|		if (top)
  ------------------
  |  Branch (180:7): [True: 59.8k, False: 45.3M]
  ------------------
  181|  59.8k|			pTop = top + 5 * MBPRED_SIZE;
  ------------------
  |  |  180|  59.8k|#define MBPRED_SIZE  15
  ------------------
  182|  45.4M|		if (diag)
  ------------------
  |  Branch (182:7): [True: 51.6k, False: 45.3M]
  ------------------
  183|  51.6k|			pDiag = diag + 5 * MBPRED_SIZE;
  ------------------
  |  |  180|  51.6k|#define MBPRED_SIZE  15
  ------------------
  184|  45.4M|		break;
  185|   272M|	}
  186|       |
  187|       |	/* determine ac prediction direction & ac/dc predictor place rescaled ac/dc
  188|       |	 * predictions into predictors[] for later use */
  189|   272M|	if (abs(pLeft[0] - pDiag[0]) < abs(pDiag[0] - pTop[0])) {
  ------------------
  |  Branch (189:6): [True: 33.0M, False: 239M]
  ------------------
  190|  33.0M|		*acpred_direction = 1;	/* vertical */
  191|  33.0M|		predictors[0] = DIV_DIV(pTop[0], iDcScaler);
  ------------------
  |  |  263|  33.0M|#define DIV_DIV(a,b)    (((a)>0) ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (263:26): [True: 29.1M, False: 3.90M]
  |  |  ------------------
  ------------------
  192|   264M|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (192:15): [True: 231M, False: 33.0M]
  ------------------
  193|   231M|			predictors[i] = rescale(top_quant, current_quant, pTop[i]);
  194|   231M|		}
  195|   239M|	} else {
  196|   239M|		*acpred_direction = 2;	/* horizontal */
  197|   239M|		predictors[0] = DIV_DIV(pLeft[0], iDcScaler);
  ------------------
  |  |  263|   239M|#define DIV_DIV(a,b)    (((a)>0) ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (263:26): [True: 231M, False: 7.82M]
  |  |  ------------------
  ------------------
  198|  1.91G|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (198:15): [True: 1.67G, False: 239M]
  ------------------
  199|  1.67G|			predictors[i] = rescale(left_quant, current_quant, pLeft[i + 7]);
  200|  1.67G|		}
  201|   239M|	}
  202|   272M|}
add_acdc:
  220|   272M|{
  221|   272M|	uint8_t acpred_direction = pMB->acpred_directions[block];
  222|   272M|	int16_t *pCurrent = (int16_t*)pMB->pred_values[block];
  223|   272M|	uint32_t i;
  224|       |
  225|   272M|	DPRINTF(XVID_DEBUG_COEFF,"predictor[0] %i\n", predictors[0]);
  ------------------
  |  |  201|   272M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  226|       |
  227|   272M|	dct_codes[0] += predictors[0];	/* dc prediction */
  228|   272M|	pCurrent[0] = dct_codes[0]*iDcScaler;
  229|   272M|	if (bsversion > BS_VERSION_BUGGY_DC_CLIPPING) {
  ------------------
  |  |  211|   272M|#define BS_VERSION_BUGGY_DC_CLIPPING 34
  ------------------
  |  Branch (229:6): [True: 267M, False: 4.75M]
  ------------------
  230|   267M|		pCurrent[0] = CLIP(pCurrent[0], -2048, 2047);
  ------------------
  |  |  262|   267M|#define CLIP(X,AMIN,AMAX)   (((X)<(AMIN)) ? (AMIN) : ((X)>(AMAX)) ? (AMAX) : (X))
  |  |  ------------------
  |  |  |  Branch (262:30): [True: 85.3k, False: 267M]
  |  |  |  Branch (262:54): [True: 318k, False: 267M]
  |  |  ------------------
  ------------------
  231|   267M|	}
  232|       |
  233|   272M|	if (acpred_direction == 1) {
  ------------------
  |  Branch (233:6): [True: 97.1k, False: 272M]
  ------------------
  234|   776k|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (234:15): [True: 679k, False: 97.1k]
  ------------------
  235|   679k|			int level = dct_codes[i] + predictors[i];
  236|       |
  237|   679k|			DPRINTF(XVID_DEBUG_COEFF,"predictor[%i] %i\n",i, predictors[i]);
  ------------------
  |  |  201|   679k|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  238|       |
  239|   679k|			dct_codes[i] = level;
  240|   679k|			pCurrent[i] = level;
  241|   679k|			pCurrent[i + 7] = dct_codes[i * 8];
  242|   679k|		}
  243|   272M|	} else if (acpred_direction == 2) {
  ------------------
  |  Branch (243:13): [True: 271k, False: 272M]
  ------------------
  244|  2.17M|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (244:15): [True: 1.90M, False: 271k]
  ------------------
  245|  1.90M|			int level = dct_codes[i * 8] + predictors[i];
  246|  1.90M|			DPRINTF(XVID_DEBUG_COEFF,"predictor[%i] %i\n",i*8, predictors[i]);
  ------------------
  |  |  201|  1.90M|#define XVID_DEBUG_COEFF     (1<< 5)
  ------------------
  247|       |
  248|  1.90M|			dct_codes[i * 8] = level;
  249|  1.90M|			pCurrent[i + 7] = level;
  250|  1.90M|			pCurrent[i] = dct_codes[i];
  251|  1.90M|		}
  252|   272M|	} else {
  253|  2.17G|		for (i = 1; i < 8; i++) {
  ------------------
  |  Branch (253:15): [True: 1.90G, False: 272M]
  ------------------
  254|  1.90G|			pCurrent[i] = dct_codes[i];
  255|  1.90G|			pCurrent[i + 7] = dct_codes[i * 8];
  256|  1.90G|		}
  257|   272M|	}
  258|   272M|}
get_pmv2:
  460|   220k|{
  461|   220k|	int lx, ly, lz;		/* left */
  462|   220k|	int tx, ty, tz;		/* top */
  463|   220k|	int rx, ry, rz;		/* top-right */
  464|   220k|	int lpos, tpos, rpos;
  465|   220k|	int num_cand = 0, last_cand = 1;
  466|       |
  467|   220k|	VECTOR pmv[4];	/* left neighbour, top neighbour, top-right neighbour */
  468|       |
  469|   220k|	switch (block) {
  470|   109k|	case 0:
  ------------------
  |  Branch (470:2): [True: 109k, False: 110k]
  ------------------
  471|   109k|		lx = x - 1;	ly = y;		lz = 1;
  472|   109k|		tx = x;		ty = y - 1;	tz = 2;
  473|   109k|		rx = x + 1;	ry = y - 1;	rz = 2;
  474|   109k|		break;
  475|  36.9k|	case 1:
  ------------------
  |  Branch (475:2): [True: 36.9k, False: 183k]
  ------------------
  476|  36.9k|		lx = x;		ly = y;		lz = 0;
  477|  36.9k|		tx = x;		ty = y - 1;	tz = 3;
  478|  36.9k|		rx = x + 1;	ry = y - 1;	rz = 2;
  479|  36.9k|		break;
  480|  36.9k|	case 2:
  ------------------
  |  Branch (480:2): [True: 36.9k, False: 183k]
  ------------------
  481|  36.9k|		lx = x - 1;	ly = y;		lz = 3;
  482|  36.9k|		tx = x;		ty = y;		tz = 0;
  483|  36.9k|		rx = x;		ry = y;		rz = 1;
  484|  36.9k|		break;
  485|  36.9k|	default:
  ------------------
  |  Branch (485:2): [True: 36.9k, False: 183k]
  ------------------
  486|  36.9k|		lx = x;		ly = y;		lz = 2;
  487|  36.9k|		tx = x;		ty = y;		tz = 0;
  488|  36.9k|		rx = x;		ry = y;		rz = 1;
  489|   220k|	}
  490|       |
  491|   220k|	lpos = lx + ly * mb_width;
  492|   220k|	rpos = rx + ry * mb_width;
  493|   220k|	tpos = tx + ty * mb_width;
  494|       |
  495|   220k|	if (lpos >= bound && lx >= 0) {
  ------------------
  |  Branch (495:6): [True: 208k, False: 12.1k]
  |  Branch (495:23): [True: 197k, False: 10.5k]
  ------------------
  496|   197k|		num_cand++;
  497|   197k|		pmv[1] = mbs[lpos].mvs[lz];
  498|   197k|	} else pmv[1] = zeroMV;
  499|       |
  500|   220k|	if (tpos >= bound) {
  ------------------
  |  Branch (500:6): [True: 124k, False: 96.3k]
  ------------------
  501|   124k|		num_cand++;
  502|   124k|		last_cand = 2;
  503|   124k|		pmv[2] = mbs[tpos].mvs[tz];
  504|   124k|	} else pmv[2] = zeroMV;
  505|       |
  506|   220k|	if (rpos >= bound && rx < mb_width) {
  ------------------
  |  Branch (506:6): [True: 125k, False: 94.6k]
  |  Branch (506:23): [True: 113k, False: 12.1k]
  ------------------
  507|   113k|		num_cand++;
  508|   113k|		last_cand = 3;
  509|   113k|		pmv[3] = mbs[rpos].mvs[rz];
  510|   113k|	} else pmv[3] = zeroMV;
  511|       |
  512|       |	/* If there're more than one candidate, we return the median vector */
  513|       |
  514|   220k|	if (num_cand > 1) {
  ------------------
  |  Branch (514:6): [True: 123k, False: 96.8k]
  ------------------
  515|       |		/* set median */
  516|   123k|		pmv[0].x =
  517|   123k|			MIN(MAX(pmv[1].x, pmv[2].x),
  ------------------
  |  |   34|   990k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 15.7k, False: 108k]
  |  |  |  Branch (34:21): [True: 30.3k, False: 93.4k]
  |  |  |  Branch (34:25): [True: 27.6k, False: 96.2k]
  |  |  |  Branch (34:25): [True: 34.2k, False: 89.5k]
  |  |  |  Branch (34:25): [True: 3.66k, False: 18.8k]
  |  |  |  Branch (34:25): [True: 11.6k, False: 89.5k]
  |  |  |  Branch (34:25): [True: 22.5k, False: 101k]
  |  |  |  Branch (34:29): [True: 3.05k, False: 12.6k]
  |  |  |  Branch (34:33): [True: 27.6k, False: 80.5k]
  |  |  |  Branch (34:33): [True: 34.2k, False: 73.8k]
  |  |  |  Branch (34:33): [True: 3.66k, False: 18.8k]
  |  |  |  Branch (34:33): [True: 11.6k, False: 73.8k]
  |  |  |  Branch (34:33): [True: 22.5k, False: 85.5k]
  |  |  ------------------
  ------------------
  518|   123k|				MIN(MAX(pmv[2].x, pmv[3].x), MAX(pmv[1].x, pmv[3].x)));
  519|   123k|		pmv[0].y =
  520|   123k|			MIN(MAX(pmv[1].y, pmv[2].y),
  ------------------
  |  |   34|   990k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 14.1k, False: 109k]
  |  |  |  Branch (34:21): [True: 28.1k, False: 95.7k]
  |  |  |  Branch (34:25): [True: 15.3k, False: 108k]
  |  |  |  Branch (34:25): [True: 24.1k, False: 99.6k]
  |  |  |  Branch (34:25): [True: 2.57k, False: 14.4k]
  |  |  |  Branch (34:25): [True: 7.17k, False: 99.6k]
  |  |  |  Branch (34:25): [True: 16.9k, False: 106k]
  |  |  |  Branch (34:29): [True: 2.70k, False: 11.4k]
  |  |  |  Branch (34:33): [True: 15.3k, False: 94.2k]
  |  |  |  Branch (34:33): [True: 24.1k, False: 85.4k]
  |  |  |  Branch (34:33): [True: 2.57k, False: 14.4k]
  |  |  |  Branch (34:33): [True: 7.17k, False: 85.4k]
  |  |  |  Branch (34:33): [True: 16.9k, False: 92.6k]
  |  |  ------------------
  ------------------
  521|   123k|				MIN(MAX(pmv[2].y, pmv[3].y), MAX(pmv[1].y, pmv[3].y)));
  522|   123k|		return pmv[0];
  523|   123k|	}
  524|       |
  525|  96.8k|	return pmv[last_cand];	/* no point calculating median mv */
  526|   220k|}
get_pmv2_interlaced:
  534|  75.1k|{
  535|  75.1k|  int lx, ly, lz;   /* left */
  536|  75.1k|  int tx, ty, tz;   /* top */
  537|  75.1k|  int rx, ry, rz;   /* top-right */
  538|  75.1k|  int lpos, tpos, rpos;
  539|  75.1k|  int num_cand = 0, last_cand = 1;
  540|       |
  541|  75.1k|  VECTOR pmv[4];  /* left neighbour, top neighbour, top-right neighbour */
  542|       |
  543|  75.1k|  lx=x-1; ly=y;   lz=1;
  544|  75.1k|  tx=x;   ty=y-1; tz=2;
  545|  75.1k|  rx=x+1; ry=y-1; rz=2;
  546|       |
  547|  75.1k|  lpos=lx+ly*mb_width;
  548|  75.1k|  rpos=rx+ry*mb_width;
  549|  75.1k|  tpos=tx+ty*mb_width;
  550|       |
  551|  75.1k|  if(lx>=0 && lpos>=bound) 
  ------------------
  |  Branch (551:6): [True: 60.9k, False: 14.2k]
  |  Branch (551:15): [True: 60.5k, False: 418]
  ------------------
  552|  60.5k|  {
  553|  60.5k|    num_cand++;
  554|  60.5k|    if(mbs[lpos].field_pred)
  ------------------
  |  Branch (554:8): [True: 4.51k, False: 56.0k]
  ------------------
  555|  4.51k|     pmv[1] = mbs[lpos].mvs_avg;
  556|  56.0k|    else 
  557|  56.0k|     pmv[1] = mbs[lpos].mvs[lz];
  558|  60.5k|  }
  559|  14.6k|  else 
  560|  14.6k|  {
  561|  14.6k|    pmv[1] = zeroMV;
  562|  14.6k|  }  
  563|       |
  564|  75.1k|  if(tpos>=bound) 
  ------------------
  |  Branch (564:6): [True: 48.6k, False: 26.5k]
  ------------------
  565|  48.6k|  {
  566|  48.6k|    num_cand++;
  567|  48.6k|    last_cand=2;
  568|  48.6k|    if(mbs[tpos].field_pred)
  ------------------
  |  Branch (568:8): [True: 6.31k, False: 42.3k]
  ------------------
  569|  6.31k|     pmv[2] = mbs[tpos].mvs_avg;
  570|  42.3k|    else
  571|  42.3k|     pmv[2] = mbs[tpos].mvs[tz];
  572|  48.6k|  } 
  573|  26.5k|  else
  574|  26.5k|  { 
  575|  26.5k|    pmv[2] = zeroMV;
  576|  26.5k|  }
  577|       |        
  578|  75.1k|  if(rx<mb_width && rpos>=bound) 
  ------------------
  |  Branch (578:6): [True: 60.3k, False: 14.7k]
  |  Branch (578:21): [True: 34.5k, False: 25.8k]
  ------------------
  579|  34.5k|  {
  580|  34.5k|    num_cand++;
  581|  34.5k|    last_cand = 3;
  582|  34.5k|    if(mbs[rpos].field_pred)
  ------------------
  |  Branch (582:8): [True: 3.97k, False: 30.5k]
  ------------------
  583|  3.97k|     pmv[3] = mbs[rpos].mvs_avg;
  584|  30.5k|    else
  585|  30.5k|     pmv[3] = mbs[rpos].mvs[rz];
  586|  34.5k|  } 
  587|  40.5k|  else
  588|  40.5k|  { 
  589|  40.5k|    pmv[3] = zeroMV;
  590|  40.5k|  }  
  591|       |
  592|       |  /* If there're more than one candidate, we return the median vector */
  593|  75.1k|  if(num_cand>1) 
  ------------------
  |  Branch (593:6): [True: 47.2k, False: 27.9k]
  ------------------
  594|  47.2k|  {
  595|       |    /* set median */
  596|  47.2k|    pmv[0].x = MIN(MAX(pmv[1].x, pmv[2].x),
  ------------------
  |  |   34|   377k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 5.76k, False: 41.4k]
  |  |  |  Branch (34:21): [True: 8.53k, False: 38.7k]
  |  |  |  Branch (34:25): [True: 9.80k, False: 37.4k]
  |  |  |  Branch (34:25): [True: 12.2k, False: 34.9k]
  |  |  |  Branch (34:25): [True: 1.40k, False: 4.80k]
  |  |  |  Branch (34:25): [True: 6.08k, False: 34.9k]
  |  |  |  Branch (34:25): [True: 6.21k, False: 41.0k]
  |  |  |  Branch (34:29): [True: 928, False: 4.83k]
  |  |  |  Branch (34:33): [True: 9.80k, False: 31.6k]
  |  |  |  Branch (34:33): [True: 12.2k, False: 29.1k]
  |  |  |  Branch (34:33): [True: 1.40k, False: 4.80k]
  |  |  |  Branch (34:33): [True: 6.08k, False: 29.1k]
  |  |  |  Branch (34:33): [True: 6.21k, False: 35.2k]
  |  |  ------------------
  ------------------
  597|  47.2k|               MIN(MAX(pmv[2].x, pmv[3].x), MAX(pmv[1].x, pmv[3].x)));
  598|  47.2k|    pmv[0].y = MIN(MAX(pmv[1].y, pmv[2].y),
  ------------------
  |  |   34|   377k|#define MIN(X, Y) ((X)<(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (34:20): [True: 5.80k, False: 41.4k]
  |  |  |  Branch (34:21): [True: 8.97k, False: 38.2k]
  |  |  |  Branch (34:25): [True: 8.85k, False: 38.3k]
  |  |  |  Branch (34:25): [True: 11.4k, False: 35.8k]
  |  |  |  Branch (34:25): [True: 1.49k, False: 4.73k]
  |  |  |  Branch (34:25): [True: 5.18k, False: 35.8k]
  |  |  |  Branch (34:25): [True: 6.22k, False: 41.0k]
  |  |  |  Branch (34:29): [True: 786, False: 5.02k]
  |  |  |  Branch (34:33): [True: 8.85k, False: 32.5k]
  |  |  |  Branch (34:33): [True: 11.4k, False: 30.0k]
  |  |  |  Branch (34:33): [True: 1.49k, False: 4.73k]
  |  |  |  Branch (34:33): [True: 5.18k, False: 30.0k]
  |  |  |  Branch (34:33): [True: 6.22k, False: 35.2k]
  |  |  ------------------
  ------------------
  599|  47.2k|               MIN(MAX(pmv[2].y, pmv[3].y), MAX(pmv[1].y, pmv[3].y)));
  600|       |          
  601|  47.2k|    return pmv[0];
  602|  47.2k|  }
  603|       |
  604|  27.9k|  return pmv[last_cand];  /* no point calculating median mv */
  605|  75.1k|}
mbprediction.c:rescale:
   42|  1.90G|{
   43|  1.90G|	return (coeff != 0) ? DIV_DIV((coeff) * (predict_quant),
  ------------------
  |  |  263|   649k|#define DIV_DIV(a,b)    (((a)>0) ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
  |  |  ------------------
  |  |  |  Branch (263:26): [True: 186k, False: 462k]
  |  |  ------------------
  ------------------
  |  Branch (43:9): [True: 649k, False: 1.90G]
  ------------------
   44|  1.90G|								  (current_quant)) : 0;
   45|  1.90G|}

dequant_h263_intra_c:
  161|   158M|{
  162|   158M|	const int32_t quant_m_2 = quant << 1;
  163|   158M|	const int32_t quant_add = (quant & 1 ? quant : quant - 1);
  ------------------
  |  Branch (163:29): [True: 100M, False: 57.7M]
  ------------------
  164|   158M|	int i;
  165|       |
  166|   158M|	data[0] = coeff[0] * dcscalar;
  167|   158M|	if (data[0] < -2048) {
  ------------------
  |  Branch (167:6): [True: 29.2k, False: 158M]
  ------------------
  168|  29.2k|		data[0] = -2048;
  169|   158M|	} else if (data[0] > 2047) {
  ------------------
  |  Branch (169:13): [True: 144k, False: 158M]
  ------------------
  170|   144k|		data[0] = 2047;
  171|   144k|	}
  172|       |
  173|  10.1G|	for (i = 1; i < 64; i++) {
  ------------------
  |  Branch (173:14): [True: 9.97G, False: 158M]
  ------------------
  174|  9.97G|		int32_t acLevel = coeff[i];
  175|       |
  176|  9.97G|		if (acLevel == 0) {
  ------------------
  |  Branch (176:7): [True: 9.97G, False: 876k]
  ------------------
  177|  9.97G|			data[i] = 0;
  178|  9.97G|		} else if (acLevel < 0) {
  ------------------
  |  Branch (178:14): [True: 564k, False: 311k]
  ------------------
  179|   564k|			acLevel = quant_m_2 * -acLevel + quant_add;
  180|   564k|			data[i] = (acLevel <= 2048 ? -acLevel : -2048);
  ------------------
  |  Branch (180:15): [True: 547k, False: 17.6k]
  ------------------
  181|   564k|		} else {
  182|   311k|			acLevel = quant_m_2 * acLevel + quant_add;
  183|   311k|			data[i] = (acLevel <= 2047 ? acLevel : 2047);
  ------------------
  |  Branch (183:15): [True: 297k, False: 13.9k]
  ------------------
  184|   311k|		}
  185|  9.97G|	}
  186|       |
  187|   158M|	return(0);
  188|   158M|}

get_intra_matrix:
   61|   114M|{
   62|   114M|	return(mpeg_quant_matrices + 0*64);
   63|   114M|}
get_inter_matrix:
   67|   526k|{
   68|   526k|	return(mpeg_quant_matrices + 4*64);
   69|   526k|}
get_default_intra_matrix:
   73|  8.96k|{
   74|  8.96k|	return default_intra_matrix;
   75|  8.96k|}
get_default_inter_matrix:
   79|  10.7k|{
   80|  10.7k|	return default_inter_matrix;
   81|  10.7k|}
set_intra_matrix:
  113|  22.3k|{
  114|  22.3k|	int i;
  115|  22.3k|	uint16_t *intra_matrix = mpeg_quant_matrices + 0*64;
  116|       |
  117|  1.45M|	for (i = 0; i < 64; i++) {
  ------------------
  |  Branch (117:14): [True: 1.43M, False: 22.3k]
  ------------------
  118|  1.43M|		intra_matrix[i] = (!i) ? (uint16_t)8: (uint16_t)MAX(1, matrix[i]);
  ------------------
  |  |  258|  2.84M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 76.1k, False: 1.33M]
  |  |  ------------------
  ------------------
  |  Branch (118:21): [True: 22.3k, False: 1.40M]
  ------------------
  119|  1.43M|	}
  120|  22.3k|}
set_inter_matrix:
  137|  22.3k|{
  138|  22.3k|	int i;
  139|  22.3k|	uint16_t *inter_matrix = mpeg_quant_matrices + 4*64;
  140|  22.3k|	uint16_t *inter_matrix1 = mpeg_quant_matrices + 5*64;
  141|  22.3k|	uint16_t *inter_matrix_fix = mpeg_quant_matrices + 6*64;
  142|  22.3k|	uint16_t *inter_matrix_fixl = mpeg_quant_matrices + 7*64;
  143|       |
  144|  1.45M|	for (i = 0; i < 64; i++) {
  ------------------
  |  Branch (144:14): [True: 1.43M, False: 22.3k]
  ------------------
  145|  1.43M|		inter_matrix1[i] = ((inter_matrix[i] = (int16_t)MAX(1, matrix[i]))>>1);
  ------------------
  |  |  258|  1.43M|#define MAX(X, Y) ((X)>(Y)?(X):(Y))
  |  |  ------------------
  |  |  |  Branch (258:20): [True: 48.6k, False: 1.38M]
  |  |  ------------------
  ------------------
  146|  1.43M|		inter_matrix1[i] += ((inter_matrix[i] == 1) ? 1: 0);
  ------------------
  |  Branch (146:24): [True: 50.1k, False: 1.38M]
  ------------------
  147|  1.43M|		inter_matrix_fix[i] = (uint16_t) FIX(inter_matrix[i]);
  ------------------
  |  |   30|  1.43M|#define FIX(X)   (((X)==1) ? 0xFFFF : ((1UL << 16) / (X) + 1))
  |  |  ------------------
  |  |  |  Branch (30:19): [True: 50.1k, False: 1.38M]
  |  |  ------------------
  ------------------
  148|  1.43M|		inter_matrix_fixl[i] = (uint16_t) FIXL(inter_matrix[i]);
  ------------------
  |  |   31|  1.43M|#define FIXL(X)    ((1UL << 16) / (X) - 1)
  ------------------
  149|  1.43M|	}
  150|  22.3k|}
init_mpeg_matrix:
  153|  10.2k|init_mpeg_matrix(uint16_t * mpeg_quant_matrices) {
  154|       |
  155|  10.2k|	set_intra_matrix(mpeg_quant_matrices, default_intra_matrix);
  156|  10.2k|	set_inter_matrix(mpeg_quant_matrices, default_inter_matrix);
  157|  10.2k|}

dequant_mpeg_intra_c:
  145|   114M|{
  146|   114M|	const uint16_t *intra_matrix = get_intra_matrix(mpeg_quant_matrices);
  147|   114M|	int i;
  148|       |
  149|   114M|	data[0] = coeff[0] * dcscalar;
  150|   114M|	if (data[0] < -2048) {
  ------------------
  |  Branch (150:6): [True: 69.3k, False: 114M]
  ------------------
  151|  69.3k|		data[0] = -2048;
  152|   114M|	} else if (data[0] > 2047) {
  ------------------
  |  Branch (152:13): [True: 195k, False: 114M]
  ------------------
  153|   195k|		data[0] = 2047;
  154|   195k|	}
  155|       |
  156|  7.31G|	for (i = 1; i < 64; i++) {
  ------------------
  |  Branch (156:14): [True: 7.20G, False: 114M]
  ------------------
  157|  7.20G|		if (coeff[i] == 0) {
  ------------------
  |  Branch (157:7): [True: 7.20G, False: 1.48M]
  ------------------
  158|  7.20G|			data[i] = 0;
  159|  7.20G|		} else if (coeff[i] < 0) {
  ------------------
  |  Branch (159:14): [True: 1.06M, False: 426k]
  ------------------
  160|  1.06M|			uint32_t level = -coeff[i];
  161|       |
  162|  1.06M|			level = (level * intra_matrix[i] * quant) >> 3;
  163|  1.06M|			data[i] = (level <= 2048 ? -(int16_t) level : -2048);
  ------------------
  |  Branch (163:15): [True: 409k, False: 651k]
  ------------------
  164|  1.06M|		} else {
  165|   426k|			uint32_t level = coeff[i];
  166|       |
  167|   426k|			level = (level * intra_matrix[i] * quant) >> 3;
  168|   426k|			data[i] = (level <= 2047 ? level : 2047);
  ------------------
  |  Branch (168:15): [True: 274k, False: 151k]
  ------------------
  169|   426k|		}
  170|  7.20G|	}
  171|       |
  172|   114M|	return(0);
  173|   114M|}

emms_c:
   47|  80.2k|{
   48|  80.2k|}

xvid_malloc:
   50|   508k|{
   51|   508k|	uint8_t *mem_ptr;
   52|       |
   53|   508k|	if (!alignment) {
  ------------------
  |  Branch (53:6): [True: 0, False: 508k]
  ------------------
   54|       |
   55|       |		/* We have not to satisfy any alignment */
   56|      0|		if ((mem_ptr = (uint8_t *) malloc(size + 1)) != NULL) {
  ------------------
  |  Branch (56:7): [True: 0, False: 0]
  ------------------
   57|       |
   58|       |			/* Store (mem_ptr - "real allocated memory") in *(mem_ptr-1) */
   59|      0|			*mem_ptr = (uint8_t)1;
   60|       |
   61|       |			/* Return the mem_ptr pointer */
   62|      0|			return ((void *)(mem_ptr+1));
   63|      0|		}
   64|   508k|	} else {
   65|   508k|		uint8_t *tmp;
   66|       |
   67|       |		/* Allocate the required size memory + alignment so we
   68|       |		 * can realign the data if necessary */
   69|   508k|		if ((tmp = (uint8_t *) malloc(size + alignment)) != NULL) {
  ------------------
  |  Branch (69:7): [True: 508k, False: 0]
  ------------------
   70|       |
   71|       |			/* Align the tmp pointer */
   72|   508k|			mem_ptr =
   73|   508k|				(uint8_t *) ((ptr_t) (tmp + alignment - 1) &
   74|   508k|							 (~(ptr_t) (alignment - 1)));
   75|       |
   76|       |			/* Special case where malloc has already satisfied the alignment
   77|       |			 * We must add alignment to mem_ptr because we must store
   78|       |			 * (mem_ptr - tmp) in *(mem_ptr-1)
   79|       |			 * If we do not add alignment to mem_ptr then *(mem_ptr-1) points
   80|       |			 * to a forbidden memory space */
   81|   508k|			if (mem_ptr == tmp)
  ------------------
  |  Branch (81:8): [True: 132k, False: 376k]
  ------------------
   82|   132k|				mem_ptr += alignment;
   83|       |
   84|       |			/* (mem_ptr - tmp) is stored in *(mem_ptr-1) so we are able to retrieve
   85|       |			 * the real malloc block allocated and free it in xvid_free */
   86|   508k|			*(mem_ptr - 1) = (uint8_t) (mem_ptr - tmp);
   87|       |
   88|       |			/* Return the aligned pointer */
   89|   508k|			return ((void *)mem_ptr);
   90|   508k|		}
   91|   508k|	}
   92|       |
   93|      0|	return(NULL);
   94|   508k|}
xvid_free:
  108|   539k|{
  109|       |
  110|   539k|	uint8_t *ptr;
  111|       |
  112|   539k|	if (mem_ptr == NULL)
  ------------------
  |  Branch (112:6): [True: 30.8k, False: 508k]
  ------------------
  113|  30.8k|		return;
  114|       |
  115|       |	/* Aligned pointer */
  116|   508k|	ptr = mem_ptr;
  117|       |
  118|       |	/* *(ptr - 1) holds the offset to the real allocated block
  119|       |	 * we subtract that offset so we free the real pointer */
  120|   508k|	ptr -= *(ptr - 1);
  121|       |
  122|       |	/* Free the memory */
  123|   508k|	free(ptr);
  124|   508k|}

transfer_16to8copy_c:
   91|   272M|{
   92|   272M|	int i, j;
   93|       |
   94|  2.45G|	for (j = 0; j < 8; j++) {
  ------------------
  |  Branch (94:14): [True: 2.18G, False: 272M]
  ------------------
   95|  19.6G|		for (i = 0; i < 8; i++) {
  ------------------
  |  Branch (95:15): [True: 17.4G, False: 2.18G]
  ------------------
   96|  17.4G|#ifdef USE_REFERENCE_C
   97|  17.4G|			int16_t pixel = src[j * 8 + i];
   98|       |
   99|  17.4G|			if (pixel < 0) {
  ------------------
  |  Branch (99:8): [True: 14.4M, False: 17.4G]
  ------------------
  100|  14.4M|				pixel = 0;
  101|  17.4G|			} else if (pixel > 255) {
  ------------------
  |  Branch (101:15): [True: 20.0M, False: 17.4G]
  ------------------
  102|  20.0M|				pixel = 255;
  103|  20.0M|			}
  104|  17.4G|			dst[j * stride + i] = (uint8_t) pixel;
  105|       |#else
  106|       |			const int16_t pixel = src[j * 8 + i];
  107|       |			const uint8_t value = (uint8_t)( (pixel&~255) ? (-pixel)>>(8*sizeof(pixel)-1) : pixel );
  108|       |			dst[j*stride + i] = value;
  109|       |#endif
  110|  17.4G|    }
  111|  2.18G|	}
  112|   272M|}
transfer_16to8add_c:
  232|   526k|{
  233|   526k|	int i, j;
  234|       |
  235|  4.74M|	for (j = 0; j < 8; j++) {
  ------------------
  |  Branch (235:14): [True: 4.21M, False: 526k]
  ------------------
  236|  37.9M|		for (i = 0; i < 8; i++) {
  ------------------
  |  Branch (236:15): [True: 33.7M, False: 4.21M]
  ------------------
  237|  33.7M|#ifdef USE_REFERENCE_C
  238|  33.7M|			int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i];
  239|       |
  240|  33.7M|			if (pixel < 0) {
  ------------------
  |  Branch (240:8): [True: 2.56M, False: 31.1M]
  ------------------
  241|  2.56M|				pixel = 0;
  242|  31.1M|			} else if (pixel > 255) {
  ------------------
  |  Branch (242:15): [True: 119k, False: 31.0M]
  ------------------
  243|   119k|				pixel = 255;
  244|   119k|			}
  245|  33.7M|			dst[j * stride + i] = (uint8_t) pixel;
  246|       |#else
  247|       |      const int16_t pixel = (int16_t) dst[j * stride + i] + src[j * 8 + i];
  248|       |			const uint8_t value = (uint8_t)( (pixel&~255) ? (-pixel)>>(8*sizeof(pixel)-1) : pixel );
  249|       |			dst[j*stride + i] = value;
  250|       |#endif
  251|       |
  252|  33.7M|		}
  253|  4.21M|	}
  254|   526k|}
transfer8x8_copy_c:
  269|  5.00M|{
  270|  5.00M|	int j, i;
  271|       |
  272|  45.0M|	for (j = 0; j < 8; ++j) {
  ------------------
  |  Branch (272:14): [True: 40.0M, False: 5.00M]
  ------------------
  273|  40.0M|	    uint8_t *d = dst + j*stride;
  274|  40.0M|		const uint8_t *s = src + j*stride;
  275|       |
  276|   360M|		for (i = 0; i < 8; ++i)
  ------------------
  |  Branch (276:15): [True: 320M, False: 40.0M]
  ------------------
  277|   320M|		{
  278|   320M|			*d++ = *s++;
  279|   320M|		}
  280|  40.0M|	}
  281|  5.00M|}
transfer8x4_copy_c:
  296|  37.4k|{
  297|  37.4k|	uint32_t j;
  298|       |
  299|   187k|	for (j = 0; j < 4; j++) {
  ------------------
  |  Branch (299:14): [True: 149k, False: 37.4k]
  ------------------
  300|   149k|		uint32_t *d= (uint32_t*)(dst + j*stride);
  301|   149k|		const uint32_t *s = (const uint32_t*)(src + j*stride);
  302|   149k|		*(d+0) = *(s+0);
  303|   149k|		*(d+1) = *(s+1);
  304|   149k|	}
  305|  37.4k|}

decoder.c:init_timer:
  106|  10.2k|{
  107|  10.2k|}
decoder.c:write_timer:
  110|  10.2k|{
  111|  10.2k|}
decoder.c:start_global_timer:
   62|  69.9k|{
   63|  69.9k|}
decoder.c:stop_global_timer:
  126|  17.1k|{
  127|  17.1k|}
decoder.c:start_timer:
   58|  1.41G|{
   59|  1.41G|}
decoder.c:stop_prediction_timer:
  122|   545M|{
  123|   545M|}
decoder.c:stop_coding_timer:
  114|   273M|{
  115|   273M|}
decoder.c:stop_iquant_timer:
   94|   272M|{
   95|   272M|}
decoder.c:stop_idct_timer:
   70|   273M|{
   71|   273M|}
decoder.c:stop_transfer_timer:
  102|  46.3M|{
  103|  46.3M|}
decoder.c:stop_edges_timer:
   82|  7.93k|{
   83|  7.93k|}
decoder.c:stop_comp_timer:
   78|  1.08M|{
   79|  1.08M|}

xvid_global:
  812|      2|{
  813|      2|	switch(opt)
  814|      2|	{
  815|      2|		case XVID_GBL_INIT :
  ------------------
  |  |  235|      2|#define XVID_GBL_INIT    0 /* initialize xvidcore; must be called before using xvid_decore, or xvid_encore) */
  ------------------
  |  Branch (815:3): [True: 2, False: 0]
  ------------------
  816|      2|			return xvid_gbl_init((xvid_gbl_init_t*)param1);
  817|       |
  818|      0|        case XVID_GBL_INFO :
  ------------------
  |  |  236|      0|#define XVID_GBL_INFO    1 /* return some info about xvidcore, and the host computer */
  ------------------
  |  Branch (818:9): [True: 0, False: 2]
  ------------------
  819|      0|            return xvid_gbl_info((xvid_gbl_info_t*)param1);
  820|       |
  821|      0|		case XVID_GBL_CONVERT :
  ------------------
  |  |  237|      0|#define XVID_GBL_CONVERT 2 /* colorspace conversion utility */
  ------------------
  |  Branch (821:3): [True: 0, False: 2]
  ------------------
  822|      0|			return xvid_gbl_convert((xvid_gbl_convert_t*)param1);
  823|       |
  824|      0|		default :
  ------------------
  |  Branch (824:3): [True: 0, False: 2]
  ------------------
  825|      0|			return XVID_ERR_FAIL;
  ------------------
  |  |   95|      0|#define XVID_ERR_FAIL		-1		/* general fault */
  ------------------
  826|      2|	}
  827|      2|}
xvid_decore:
  844|  90.5k|{
  845|  90.5k|	switch (opt) {
  846|  10.2k|	case XVID_DEC_CREATE:
  ------------------
  |  |  246|  10.2k|#define XVID_DEC_CREATE  0 /* create decore instance; return 0 on success */
  ------------------
  |  Branch (846:2): [True: 10.2k, False: 80.2k]
  ------------------
  847|  10.2k|		return decoder_create((xvid_dec_create_t *) param1);
  848|       |
  849|  10.2k|	case XVID_DEC_DESTROY:
  ------------------
  |  |  247|  10.2k|#define XVID_DEC_DESTROY 1 /* destroy decore instance: return 0 on success */
  ------------------
  |  Branch (849:2): [True: 10.2k, False: 80.2k]
  ------------------
  850|  10.2k|		return decoder_destroy((DECODER *) handle);
  851|       |
  852|  69.9k|	case XVID_DEC_DECODE:
  ------------------
  |  |  248|  69.9k|#define XVID_DEC_DECODE  2 /* decode a frame: returns number of bytes consumed >= 0 */
  ------------------
  |  Branch (852:2): [True: 69.9k, False: 20.5k]
  ------------------
  853|  69.9k|		return decoder_decode((DECODER *) handle, (xvid_dec_frame_t *) param1, (xvid_dec_stats_t*) param2);
  854|       |
  855|      0|	default:
  ------------------
  |  Branch (855:2): [True: 0, False: 90.5k]
  ------------------
  856|      0|		return XVID_ERR_FAIL;
  ------------------
  |  |   95|      0|#define XVID_ERR_FAIL		-1		/* general fault */
  ------------------
  857|  90.5k|	}
  858|  90.5k|}
xvid.c:xvid_gbl_init:
  200|      2|{
  201|      2|	unsigned int cpu_flags;
  202|       |
  203|      2|	if (XVID_VERSION_MAJOR(init->version) != 1) /* v1.x.x */
  ------------------
  |  |   63|      2|#define XVID_VERSION_MAJOR(a)    ((char)(((a)>>16) & 0xff))
  ------------------
  |  Branch (203:6): [True: 0, False: 2]
  ------------------
  204|      0|		return XVID_ERR_VERSION;
  ------------------
  |  |   98|      0|#define XVID_ERR_VERSION	-4		/* structure version not supported */
  ------------------
  205|       |
  206|      2|	cpu_flags = (init->cpu_flags & XVID_CPU_FORCE) ? init->cpu_flags : detect_cpu_flags();
  ------------------
  |  |  180|      2|#define XVID_CPU_FORCE    (1<<31) /* force passed cpu flags */
  ------------------
  |  Branch (206:14): [True: 0, False: 2]
  ------------------
  207|       |
  208|       |	/* Initialize the function pointers */
  209|      2|	init_vlc_tables();
  210|       |
  211|       |	/* Fixed Point Forward/Inverse DCT transformations */
  212|      2|	fdct = fdct_int32;
  213|      2|	idct = idct_int32;
  214|       |
  215|       |	/* Only needed on PPC Altivec archs */
  216|      2|	sadInit = NULL;
  217|       |
  218|       |	/* Restore FPU context : emms_c is a no-op function */
  219|      2|	emms = emms_c;
  220|       |
  221|       |	/* Qpel stuff */
  222|      2|	xvid_QP_Funcs = &xvid_QP_Funcs_C;
  223|      2|	xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_C;
  224|      2|	xvid_Init_QP();
  225|       |
  226|       |	/* Quantization functions */
  227|      2|	quant_h263_intra   = quant_h263_intra_c;
  228|      2|	quant_h263_inter   = quant_h263_inter_c;
  229|      2|	dequant_h263_intra = dequant_h263_intra_c;
  230|      2|	dequant_h263_inter = dequant_h263_inter_c;
  231|       |
  232|      2|	quant_mpeg_intra   = quant_mpeg_intra_c;
  233|      2|	quant_mpeg_inter   = quant_mpeg_inter_c;
  234|      2|	dequant_mpeg_intra = dequant_mpeg_intra_c;
  235|      2|	dequant_mpeg_inter = dequant_mpeg_inter_c;
  236|       |
  237|       |	/* Block transfer related functions */
  238|      2|	transfer_8to16copy = transfer_8to16copy_c;
  239|      2|	transfer_16to8copy = transfer_16to8copy_c;
  240|      2|	transfer_8to16sub  = transfer_8to16sub_c;
  241|      2|	transfer_8to16subro  = transfer_8to16subro_c;
  242|      2|	transfer_8to16sub2 = transfer_8to16sub2_c;
  243|      2|	transfer_8to16sub2ro = transfer_8to16sub2ro_c;
  244|      2|	transfer_16to8add  = transfer_16to8add_c;
  245|      2|	transfer8x8_copy   = transfer8x8_copy_c;
  246|      2|	transfer8x4_copy   = transfer8x4_copy_c;
  247|       |
  248|       |	/* Interlacing functions */
  249|      2|	MBFieldTest = MBFieldTest_c;
  250|       |
  251|       |	/* Image interpolation related functions */
  252|      2|	interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_c;
  253|      2|	interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_c;
  254|      2|	interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_c;
  255|       |
  256|      2|	interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_c;
  257|      2|	interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_c;
  258|      2|	interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_c;
  259|       |
  260|      2|	interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_c;
  261|      2|	interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_c;
  262|      2|	interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_c;
  263|      2|	interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_c;
  264|       |
  265|      2|	interpolate16x16_lowpass_h = interpolate16x16_lowpass_h_c;
  266|      2|	interpolate16x16_lowpass_v = interpolate16x16_lowpass_v_c;
  267|      2|	interpolate16x16_lowpass_hv = interpolate16x16_lowpass_hv_c;
  268|       |
  269|      2|	interpolate8x8_lowpass_h = interpolate8x8_lowpass_h_c;
  270|      2|	interpolate8x8_lowpass_v = interpolate8x8_lowpass_v_c;
  271|      2|	interpolate8x8_lowpass_hv = interpolate8x8_lowpass_hv_c;
  272|       |
  273|      2|	interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_c;
  274|      2|	interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_c;
  275|       |
  276|      2|	interpolate8x8_avg2 = interpolate8x8_avg2_c;
  277|      2|	interpolate8x8_avg4 = interpolate8x8_avg4_c;
  278|       |
  279|       |	/* postprocessing */
  280|      2|	image_brightness = image_brightness_c;
  281|       |
  282|       |	/* Initialize internal colorspace transformation tables */
  283|      2|	colorspace_init();
  284|       |
  285|       |	/* All colorspace transformation functions User Format->YV12 */
  286|      2|	yv12_to_yv12    = yv12_to_yv12_c;
  287|      2|	rgb555_to_yv12  = rgb555_to_yv12_c;
  288|      2|	rgb565_to_yv12  = rgb565_to_yv12_c;
  289|      2|	rgb_to_yv12     = rgb_to_yv12_c;
  290|      2|	bgr_to_yv12     = bgr_to_yv12_c;
  291|      2|	bgra_to_yv12    = bgra_to_yv12_c;
  292|      2|	abgr_to_yv12    = abgr_to_yv12_c;
  293|      2|	rgba_to_yv12    = rgba_to_yv12_c;
  294|      2|	argb_to_yv12    = argb_to_yv12_c;
  295|      2|	yuyv_to_yv12    = yuyv_to_yv12_c;
  296|      2|	uyvy_to_yv12    = uyvy_to_yv12_c;
  297|       |
  298|      2|	rgb555i_to_yv12 = rgb555i_to_yv12_c;
  299|      2|	rgb565i_to_yv12 = rgb565i_to_yv12_c;
  300|      2|	bgri_to_yv12    = bgri_to_yv12_c;
  301|      2|	bgrai_to_yv12   = bgrai_to_yv12_c;
  302|      2|	abgri_to_yv12   = abgri_to_yv12_c;
  303|      2|	rgbai_to_yv12   = rgbai_to_yv12_c;
  304|      2|	argbi_to_yv12   = argbi_to_yv12_c;
  305|      2|	yuyvi_to_yv12   = yuyvi_to_yv12_c;
  306|      2|	uyvyi_to_yv12   = uyvyi_to_yv12_c;
  307|       |
  308|       |	/* All colorspace transformation functions YV12->User format */
  309|      2|	yv12_to_rgb555  = yv12_to_rgb555_c;
  310|      2|	yv12_to_rgb565  = yv12_to_rgb565_c;
  311|      2|	yv12_to_rgb     = yv12_to_rgb_c;
  312|      2|	yv12_to_bgr     = yv12_to_bgr_c;
  313|      2|	yv12_to_bgra    = yv12_to_bgra_c;
  314|      2|	yv12_to_abgr    = yv12_to_abgr_c;
  315|      2|	yv12_to_rgba    = yv12_to_rgba_c;
  316|      2|	yv12_to_argb    = yv12_to_argb_c;
  317|      2|	yv12_to_yuyv    = yv12_to_yuyv_c;
  318|      2|	yv12_to_uyvy    = yv12_to_uyvy_c;
  319|       |
  320|      2|	yv12_to_rgb555i = yv12_to_rgb555i_c;
  321|      2|	yv12_to_rgb565i = yv12_to_rgb565i_c;
  322|      2|	yv12_to_bgri    = yv12_to_bgri_c;
  323|      2|	yv12_to_bgrai   = yv12_to_bgrai_c;
  324|      2|	yv12_to_abgri   = yv12_to_abgri_c;
  325|      2|	yv12_to_rgbai   = yv12_to_rgbai_c;
  326|      2|	yv12_to_argbi   = yv12_to_argbi_c;
  327|      2|	yv12_to_yuyvi   = yv12_to_yuyvi_c;
  328|      2|	yv12_to_uyvyi   = yv12_to_uyvyi_c;
  329|       |
  330|       |	/* Functions used in motion estimation algorithms */
  331|      2|	calc_cbp      = calc_cbp_c;
  332|      2|	sad16         = sad16_c;
  333|      2|	sad8          = sad8_c;
  334|      2|	sad16bi       = sad16bi_c;
  335|      2|	sad8bi        = sad8bi_c;
  336|      2|	dev16         = dev16_c;
  337|      2|	sad16v        = sad16v_c;
  338|      2|	sse8_16bit    = sse8_16bit_c;
  339|      2|	sse8_8bit     = sse8_8bit_c;
  340|       |
  341|      2|	sseh8_16bit   = sseh8_16bit_c;
  342|      2|	coeff8_energy = coeff8_energy_c;
  343|      2|	blocksum8     = blocksum8_c;
  344|       |
  345|      2|	init_GMC(cpu_flags);
  346|       |
  347|       |#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
  348|       |
  349|       |	if ((cpu_flags & XVID_CPU_MMX) || (cpu_flags & XVID_CPU_MMXEXT) ||
  350|       |		(cpu_flags & XVID_CPU_3DNOW) || (cpu_flags & XVID_CPU_3DNOWEXT) ||
  351|       |		(cpu_flags & XVID_CPU_SSE) || (cpu_flags & XVID_CPU_SSE2) ||
  352|       |        (cpu_flags & XVID_CPU_SSE3) || (cpu_flags & XVID_CPU_SSE41))
  353|       |	{
  354|       |		/* Restore FPU context : emms_c is a no-op function */
  355|       |		emms = emms_mmx;
  356|       |	}
  357|       |
  358|       |	if ((cpu_flags & XVID_CPU_MMX)) {
  359|       |
  360|       |		/* Forward and Inverse Discrete Cosine Transformation functions */
  361|       |		fdct = fdct_mmx_skal;
  362|       |		idct = idct_mmx;
  363|       |
  364|       |		/* Qpel stuff */
  365|       |		xvid_QP_Funcs = &xvid_QP_Funcs_mmx;
  366|       |		xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_mmx;
  367|       |
  368|       |		/* Quantization related functions */
  369|       |		quant_h263_intra   = quant_h263_intra_mmx;
  370|       |		quant_h263_inter   = quant_h263_inter_mmx;
  371|       |		dequant_h263_intra = dequant_h263_intra_mmx;
  372|       |		dequant_h263_inter = dequant_h263_inter_mmx;
  373|       |		quant_mpeg_intra   = quant_mpeg_intra_mmx;
  374|       |		quant_mpeg_inter   = quant_mpeg_inter_mmx;
  375|       |		dequant_mpeg_intra = dequant_mpeg_intra_mmx;
  376|       |		dequant_mpeg_inter = dequant_mpeg_inter_mmx;
  377|       |
  378|       |
  379|       |		/* Block related functions */
  380|       |		transfer_8to16copy = transfer_8to16copy_mmx;
  381|       |		transfer_16to8copy = transfer_16to8copy_mmx;
  382|       |		transfer_8to16sub  = transfer_8to16sub_mmx;
  383|       |		transfer_8to16subro  = transfer_8to16subro_mmx;
  384|       |		transfer_8to16sub2 = transfer_8to16sub2_mmx;
  385|       |		transfer_16to8add  = transfer_16to8add_mmx;
  386|       |		transfer8x8_copy   = transfer8x8_copy_mmx;
  387|       |		transfer8x4_copy   = transfer8x4_copy_mmx;
  388|       |
  389|       |		/* Interlacing Functions */
  390|       |		MBFieldTest = MBFieldTest_mmx;
  391|       |
  392|       |		/* Image Interpolation related functions */
  393|       |		interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_mmx;
  394|       |		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_mmx;
  395|       |		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_mmx;
  396|       |
  397|       |		interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_mmx;
  398|       |		interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_mmx;
  399|       |		interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_mmx;
  400|       |
  401|       |		interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_mmx;
  402|       |		interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_mmx;
  403|       |		interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_mmx;
  404|       |		interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_mmx;
  405|       |
  406|       |		interpolate8x8_6tap_lowpass_h = interpolate8x8_6tap_lowpass_h_mmx;
  407|       |		interpolate8x8_6tap_lowpass_v = interpolate8x8_6tap_lowpass_v_mmx;
  408|       |
  409|       |		interpolate8x8_avg2 = interpolate8x8_avg2_mmx;
  410|       |		interpolate8x8_avg4 = interpolate8x8_avg4_mmx;
  411|       |
  412|       |		/* postprocessing */
  413|       |		image_brightness = image_brightness_mmx;
  414|       |
  415|       |		/* image input xxx_to_yv12 related functions */
  416|       |
  417|       |		yv12_to_yv12  = yv12_to_yv12_mmx;
  418|       |
  419|       |		bgr_to_yv12   = bgr_to_yv12_mmx;
  420|       |		rgb_to_yv12   = rgb_to_yv12_mmx;
  421|       |		bgra_to_yv12  = bgra_to_yv12_mmx;
  422|       |		rgba_to_yv12  = rgba_to_yv12_mmx;
  423|       |		yuyv_to_yv12  = yuyv_to_yv12_mmx;
  424|       |		uyvy_to_yv12  = uyvy_to_yv12_mmx;
  425|       |
  426|       |		/* image output yv12_to_xxx related functions */
  427|       |		yv12_to_bgr   = yv12_to_bgr_mmx;
  428|       |		yv12_to_bgra  = yv12_to_bgra_mmx;
  429|       |		yv12_to_yuyv  = yv12_to_yuyv_mmx;
  430|       |		yv12_to_uyvy  = yv12_to_uyvy_mmx;
  431|       |
  432|       |		yv12_to_yuyvi = yv12_to_yuyvi_mmx;
  433|       |		yv12_to_uyvyi = yv12_to_uyvyi_mmx;
  434|       |
  435|       |		/* Motion estimation related functions */
  436|       |		calc_cbp   = calc_cbp_mmx;
  437|       |		sad16      = sad16_mmx;
  438|       |		sad8       = sad8_mmx;
  439|       |		sad16bi    = sad16bi_mmx;
  440|       |		sad8bi     = sad8bi_mmx;
  441|       |		dev16      = dev16_mmx;
  442|       |		sad16v	   = sad16v_mmx;
  443|       |		sse8_16bit = sse8_16bit_mmx;
  444|       |		sse8_8bit  = sse8_8bit_mmx;
  445|       |	}
  446|       |
  447|       |	/* these 3dnow functions are faster than mmx, but slower than xmm. */
  448|       |	if ((cpu_flags & XVID_CPU_3DNOW)) {
  449|       |
  450|       |		emms = emms_3dn;
  451|       |
  452|       |		/* ME functions */
  453|       |		sad16bi = sad16bi_3dn;
  454|       |		sad8bi  = sad8bi_3dn;
  455|       |
  456|       |		yuyv_to_yv12  = yuyv_to_yv12_3dn;
  457|       |		uyvy_to_yv12  = uyvy_to_yv12_3dn;
  458|       |
  459|       |	}
  460|       |
  461|       |
  462|       |	if ((cpu_flags & XVID_CPU_MMXEXT)) {
  463|       |
  464|       |		/* DCT */
  465|       |		fdct = fdct_xmm_skal;
  466|       |		idct = idct_xmm;
  467|       |
  468|       |		/* Interpolation */
  469|       |		interpolate8x8_halfpel_h  = interpolate8x8_halfpel_h_xmm;
  470|       |		interpolate8x8_halfpel_v  = interpolate8x8_halfpel_v_xmm;
  471|       |		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_xmm;
  472|       |		
  473|       |		interpolate8x4_halfpel_h  = interpolate8x4_halfpel_h_xmm;
  474|       |		interpolate8x4_halfpel_v  = interpolate8x4_halfpel_v_xmm;
  475|       |		interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_xmm;
  476|       |		
  477|       |		interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_xmm;
  478|       |		interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_xmm;
  479|       |		interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_xmm;
  480|       |		interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_xmm;
  481|       |
  482|       |        /* Quantization */
  483|       |		quant_mpeg_inter = quant_mpeg_inter_xmm;
  484|       |
  485|       |		dequant_h263_intra = dequant_h263_intra_xmm;
  486|       |		dequant_h263_inter = dequant_h263_inter_xmm;
  487|       |
  488|       |        /* Buffer transfer */
  489|       |		transfer_8to16sub2 = transfer_8to16sub2_xmm;
  490|       |		transfer_8to16sub2ro = transfer_8to16sub2ro_xmm;
  491|       |
  492|       |		/* Colorspace transformation */
  493|       |		/* yv12_to_yv12  = yv12_to_yv12_xmm; */ /* appears to be slow on many machines */
  494|       |		yuyv_to_yv12  = yuyv_to_yv12_xmm;
  495|       |		uyvy_to_yv12  = uyvy_to_yv12_xmm;
  496|       |
  497|       |		/* ME functions */
  498|       |		sad16 = sad16_xmm;
  499|       |		sad8  = sad8_xmm;
  500|       |		sad16bi = sad16bi_xmm;
  501|       |		sad8bi  = sad8bi_xmm;
  502|       |		dev16 = dev16_xmm;
  503|       |		sad16v	 = sad16v_xmm;
  504|       |	}
  505|       |
  506|       |	if ((cpu_flags & XVID_CPU_3DNOW)) {
  507|       |
  508|       |		/* Interpolation */
  509|       |		interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dn;
  510|       |		interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dn;
  511|       |		interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dn;
  512|       |
  513|       |		interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dn;
  514|       |		interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dn;
  515|       |		interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dn;
  516|       |	}
  517|       |
  518|       |	if ((cpu_flags & XVID_CPU_3DNOWEXT)) {
  519|       |
  520|       |		/* Buffer transfer */
  521|       |		transfer_8to16copy =  transfer_8to16copy_3dne;
  522|       |		transfer_16to8copy = transfer_16to8copy_3dne;
  523|       |		transfer_8to16sub =  transfer_8to16sub_3dne;
  524|       |		transfer_8to16subro =  transfer_8to16subro_3dne;
  525|       |		transfer_16to8add = transfer_16to8add_3dne;
  526|       |		transfer8x8_copy = transfer8x8_copy_3dne;
  527|       |		transfer8x4_copy = transfer8x4_copy_3dne;
  528|       |
  529|       |		if ((cpu_flags & XVID_CPU_MMXEXT)) {
  530|       |			/* Inverse DCT */
  531|       |			idct =  idct_3dne;
  532|       |
  533|       |			/* Buffer transfer */
  534|       |			transfer_8to16sub2 =  transfer_8to16sub2_3dne;
  535|       |
  536|       |			/* Interpolation */
  537|       |			interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_3dne;
  538|       |			interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_3dne;
  539|       |			interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_3dne;
  540|       |
  541|       |			interpolate8x4_halfpel_h = interpolate8x4_halfpel_h_3dne;
  542|       |			interpolate8x4_halfpel_v = interpolate8x4_halfpel_v_3dne;
  543|       |			interpolate8x4_halfpel_hv = interpolate8x4_halfpel_hv_3dne;
  544|       |
  545|       |            /* Quantization */
  546|       |			quant_h263_intra = quant_h263_intra_3dne;		/* cmov only */
  547|       |			quant_h263_inter = quant_h263_inter_3dne;
  548|       |			dequant_mpeg_intra = dequant_mpeg_intra_3dne;	/* cmov only */
  549|       |			dequant_mpeg_inter = dequant_mpeg_inter_3dne;
  550|       |			dequant_h263_intra = dequant_h263_intra_3dne;
  551|       |			dequant_h263_inter = dequant_h263_inter_3dne;
  552|       |
  553|       |            /* ME functions */
  554|       |			sad16 = sad16_3dne;
  555|       |			sad8 = sad8_3dne;
  556|       |			sad16bi = sad16bi_3dne;
  557|       |			sad8bi = sad8bi_3dne;
  558|       |			dev16 = dev16_3dne;
  559|       |		}
  560|       |	}
  561|       | 
  562|       |	if ((cpu_flags & XVID_CPU_SSE2)) {
  563|       |
  564|       |		calc_cbp = calc_cbp_sse2;
  565|       |
  566|       |		/* Quantization */
  567|       |		quant_h263_intra   = quant_h263_intra_sse2;
  568|       |		quant_h263_inter   = quant_h263_inter_sse2;
  569|       |		dequant_h263_intra = dequant_h263_intra_sse2;
  570|       |		dequant_h263_inter = dequant_h263_inter_sse2;
  571|       |
  572|       |		/* SAD operators */
  573|       |		sad16       = sad16_sse2;
  574|       |		dev16       = dev16_sse2;
  575|       |
  576|       |		/* PSNR-HVS-M distortion metric */
  577|       |		sseh8_16bit   = sseh8_16bit_sse2;
  578|       |		coeff8_energy = coeff8_energy_sse2;
  579|       |		blocksum8     = blocksum8_sse2;
  580|       |
  581|       |		/* DCT operators */
  582|       |		fdct = fdct_sse2_skal;
  583|       |		idct = idct_sse2_skal;   /* Is now IEEE1180 and Walken compliant. */
  584|       |
  585|       |		/* postprocessing */
  586|       |		image_brightness = image_brightness_sse2;
  587|       |
  588|       |	}
  589|       |
  590|       |	if ((cpu_flags & XVID_CPU_SSE3)) {
  591|       |
  592|       |		/* SAD operators */
  593|       |		sad16    = sad16_sse3;
  594|       |		dev16    = dev16_sse3;
  595|       |	}
  596|       |
  597|       |#endif /* ARCH_IS_IA32 */
  598|       |
  599|       |#if defined(ARCH_IS_IA64)
  600|       |	if ((cpu_flags & XVID_CPU_ASM)) { /* use assembler routines? */
  601|       |	  idct_ia64_init();
  602|       |	  fdct = fdct_ia64;
  603|       |	  idct = idct_ia64;   /*not yet working, crashes */
  604|       |	  interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_ia64;
  605|       |	  interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_ia64;
  606|       |	  interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_ia64;
  607|       |	  sad16 = sad16_ia64;
  608|       |	  sad16bi = sad16bi_ia64;
  609|       |	  sad8 = sad8_ia64;
  610|       |	  dev16 = dev16_ia64;
  611|       |/*	  Halfpel8_Refine = Halfpel8_Refine_ia64; */
  612|       |	  quant_h263_intra = quant_h263_intra_ia64;
  613|       |	  quant_h263_inter = quant_h263_inter_ia64;
  614|       |	  dequant_h263_intra = dequant_h263_intra_ia64;
  615|       |	  dequant_h263_inter = dequant_h263_inter_ia64;
  616|       |	  transfer_8to16copy = transfer_8to16copy_ia64;
  617|       |	  transfer_16to8copy = transfer_16to8copy_ia64;
  618|       |	  transfer_8to16sub = transfer_8to16sub_ia64;
  619|       |	  transfer_8to16sub2 = transfer_8to16sub2_ia64;
  620|       |	  transfer_16to8add = transfer_16to8add_ia64;
  621|       |	  transfer8x8_copy = transfer8x8_copy_ia64;
  622|       |	}
  623|       |#endif
  624|       |
  625|       |#if defined(ARCH_IS_PPC)
  626|       |	if ((cpu_flags & XVID_CPU_ALTIVEC)) {
  627|       |          /* sad operators */
  628|       |		  sad16 = sad16_altivec_c;
  629|       |		  sad16bi = sad16bi_altivec_c;
  630|       |		  sad8 = sad8_altivec_c;
  631|       |		  dev16 = dev16_altivec_c;
  632|       |          
  633|       |          sse8_16bit = sse8_16bit_altivec_c;
  634|       |          
  635|       |          /* mem transfer */
  636|       |          transfer_8to16copy = transfer_8to16copy_altivec_c;
  637|       |          transfer_16to8copy = transfer_16to8copy_altivec_c;
  638|       |          transfer_8to16sub = transfer_8to16sub_altivec_c;
  639|       |          transfer_8to16subro = transfer_8to16subro_altivec_c;
  640|       |          transfer_8to16sub2 = transfer_8to16sub2_altivec_c;
  641|       |          transfer_16to8add = transfer_16to8add_altivec_c;
  642|       |          transfer8x8_copy = transfer8x8_copy_altivec_c;
  643|       |           
  644|       |          /* Inverse DCT */
  645|       |          idct = idct_altivec_c;
  646|       |          
  647|       |          /* Interpolation */
  648|       |          interpolate8x8_halfpel_h = interpolate8x8_halfpel_h_altivec_c;
  649|       |          interpolate8x8_halfpel_v = interpolate8x8_halfpel_v_altivec_c;
  650|       |          interpolate8x8_halfpel_hv = interpolate8x8_halfpel_hv_altivec_c;
  651|       |		  
  652|       |          interpolate8x8_avg2 = interpolate8x8_avg2_altivec_c;
  653|       |          interpolate8x8_avg4 = interpolate8x8_avg4_altivec_c;
  654|       |		  
  655|       |		  interpolate8x8_halfpel_add = interpolate8x8_halfpel_add_altivec_c;
  656|       |		  interpolate8x8_halfpel_h_add = interpolate8x8_halfpel_h_add_altivec_c;
  657|       |		  interpolate8x8_halfpel_v_add = interpolate8x8_halfpel_v_add_altivec_c;
  658|       |		  interpolate8x8_halfpel_hv_add = interpolate8x8_halfpel_hv_add_altivec_c;
  659|       |          
  660|       |          /* Colorspace conversion */
  661|       |          bgra_to_yv12 = bgra_to_yv12_altivec_c;
  662|       |          abgr_to_yv12 = abgr_to_yv12_altivec_c;
  663|       |          rgba_to_yv12 = rgba_to_yv12_altivec_c;
  664|       |          argb_to_yv12 = argb_to_yv12_altivec_c;
  665|       |          
  666|       |          yuyv_to_yv12 = yuyv_to_yv12_altivec_c;
  667|       |          uyvy_to_yv12 = uyvy_to_yv12_altivec_c;
  668|       |          
  669|       |          yv12_to_yuyv = yv12_to_yuyv_altivec_c;
  670|       |          yv12_to_uyvy = yv12_to_uyvy_altivec_c;
  671|       |          
  672|       |          /* Quantization */
  673|       |          quant_h263_intra = quant_h263_intra_altivec_c;
  674|       |          quant_h263_inter = quant_h263_inter_altivec_c;
  675|       |          dequant_h263_intra = dequant_h263_intra_altivec_c;
  676|       |          dequant_h263_inter = dequant_h263_inter_altivec_c;
  677|       |
  678|       |		  dequant_mpeg_intra = dequant_mpeg_intra_altivec_c;
  679|       |		  dequant_mpeg_inter = dequant_mpeg_inter_altivec_c;
  680|       |		  
  681|       |		  /* Qpel stuff */
  682|       |		  xvid_QP_Funcs = &xvid_QP_Funcs_Altivec_C;
  683|       |		  xvid_QP_Add_Funcs = &xvid_QP_Add_Funcs_Altivec_C;
  684|       |        }
  685|       |#endif
  686|       |
  687|       |#if defined(_DEBUG)
  688|       |    xvid_debug = init->debug;
  689|       |#endif
  690|       |
  691|      2|    return(0);
  692|      2|}
xvid.c:detect_cpu_flags:
  151|      2|{
  152|       |	/* enable native assembly optimizations by default */
  153|      2|	unsigned int cpu_flags = XVID_CPU_ASM;
  ------------------
  |  |  181|      2|#define XVID_CPU_ASM      (1<< 7) /* native assembly */
  ------------------
  154|       |
  155|       |#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
  156|       |	cpu_flags |= check_cpu_features();
  157|       |	if ((cpu_flags & XVID_CPU_SSE) && sigill_check(sse_os_trigger))
  158|       |		cpu_flags &= ~XVID_CPU_SSE;
  159|       |
  160|       |	if ((cpu_flags & (XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41)) && sigill_check(sse2_os_trigger))
  161|       |		cpu_flags &= ~(XVID_CPU_SSE2|XVID_CPU_SSE3|XVID_CPU_SSE41);
  162|       |#endif
  163|       |
  164|       |#if defined(ARCH_IS_PPC)
  165|       |#if defined(__amigaos4__)
  166|       |        {
  167|       |                uint32_t vector_unit = VECTORTYPE_NONE;
  168|       |                IExec->GetCPUInfoTags(GCIT_VectorUnit, &vector_unit, TAG_END);
  169|       |                if (vector_unit == VECTORTYPE_ALTIVEC) {
  170|       |                        cpu_flags |= XVID_CPU_ALTIVEC;
  171|       |                }
  172|       |        }
  173|       |#else
  174|       |	if (!sigill_check(altivec_trigger))
  175|       |		cpu_flags |= XVID_CPU_ALTIVEC;
  176|       |#endif
  177|       |#endif
  178|       |
  179|      2|	return cpu_flags;
  180|      2|}

