_Z15bsd_initializerv:
   64|      1|{
   65|      4|	for (int i = 0; i < testSz.size(); i++)
  ------------------
  |  Branch (65:18): [True: 3, False: 1]
  ------------------
   66|      3|	{
   67|      3|		init_block_size_descriptor(
   68|      3|		    testSz[i].x,
   69|      3|		    testSz[i].y,
   70|      3|		    testSz[i].z,
   71|      3|		    false,
   72|      3|		    4,
   73|      3|		    1.0f,
   74|      3|		    testBSD[i]);
   75|      3|	}
   76|       |
   77|      1|	return true;
   78|      1|}
LLVMFuzzerTestOneInput:
   81|    311|{
   82|       |	// Preinitialize the block size descriptors we need
   83|    311|	static bool init = bsd_initializer();
   84|       |
   85|       |	// Must have 4 (select block size) and 16 (payload) bytes
   86|    311|	if (size < 4 + 16)
  ------------------
  |  Branch (86:6): [True: 3, False: 308]
  ------------------
   87|      3|	{
   88|      3|		return 0;
   89|      3|	}
   90|       |
   91|    308|	FuzzedDataProvider stream(data, size);
   92|       |
   93|       |	// Select a block size to test
   94|    308|	int i = stream.ConsumeIntegralInRange<int>(0, testSz.size() - 1);
   95|       |
   96|       |	// Populate the physical block
   97|    308|	uint8_t pcb[16];
   98|    308|	std::vector<uint8_t> buffer = stream.ConsumeBytes<uint8_t>(16);
   99|    308|	std::memcpy(pcb, buffer.data(), 16);
  100|       |
  101|       |	// Call the function under test
  102|    308|	symbolic_compressed_block scb;
  103|    308|	physical_to_symbolic(testBSD[i], pcb, scb);
  104|       |
  105|    308|	return 0;
  106|    311|}

_Z26init_block_size_descriptorjjjbjfR21block_size_descriptor:
 1173|      3|) {
 1174|      3|	if (z_texels > 1)
  ------------------
  |  Branch (1174:6): [True: 1, False: 2]
  ------------------
 1175|      1|	{
 1176|      1|		construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
 1177|      1|	}
 1178|      2|	else
 1179|      2|	{
 1180|      2|		construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
 1181|      2|	}
 1182|       |
 1183|      3|	init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
 1184|      3|}
astcenc_block_sizes.cpp:_ZL34construct_block_size_descriptor_3djjjR21block_size_descriptor:
  998|      1|) {
  999|       |	// Store a remap table for storing packed decimation modes.
 1000|       |	// Indexing uses [Z * 64 + Y *  8 + X] and max size for each axis is 6.
 1001|      1|	static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
 1002|      1|	int decimation_mode_index[MAX_DMI];
 1003|      1|	unsigned int decimation_mode_count = 0;
 1004|       |
 1005|      1|	dt_init_working_buffers* wb = new dt_init_working_buffers;
 1006|       |
 1007|      1|	bsd.xdim = static_cast<uint8_t>(x_texels);
 1008|      1|	bsd.ydim = static_cast<uint8_t>(y_texels);
 1009|      1|	bsd.zdim = static_cast<uint8_t>(z_texels);
 1010|      1|	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
 1011|       |
 1012|    439|	for (unsigned int i = 0; i < MAX_DMI; i++)
  ------------------
  |  Branch (1012:27): [True: 438, False: 1]
  ------------------
 1013|    438|	{
 1014|    438|		decimation_mode_index[i] = -1;
 1015|    438|	}
 1016|       |
 1017|       |	// gather all the infill-modes that can be used with the current block size
 1018|      6|	for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
  ------------------
  |  Branch (1018:35): [True: 5, False: 1]
  ------------------
 1019|      5|	{
 1020|     30|		for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
  ------------------
  |  Branch (1020:36): [True: 25, False: 5]
  ------------------
 1021|     25|		{
 1022|    150|			for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
  ------------------
  |  Branch (1022:37): [True: 125, False: 25]
  ------------------
 1023|    125|			{
 1024|    125|				unsigned int weight_count = x_weights * y_weights * z_weights;
 1025|    125|				if (weight_count > BLOCK_MAX_WEIGHTS)
  ------------------
  |  Branch (1025:9): [True: 47, False: 78]
  ------------------
 1026|     47|				{
 1027|     47|					continue;
 1028|     47|				}
 1029|       |
 1030|     78|				decimation_info& di = bsd.decimation_tables[decimation_mode_count];
 1031|     78|				decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
 1032|     78|				init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
 1033|       |
 1034|     78|				int maxprec_1plane = -1;
 1035|     78|				int maxprec_2planes = -1;
 1036|  1.01k|				for (unsigned int i = 0; i < 12; i++)
  ------------------
  |  Branch (1036:30): [True: 936, False: 78]
  ------------------
 1037|    936|				{
 1038|    936|					unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
 1039|    936|					if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (1039:10): [True: 916, False: 20]
  |  Branch (1039:50): [True: 417, False: 499]
  ------------------
 1040|    417|					{
 1041|    417|						maxprec_1plane = i;
 1042|    417|					}
 1043|       |
 1044|    936|					unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
 1045|    936|					if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (1045:10): [True: 935, False: 1]
  |  Branch (1045:51): [True: 154, False: 781]
  ------------------
 1046|    154|					{
 1047|    154|						maxprec_2planes = i;
 1048|    154|					}
 1049|    936|				}
 1050|       |
 1051|     78|				if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
  ------------------
  |  Branch (1051:9): [True: 46, False: 32]
  ------------------
 1052|     46|				{
 1053|     46|					maxprec_2planes = -1;
 1054|     46|				}
 1055|       |
 1056|     78|				bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
 1057|     78|				bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
 1058|     78|				bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
  ------------------
  |  Branch (1058:66): [True: 0, False: 78]
  ------------------
 1059|     78|				bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
  ------------------
  |  Branch (1059:67): [True: 46, False: 32]
  ------------------
 1060|     78|				decimation_mode_count++;
 1061|     78|			}
 1062|     25|		}
 1063|      5|	}
 1064|       |
 1065|       |	// Ensure the end of the array contains valid data (should never get read)
 1066|     10|	for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  ------------------
  |  Branch (1066:47): [True: 9, False: 1]
  ------------------
 1067|      9|	{
 1068|      9|		bsd.decimation_modes[i].maxprec_1plane = -1;
 1069|      9|		bsd.decimation_modes[i].maxprec_2planes = -1;
 1070|      9|		bsd.decimation_modes[i].refprec_1plane = 0;
 1071|      9|		bsd.decimation_modes[i].refprec_2planes = 0;
 1072|      9|	}
 1073|       |
 1074|      1|	bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
 1075|      1|	bsd.decimation_mode_count_selected = decimation_mode_count;
 1076|      1|	bsd.decimation_mode_count_all = decimation_mode_count;
 1077|       |
 1078|       |	// Construct the list of block formats referencing the decimation tables
 1079|       |
 1080|       |	// Clear the list to a known-bad value
 1081|  2.04k|	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (1081:27): [True: 2.04k, False: 1]
  ------------------
 1082|  2.04k|	{
 1083|  2.04k|		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
 1084|  2.04k|	}
 1085|       |
 1086|      1|	unsigned int packed_idx = 0;
 1087|      1|	unsigned int bm_counts[2] { 0 };
 1088|       |
 1089|       |	// Iterate two times to build a usefully ordered list:
 1090|       |	//   - Pass 0 - keep valid single plane block modes
 1091|       |	//   - Pass 1 - keep valid dual plane block modes
 1092|      3|	for (unsigned int j = 0; j < 2; j++)
  ------------------
  |  Branch (1092:27): [True: 2, False: 1]
  ------------------
 1093|      2|	{
 1094|  4.09k|		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (1094:28): [True: 4.09k, False: 2]
  ------------------
 1095|  4.09k|		{
 1096|       |			// Skip modes we've already included in a previous pass
 1097|  4.09k|			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  ------------------
  |  Branch (1097:8): [True: 435, False: 3.66k]
  ------------------
 1098|    435|			{
 1099|    435|				continue;
 1100|    435|			}
 1101|       |
 1102|  3.66k|			unsigned int x_weights;
 1103|  3.66k|			unsigned int y_weights;
 1104|  3.66k|			unsigned int z_weights;
 1105|  3.66k|			bool is_dual_plane;
 1106|  3.66k|			unsigned int quant_mode;
 1107|  3.66k|			unsigned int weight_bits;
 1108|       |
 1109|  3.66k|			bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
 1110|       |			// Skip invalid encodings
 1111|  3.66k|			if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
  ------------------
  |  Branch (1111:8): [True: 2.97k, False: 689]
  |  Branch (1111:18): [True: 0, False: 689]
  |  Branch (1111:42): [True: 0, False: 689]
  |  Branch (1111:66): [True: 0, False: 689]
  ------------------
 1112|  2.97k|			{
 1113|  2.97k|				continue;
 1114|  2.97k|			}
 1115|       |
 1116|       |			// Skip encodings in the wrong iteration
 1117|    689|			if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
  ------------------
  |  Branch (1117:9): [True: 562, False: 127]
  |  Branch (1117:19): [True: 127, False: 435]
  |  Branch (1117:38): [True: 127, False: 435]
  |  Branch (1117:48): [True: 0, False: 127]
  ------------------
 1118|    127|			{
 1119|    127|				continue;
 1120|    127|			}
 1121|       |
 1122|       |			// Always skip encodings we can't physically encode based on bit availability
 1123|    562|			if (is_dual_plane)
  ------------------
  |  Branch (1123:8): [True: 127, False: 435]
  ------------------
 1124|    127|			{
 1125|       |				 // This is the only check we need as only support 1 partition
 1126|    127|				 if ((109 - weight_bits) <= 0)
  ------------------
  |  Branch (1126:10): [True: 0, False: 127]
  ------------------
 1127|      0|				 {
 1128|      0|					continue;
 1129|      0|				 }
 1130|    127|			}
 1131|    435|			else
 1132|    435|			{
 1133|       |				// This is conservative - fewer bits may be available for > 1 partition
 1134|    435|				 if ((111 - weight_bits) <= 0)
  ------------------
  |  Branch (1134:10): [True: 0, False: 435]
  ------------------
 1135|      0|				 {
 1136|      0|					continue;
 1137|      0|				 }
 1138|    435|			}
 1139|       |
 1140|    562|			int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
 1141|    562|			bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
 1142|    562|			bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
 1143|    562|			bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
 1144|    562|			bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
 1145|    562|			bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
 1146|       |
 1147|    562|			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
 1148|    562|			bm_counts[j]++;
 1149|    562|			packed_idx++;
 1150|    562|		}
 1151|      2|	}
 1152|       |
 1153|      1|	bsd.block_mode_count_1plane_always = 0;  // Skipped for 3D modes
 1154|      1|	bsd.block_mode_count_1plane_selected = bm_counts[0];
 1155|      1|	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
 1156|      1|	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
 1157|       |
 1158|       |	// Determine the texels to use for kmeans clustering.
 1159|      1|	assign_kmeans_texels(bsd);
 1160|       |
 1161|      1|	delete wb;
 1162|      1|}
astcenc_block_sizes.cpp:_ZL23init_decimation_info_3djjjjjjR15decimation_infoR23dt_init_working_buffers:
  445|     78|) {
  446|     78|	unsigned int texels_per_block = x_texels * y_texels * z_texels;
  447|     78|	unsigned int weights_per_block = x_weights * y_weights * z_weights;
  448|       |
  449|     78|	uint8_t max_texel_count_of_weight = 0;
  450|       |
  451|     78|	promise(weights_per_block > 0);
  ------------------
  |  |   61|     78|	#define promise(cond) assert(cond)
  ------------------
  452|     78|	promise(texels_per_block > 0);
  ------------------
  |  |   61|     78|	#define promise(cond) assert(cond)
  ------------------
  453|       |
  454|  3.03k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (454:27): [True: 2.95k, False: 78]
  ------------------
  455|  2.95k|	{
  456|  2.95k|		wb.texel_count_of_weight[i] = 0;
  457|  2.95k|	}
  458|       |
  459|  16.9k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (459:27): [True: 16.8k, False: 78]
  ------------------
  460|  16.8k|	{
  461|  16.8k|		wb.weight_count_of_texel[i] = 0;
  462|  16.8k|	}
  463|       |
  464|    546|	for (unsigned int z = 0; z < z_texels; z++)
  ------------------
  |  Branch (464:27): [True: 468, False: 78]
  ------------------
  465|    468|	{
  466|  3.27k|		for (unsigned int y = 0; y < y_texels; y++)
  ------------------
  |  Branch (466:28): [True: 2.80k, False: 468]
  ------------------
  467|  2.80k|		{
  468|  19.6k|			for (unsigned int x = 0; x < x_texels; x++)
  ------------------
  |  Branch (468:29): [True: 16.8k, False: 2.80k]
  ------------------
  469|  16.8k|			{
  470|  16.8k|				int texel = (z * y_texels + y) * x_texels + x;
  471|       |
  472|  16.8k|				int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  473|  16.8k|				int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  474|  16.8k|				int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
  475|       |
  476|  16.8k|				int x_weight_frac = x_weight & 0xF;
  477|  16.8k|				int y_weight_frac = y_weight & 0xF;
  478|  16.8k|				int z_weight_frac = z_weight & 0xF;
  479|  16.8k|				int x_weight_int = x_weight >> 4;
  480|  16.8k|				int y_weight_int = y_weight >> 4;
  481|  16.8k|				int z_weight_int = z_weight >> 4;
  482|  16.8k|				int qweight[4];
  483|  16.8k|				int weight[4];
  484|  16.8k|				qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
  485|  16.8k|				qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
  486|       |
  487|       |				// simplex interpolation
  488|  16.8k|				int fs = x_weight_frac;
  489|  16.8k|				int ft = y_weight_frac;
  490|  16.8k|				int fp = z_weight_frac;
  491|       |
  492|  16.8k|				int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
  493|  16.8k|				int N = x_weights;
  494|  16.8k|				int NM = x_weights * y_weights;
  495|       |
  496|  16.8k|				int s1, s2, w0, w1, w2, w3;
  497|  16.8k|				switch (cas)
  498|  16.8k|				{
  499|  1.15k|				case 7:
  ------------------
  |  Branch (499:5): [True: 1.15k, False: 15.6k]
  ------------------
  500|  1.15k|					s1 = 1;
  501|  1.15k|					s2 = N;
  502|  1.15k|					w0 = 16 - fs;
  503|  1.15k|					w1 = fs - ft;
  504|  1.15k|					w2 = ft - fp;
  505|  1.15k|					w3 = fp;
  506|  1.15k|					break;
  507|  2.10k|				case 3:
  ------------------
  |  Branch (507:5): [True: 2.10k, False: 14.7k]
  ------------------
  508|  2.10k|					s1 = N;
  509|  2.10k|					s2 = 1;
  510|  2.10k|					w0 = 16 - ft;
  511|  2.10k|					w1 = ft - fs;
  512|  2.10k|					w2 = fs - fp;
  513|  2.10k|					w3 = fp;
  514|  2.10k|					break;
  515|  3.10k|				case 5:
  ------------------
  |  Branch (515:5): [True: 3.10k, False: 13.7k]
  ------------------
  516|  3.10k|					s1 = 1;
  517|  3.10k|					s2 = NM;
  518|  3.10k|					w0 = 16 - fs;
  519|  3.10k|					w1 = fs - fp;
  520|  3.10k|					w2 = fp - ft;
  521|  3.10k|					w3 = ft;
  522|  3.10k|					break;
  523|  2.10k|				case 4:
  ------------------
  |  Branch (523:5): [True: 2.10k, False: 14.7k]
  ------------------
  524|  2.10k|					s1 = NM;
  525|  2.10k|					s2 = 1;
  526|  2.10k|					w0 = 16 - fp;
  527|  2.10k|					w1 = fp - fs;
  528|  2.10k|					w2 = fs - ft;
  529|  2.10k|					w3 = ft;
  530|  2.10k|					break;
  531|  3.10k|				case 2:
  ------------------
  |  Branch (531:5): [True: 3.10k, False: 13.7k]
  ------------------
  532|  3.10k|					s1 = N;
  533|  3.10k|					s2 = NM;
  534|  3.10k|					w0 = 16 - ft;
  535|  3.10k|					w1 = ft - fp;
  536|  3.10k|					w2 = fp - fs;
  537|  3.10k|					w3 = fs;
  538|  3.10k|					break;
  539|  5.28k|				case 0:
  ------------------
  |  Branch (539:5): [True: 5.28k, False: 11.5k]
  ------------------
  540|  5.28k|					s1 = NM;
  541|  5.28k|					s2 = N;
  542|  5.28k|					w0 = 16 - fp;
  543|  5.28k|					w1 = fp - ft;
  544|  5.28k|					w2 = ft - fs;
  545|  5.28k|					w3 = fs;
  546|  5.28k|					break;
  547|      0|				default:
  ------------------
  |  Branch (547:5): [True: 0, False: 16.8k]
  ------------------
  548|      0|					s1 = NM;
  549|      0|					s2 = N;
  550|      0|					w0 = 16 - fp;
  551|      0|					w1 = fp - ft;
  552|      0|					w2 = ft - fs;
  553|      0|					w3 = fs;
  554|      0|					break;
  555|  16.8k|				}
  556|       |
  557|  16.8k|				qweight[1] = qweight[0] + s1;
  558|  16.8k|				qweight[2] = qweight[1] + s2;
  559|  16.8k|				weight[0] = w0;
  560|  16.8k|				weight[1] = w1;
  561|  16.8k|				weight[2] = w2;
  562|  16.8k|				weight[3] = w3;
  563|       |
  564|  84.2k|				for (unsigned int i = 0; i < 4; i++)
  ------------------
  |  Branch (564:30): [True: 67.3k, False: 16.8k]
  ------------------
  565|  67.3k|				{
  566|  67.3k|					if (weight[i] != 0)
  ------------------
  |  Branch (566:10): [True: 42.8k, False: 24.5k]
  ------------------
  567|  42.8k|					{
  568|  42.8k|						wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  569|  42.8k|						wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  570|  42.8k|						wb.weight_count_of_texel[texel]++;
  571|  42.8k|						wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  572|  42.8k|						wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  573|  42.8k|						wb.texel_count_of_weight[qweight[i]]++;
  574|  42.8k|						max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  575|  42.8k|					}
  576|  67.3k|				}
  577|  16.8k|			}
  578|  2.80k|		}
  579|    468|	}
  580|       |
  581|     78|	uint8_t max_texel_weight_count = 0;
  582|  16.9k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (582:27): [True: 16.8k, False: 78]
  ------------------
  583|  16.8k|	{
  584|  16.8k|		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  585|  16.8k|		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  586|       |
  587|       |		// Init all 4 entries so we can rely on zeros for vectorization
  588|  84.2k|		for (unsigned int j = 0; j < 4; j++)
  ------------------
  |  Branch (588:28): [True: 67.3k, False: 16.8k]
  ------------------
  589|  67.3k|		{
  590|  67.3k|			di.texel_weight_contribs_int_tr[j][i] = 0;
  591|  67.3k|			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
  592|  67.3k|			di.texel_weights_tr[j][i] = 0;
  593|  67.3k|		}
  594|       |
  595|  59.6k|		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  ------------------
  |  Branch (595:28): [True: 42.8k, False: 16.8k]
  ------------------
  596|  42.8k|		{
  597|  42.8k|			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
  598|  42.8k|			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  599|  42.8k|			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
  600|  42.8k|		}
  601|  16.8k|	}
  602|       |
  603|     78|	di.max_texel_weight_count = max_texel_weight_count;
  604|       |
  605|  3.03k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (605:27): [True: 2.95k, False: 78]
  ------------------
  606|  2.95k|	{
  607|  2.95k|		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  608|  2.95k|		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  609|       |
  610|  45.7k|		for (unsigned int j = 0; j < texel_count_wt; j++)
  ------------------
  |  Branch (610:28): [True: 42.8k, False: 2.95k]
  ------------------
  611|  42.8k|		{
  612|  42.8k|			unsigned int texel = wb.texels_of_weight[i][j];
  613|       |
  614|       |			// Create transposed versions of these for better vectorization
  615|  42.8k|			di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);
  616|  42.8k|			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  617|       |
  618|       |			// Store the per-texel contribution of this weight for each texel it contributes to
  619|  42.8k|			di.texel_contrib_for_weight[j][i] = 0.0f;
  620|  80.2k|			for (unsigned int k = 0; k < 4; k++)
  ------------------
  |  Branch (620:29): [True: 80.2k, False: 0]
  ------------------
  621|  80.2k|			{
  622|  80.2k|				uint8_t dttw = di.texel_weights_tr[k][texel];
  623|  80.2k|				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
  624|  80.2k|				if (dttw == i && dttwf != 0.0f)
  ------------------
  |  Branch (624:9): [True: 42.8k, False: 37.4k]
  |  Branch (624:22): [True: 42.8k, False: 0]
  ------------------
  625|  42.8k|				{
  626|  42.8k|					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
  627|  42.8k|					break;
  628|  42.8k|				}
  629|  80.2k|			}
  630|  42.8k|		}
  631|       |
  632|       |		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  633|       |		// Match last texel in active lane in SIMD group, for better gathers
  634|  2.95k|		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
  635|  25.4k|		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (635:41): [True: 22.5k, False: 2.95k]
  ------------------
  636|  22.5k|		{
  637|  22.5k|			di.weight_texels_tr[j][i] = last_texel;
  638|  22.5k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  639|  22.5k|		}
  640|  2.95k|	}
  641|       |
  642|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  643|     78|	size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  644|     78|	for (size_t i = texels_per_block; i < texels_per_block_simd; i++)
  ------------------
  |  Branch (644:36): [True: 0, False: 78]
  ------------------
  645|      0|	{
  646|      0|		di.texel_weight_count[i] = 0;
  647|       |
  648|      0|		for (size_t j = 0; j < 4; j++)
  ------------------
  |  Branch (648:22): [True: 0, False: 0]
  ------------------
  649|      0|		{
  650|      0|			di.texel_weight_contribs_float_tr[j][i] = 0;
  651|      0|			di.texel_weights_tr[j][i] = 0;
  652|      0|			di.texel_weight_contribs_int_tr[j][i] = 0;
  653|      0|		}
  654|      0|	}
  655|       |
  656|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  657|       |	// Match last texel in active lane in SIMD group, for better gathers
  658|     78|	int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  659|     78|	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
  660|       |
  661|     78|	size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  662|    118|	for (size_t i = weights_per_block; i < weights_per_block_simd; i++)
  ------------------
  |  Branch (662:37): [True: 40, False: 78]
  ------------------
  663|     40|	{
  664|     40|		di.weight_texel_count[i] = 0;
  665|       |
  666|  1.15k|		for (size_t j = 0; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (666:22): [True: 1.11k, False: 40]
  ------------------
  667|  1.11k|		{
  668|  1.11k|			di.weight_texels_tr[j][i] = last_texel;
  669|  1.11k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  670|  1.11k|		}
  671|     40|	}
  672|       |
  673|     78|	di.texel_count = static_cast<uint8_t>(texels_per_block);
  674|     78|	di.weight_count = static_cast<uint8_t>(weights_per_block);
  675|     78|	di.weight_x = static_cast<uint8_t>(x_weights);
  676|     78|	di.weight_y = static_cast<uint8_t>(y_weights);
  677|     78|	di.weight_z = static_cast<uint8_t>(z_weights);
  678|     78|}
astcenc_block_sizes.cpp:_ZL20decode_block_mode_3djRjS_S_RbS_S_:
  160|  3.66k|) {
  161|  3.66k|	unsigned int base_quant_mode = (block_mode >> 4) & 1;
  162|  3.66k|	unsigned int H = (block_mode >> 9) & 1;
  163|  3.66k|	unsigned int D = (block_mode >> 10) & 1;
  164|  3.66k|	unsigned int A = (block_mode >> 5) & 0x3;
  165|       |
  166|  3.66k|	x_weights = 0;
  167|  3.66k|	y_weights = 0;
  168|  3.66k|	z_weights = 0;
  169|       |
  170|  3.66k|	if ((block_mode & 3) != 0)
  ------------------
  |  Branch (170:6): [True: 2.74k, False: 913]
  ------------------
  171|  2.74k|	{
  172|  2.74k|		base_quant_mode |= (block_mode & 3) << 1;
  173|  2.74k|		unsigned int B = (block_mode >> 7) & 3;
  174|  2.74k|		unsigned int C = (block_mode >> 2) & 0x3;
  175|  2.74k|		x_weights = A + 2;
  176|  2.74k|		y_weights = B + 2;
  177|  2.74k|		z_weights = C + 2;
  178|  2.74k|	}
  179|    913|	else
  180|    913|	{
  181|    913|		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
  182|    913|		if (((block_mode >> 2) & 3) == 0)
  ------------------
  |  Branch (182:7): [True: 256, False: 657]
  ------------------
  183|    256|		{
  184|    256|			return false;
  185|    256|		}
  186|       |
  187|    657|		int B = (block_mode >> 9) & 3;
  188|    657|		if (((block_mode >> 7) & 3) != 3)
  ------------------
  |  Branch (188:7): [True: 492, False: 165]
  ------------------
  189|    492|		{
  190|    492|			D = 0;
  191|    492|			H = 0;
  192|    492|		}
  193|    657|		switch ((block_mode >> 7) & 3)
  ------------------
  |  Branch (193:11): [True: 0, False: 657]
  ------------------
  194|    657|		{
  195|    164|		case 0:
  ------------------
  |  Branch (195:3): [True: 164, False: 493]
  ------------------
  196|    164|			x_weights = 6;
  197|    164|			y_weights = B + 2;
  198|    164|			z_weights = A + 2;
  199|    164|			break;
  200|    164|		case 1:
  ------------------
  |  Branch (200:3): [True: 164, False: 493]
  ------------------
  201|    164|			x_weights = A + 2;
  202|    164|			y_weights = 6;
  203|    164|			z_weights = B + 2;
  204|    164|			break;
  205|    164|		case 2:
  ------------------
  |  Branch (205:3): [True: 164, False: 493]
  ------------------
  206|    164|			x_weights = A + 2;
  207|    164|			y_weights = B + 2;
  208|    164|			z_weights = 6;
  209|    164|			break;
  210|    165|		case 3:
  ------------------
  |  Branch (210:3): [True: 165, False: 492]
  ------------------
  211|    165|			x_weights = 2;
  212|    165|			y_weights = 2;
  213|    165|			z_weights = 2;
  214|    165|			switch ((block_mode >> 5) & 3)
  ------------------
  |  Branch (214:12): [True: 0, False: 165]
  ------------------
  215|    165|			{
  216|     39|			case 0:
  ------------------
  |  Branch (216:4): [True: 39, False: 126]
  ------------------
  217|     39|				x_weights = 6;
  218|     39|				break;
  219|     39|			case 1:
  ------------------
  |  Branch (219:4): [True: 39, False: 126]
  ------------------
  220|     39|				y_weights = 6;
  221|     39|				break;
  222|     39|			case 2:
  ------------------
  |  Branch (222:4): [True: 39, False: 126]
  ------------------
  223|     39|				z_weights = 6;
  224|     39|				break;
  225|     48|			case 3:
  ------------------
  |  Branch (225:4): [True: 48, False: 117]
  ------------------
  226|     48|				return false;
  227|    165|			}
  228|    117|			break;
  229|    657|		}
  230|    657|	}
  231|       |
  232|  3.35k|	unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
  233|  3.35k|	quant_mode = (base_quant_mode - 2) + 6 * H;
  234|  3.35k|	is_dual_plane = D != 0;
  235|       |
  236|  3.35k|	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  237|  3.35k|	return (weight_count <= BLOCK_MAX_WEIGHTS &&
  ------------------
  |  Branch (237:10): [True: 1.98k, False: 1.36k]
  ------------------
  238|  3.35k|	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  ------------------
  |  Branch (238:10): [True: 1.94k, False: 42]
  ------------------
  239|  3.35k|	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  ------------------
  |  Branch (239:10): [True: 689, False: 1.25k]
  ------------------
  240|  3.66k|}
astcenc_block_sizes.cpp:_ZL20assign_kmeans_texelsR21block_size_descriptor:
  690|      3|) {
  691|       |	// Use all texels for kmeans on a small block
  692|      3|	if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
  ------------------
  |  Branch (692:6): [True: 1, False: 2]
  ------------------
  693|      1|	{
  694|     17|		for (uint8_t i = 0; i < bsd.texel_count; i++)
  ------------------
  |  Branch (694:23): [True: 16, False: 1]
  ------------------
  695|     16|		{
  696|     16|			bsd.kmeans_texels[i] = i;
  697|     16|		}
  698|       |
  699|      1|		return;
  700|      1|	}
  701|       |
  702|       |	// Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
  703|      2|	uint64_t rng_state[2];
  704|      2|	astc::rand_init(rng_state);
  705|       |
  706|       |	// Initialize array used for tracking used indices
  707|      2|	bool seen[BLOCK_MAX_TEXELS];
  708|    362|	for (uint8_t i = 0; i < bsd.texel_count; i++)
  ------------------
  |  Branch (708:22): [True: 360, False: 2]
  ------------------
  709|    360|	{
  710|    360|		seen[i] = false;
  711|    360|	}
  712|       |
  713|       |	// Assign 64 random indices, retrying if we see repeats
  714|      2|	unsigned int arr_elements_set = 0;
  715|    155|	while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
  ------------------
  |  Branch (715:9): [True: 153, False: 2]
  ------------------
  716|    153|	{
  717|    153|		uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
  718|    153|		texel = texel % bsd.texel_count;
  719|    153|		if (!seen[texel])
  ------------------
  |  Branch (719:7): [True: 128, False: 25]
  ------------------
  720|    128|		{
  721|    128|			bsd.kmeans_texels[arr_elements_set++] = texel;
  722|    128|			seen[texel] = true;
  723|    128|		}
  724|    153|	}
  725|      2|}
astcenc_block_sizes.cpp:_ZL34construct_block_size_descriptor_2djjbfR21block_size_descriptor:
  798|      2|) {
  799|       |	// Store a remap table for storing packed decimation modes.
  800|       |	// Indexing uses [Y * 16 + X] and max size for each axis is 12.
  801|      2|	static const unsigned int MAX_DMI = 12 * 16 + 12;
  802|      2|	int decimation_mode_index[MAX_DMI];
  803|       |
  804|      2|	dt_init_working_buffers* wb = new dt_init_working_buffers;
  805|       |
  806|      2|	bsd.xdim = static_cast<uint8_t>(x_texels);
  807|      2|	bsd.ydim = static_cast<uint8_t>(y_texels);
  808|      2|	bsd.zdim = 1;
  809|      2|	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
  810|       |
  811|    410|	for (unsigned int i = 0; i < MAX_DMI; i++)
  ------------------
  |  Branch (811:27): [True: 408, False: 2]
  ------------------
  812|    408|	{
  813|    408|		decimation_mode_index[i] = -1;
  814|    408|	}
  815|       |
  816|       |	// Gather all the decimation grids that can be used with the current block
  817|      2|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  818|      2|	const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
  819|      2|	float always_cutoff = 0.0f;
  820|       |#else
  821|       |	// Unused in decompress-only builds
  822|       |	(void)can_omit_modes;
  823|       |	(void)mode_cutoff;
  824|       |#endif
  825|       |
  826|       |	// Construct the list of block formats referencing the decimation tables
  827|      2|	unsigned int packed_bm_idx = 0;
  828|      2|	unsigned int packed_dm_idx = 0;
  829|       |
  830|       |	// Trackers
  831|      2|	unsigned int bm_counts[4] { 0 };
  832|      2|	unsigned int dm_counts[4] { 0 };
  833|       |
  834|       |	// Clear the list to a known-bad value
  835|  4.09k|	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (835:27): [True: 4.09k, False: 2]
  ------------------
  836|  4.09k|	{
  837|  4.09k|		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
  838|  4.09k|	}
  839|       |
  840|       |	// Iterate four times to build a usefully ordered list:
  841|       |	//   - Pass 0 - keep selected single plane "always" block modes
  842|       |	//   - Pass 1 - keep selected single plane "non-always" block modes
  843|       |	//   - Pass 2 - keep select dual plane block modes
  844|       |	//   - Pass 3 - keep everything else that's legal
  845|      2|	unsigned int limit = can_omit_modes ? 3 : 4;
  ------------------
  |  Branch (845:23): [True: 0, False: 2]
  ------------------
  846|     10|	for (unsigned int j = 0; j < limit; j ++)
  ------------------
  |  Branch (846:27): [True: 8, False: 2]
  ------------------
  847|      8|	{
  848|  16.3k|		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (848:28): [True: 16.3k, False: 8]
  ------------------
  849|  16.3k|		{
  850|       |			// Skip modes we've already included in a previous pass
  851|  16.3k|			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  ------------------
  |  Branch (851:8): [True: 1.51k, False: 14.8k]
  ------------------
  852|  1.51k|			{
  853|  1.51k|				continue;
  854|  1.51k|			}
  855|       |
  856|       |			// Decode parameters
  857|  14.8k|			unsigned int x_weights;
  858|  14.8k|			unsigned int y_weights;
  859|  14.8k|			bool is_dual_plane;
  860|  14.8k|			unsigned int quant_mode;
  861|  14.8k|			unsigned int weight_bits;
  862|  14.8k|			bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
  863|       |
  864|       |			// Always skip invalid encodings for the current block size
  865|  14.8k|			if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
  ------------------
  |  Branch (865:8): [True: 10.1k, False: 4.68k]
  |  Branch (865:18): [True: 1.45k, False: 3.23k]
  |  Branch (865:44): [True: 1.06k, False: 2.16k]
  ------------------
  866|  12.7k|			{
  867|  12.7k|				continue;
  868|  12.7k|			}
  869|       |
  870|       |			// Selectively skip dual plane encodings
  871|  2.16k|			if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
  ------------------
  |  Branch (871:9): [True: 1.83k, False: 330]
  |  Branch (871:21): [True: 660, False: 1.17k]
  |  Branch (871:40): [True: 330, False: 1.17k]
  |  Branch (871:50): [True: 0, False: 330]
  ------------------
  872|    660|			{
  873|    660|				continue;
  874|    660|			}
  875|       |
  876|       |			// Always skip encodings we can't physically encode based on
  877|       |			// generic encoding bit availability
  878|  1.50k|			if (is_dual_plane)
  ------------------
  |  Branch (878:8): [True: 330, False: 1.17k]
  ------------------
  879|    330|			{
  880|       |				 // This is the only check we need as only support 1 partition
  881|    330|				 if ((109 - weight_bits) <= 0)
  ------------------
  |  Branch (881:10): [True: 0, False: 330]
  ------------------
  882|      0|				 {
  883|      0|					continue;
  884|      0|				 }
  885|    330|			}
  886|  1.17k|			else
  887|  1.17k|			{
  888|       |				// This is conservative - fewer bits may be available for > 1 partition
  889|  1.17k|				 if ((111 - weight_bits) <= 0)
  ------------------
  |  Branch (889:10): [True: 0, False: 1.17k]
  ------------------
  890|      0|				 {
  891|      0|					continue;
  892|      0|				 }
  893|  1.17k|			}
  894|       |
  895|       |			// Selectively skip encodings based on percentile
  896|  1.50k|			bool percentile_hit = false;
  897|  1.50k|	#if !defined(ASTCENC_DECOMPRESS_ONLY)
  898|  1.50k|			if (j == 0)
  ------------------
  |  Branch (898:8): [True: 590, False: 918]
  ------------------
  899|    590|			{
  900|    590|				percentile_hit = percentiles[i] <= always_cutoff;
  901|    590|			}
  902|    918|			else
  903|    918|			{
  904|    918|				percentile_hit = percentiles[i] <= mode_cutoff;
  905|    918|			}
  906|  1.50k|	#endif
  907|       |
  908|  1.50k|			if (j != 3 && !percentile_hit)
  ------------------
  |  Branch (908:8): [True: 1.50k, False: 0]
  |  Branch (908:18): [True: 588, False: 920]
  ------------------
  909|    588|			{
  910|    588|				continue;
  911|    588|			}
  912|       |
  913|       |			// Allocate and initialize the decimation table entry if we've not used it yet
  914|    920|			int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
  915|    920|			if (decimation_mode < 0)
  ------------------
  |  Branch (915:8): [True: 96, False: 824]
  ------------------
  916|     96|			{
  917|     96|				construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
  918|     96|				decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
  919|     96|				decimation_mode = packed_dm_idx;
  920|       |
  921|     96|				dm_counts[j]++;
  922|     96|				packed_dm_idx++;
  923|     96|			}
  924|       |
  925|    920|			auto& bm = bsd.block_modes[packed_bm_idx];
  926|       |
  927|    920|			bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
  928|    920|			bm.quant_mode = static_cast<uint8_t>(quant_mode);
  929|    920|			bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
  930|    920|			bm.weight_bits = static_cast<uint8_t>(weight_bits);
  931|    920|			bm.mode_index = static_cast<uint16_t>(i);
  932|       |
  933|    920|			auto& dm = bsd.decimation_modes[decimation_mode];
  934|       |
  935|    920|			if (is_dual_plane)
  ------------------
  |  Branch (935:8): [True: 330, False: 590]
  ------------------
  936|    330|			{
  937|    330|				dm.set_ref_2plane(bm.get_weight_quant_mode());
  938|    330|			}
  939|    590|			else
  940|    590|			{
  941|    590|				dm.set_ref_1plane(bm.get_weight_quant_mode());
  942|    590|			}
  943|       |
  944|    920|			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
  945|       |
  946|    920|			packed_bm_idx++;
  947|    920|			bm_counts[j]++;
  948|    920|		}
  949|      8|	}
  950|       |
  951|      2|	bsd.block_mode_count_1plane_always = bm_counts[0];
  952|      2|	bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
  953|      2|	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
  954|      2|	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
  955|       |
  956|      2|	bsd.decimation_mode_count_always = dm_counts[0];
  957|      2|	bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
  958|      2|	bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
  959|       |
  960|      2|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  961|      2|	assert(bsd.block_mode_count_1plane_always > 0);
  962|      2|	assert(bsd.decimation_mode_count_always > 0);
  963|       |
  964|      2|	delete[] percentiles;
  965|      2|#endif
  966|       |
  967|       |	// Ensure the end of the array contains valid data (should never get read)
  968|     80|	for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  ------------------
  |  Branch (968:55): [True: 78, False: 2]
  ------------------
  969|     78|	{
  970|     78|		bsd.decimation_modes[i].maxprec_1plane = -1;
  971|     78|		bsd.decimation_modes[i].maxprec_2planes = -1;
  972|     78|		bsd.decimation_modes[i].refprec_1plane = 0;
  973|     78|		bsd.decimation_modes[i].refprec_2planes = 0;
  974|     78|	}
  975|       |
  976|       |	// Determine the texels to use for kmeans clustering.
  977|      2|	assign_kmeans_texels(bsd);
  978|       |
  979|      2|	delete wb;
  980|      2|}
astcenc_block_sizes.cpp:_ZL20decode_block_mode_2djRjS_RbS_S_:
   43|  14.8k|) {
   44|  14.8k|	unsigned int base_quant_mode = (block_mode >> 4) & 1;
   45|  14.8k|	unsigned int H = (block_mode >> 9) & 1;
   46|  14.8k|	unsigned int D = (block_mode >> 10) & 1;
   47|  14.8k|	unsigned int A = (block_mode >> 5) & 0x3;
   48|       |
   49|  14.8k|	x_weights = 0;
   50|  14.8k|	y_weights = 0;
   51|       |
   52|  14.8k|	if ((block_mode & 3) != 0)
  ------------------
  |  Branch (52:6): [True: 10.9k, False: 3.94k]
  ------------------
   53|  10.9k|	{
   54|  10.9k|		base_quant_mode |= (block_mode & 3) << 1;
   55|  10.9k|		unsigned int B = (block_mode >> 7) & 3;
   56|  10.9k|		switch ((block_mode >> 2) & 3)
  ------------------
  |  Branch (56:11): [True: 0, False: 10.9k]
  ------------------
   57|  10.9k|		{
   58|  2.62k|		case 0:
  ------------------
  |  Branch (58:3): [True: 2.62k, False: 8.30k]
  ------------------
   59|  2.62k|			x_weights = B + 4;
   60|  2.62k|			y_weights = A + 2;
   61|  2.62k|			break;
   62|  2.85k|		case 1:
  ------------------
  |  Branch (62:3): [True: 2.85k, False: 8.07k]
  ------------------
   63|  2.85k|			x_weights = B + 8;
   64|  2.85k|			y_weights = A + 2;
   65|  2.85k|			break;
   66|  2.85k|		case 2:
  ------------------
  |  Branch (66:3): [True: 2.85k, False: 8.07k]
  ------------------
   67|  2.85k|			x_weights = A + 2;
   68|  2.85k|			y_weights = B + 8;
   69|  2.85k|			break;
   70|  2.60k|		case 3:
  ------------------
  |  Branch (70:3): [True: 2.60k, False: 8.32k]
  ------------------
   71|  2.60k|			B &= 1;
   72|  2.60k|			if (block_mode & 0x100)
  ------------------
  |  Branch (72:8): [True: 1.23k, False: 1.37k]
  ------------------
   73|  1.23k|			{
   74|  1.23k|				x_weights = B + 2;
   75|  1.23k|				y_weights = A + 2;
   76|  1.23k|			}
   77|  1.37k|			else
   78|  1.37k|			{
   79|  1.37k|				x_weights = A + 2;
   80|  1.37k|				y_weights = B + 6;
   81|  1.37k|			}
   82|  2.60k|			break;
   83|  10.9k|		}
   84|  10.9k|	}
   85|  3.94k|	else
   86|  3.94k|	{
   87|  3.94k|		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
   88|  3.94k|		if (((block_mode >> 2) & 3) == 0)
  ------------------
  |  Branch (88:7): [True: 1.02k, False: 2.92k]
  ------------------
   89|  1.02k|		{
   90|  1.02k|			return false;
   91|  1.02k|		}
   92|       |
   93|  2.92k|		unsigned int B = (block_mode >> 9) & 3;
   94|  2.92k|		switch ((block_mode >> 7) & 3)
  ------------------
  |  Branch (94:11): [True: 0, False: 2.92k]
  ------------------
   95|  2.92k|		{
   96|    727|		case 0:
  ------------------
  |  Branch (96:3): [True: 727, False: 2.19k]
  ------------------
   97|    727|			x_weights = 12;
   98|    727|			y_weights = A + 2;
   99|    727|			break;
  100|    727|		case 1:
  ------------------
  |  Branch (100:3): [True: 727, False: 2.19k]
  ------------------
  101|    727|			x_weights = A + 2;
  102|    727|			y_weights = 12;
  103|    727|			break;
  104|    707|		case 2:
  ------------------
  |  Branch (104:3): [True: 707, False: 2.21k]
  ------------------
  105|    707|			x_weights = A + 6;
  106|    707|			y_weights = B + 6;
  107|    707|			D = 0;
  108|    707|			H = 0;
  109|    707|			break;
  110|    760|		case 3:
  ------------------
  |  Branch (110:3): [True: 760, False: 2.16k]
  ------------------
  111|    760|			switch ((block_mode >> 5) & 3)
  ------------------
  |  Branch (111:12): [True: 0, False: 760]
  ------------------
  112|    760|			{
  113|    188|			case 0:
  ------------------
  |  Branch (113:4): [True: 188, False: 572]
  ------------------
  114|    188|				x_weights = 6;
  115|    188|				y_weights = 10;
  116|    188|				break;
  117|    188|			case 1:
  ------------------
  |  Branch (117:4): [True: 188, False: 572]
  ------------------
  118|    188|				x_weights = 10;
  119|    188|				y_weights = 6;
  120|    188|				break;
  121|    192|			case 2:
  ------------------
  |  Branch (121:4): [True: 192, False: 568]
  ------------------
  122|    384|			case 3:
  ------------------
  |  Branch (122:4): [True: 192, False: 568]
  ------------------
  123|    384|				return false;
  124|    760|			}
  125|    376|			break;
  126|  2.92k|		}
  127|  2.92k|	}
  128|       |
  129|  13.4k|	unsigned int weight_count = x_weights * y_weights * (D + 1);
  130|  13.4k|	quant_mode = (base_quant_mode - 2) + 6 * H;
  131|  13.4k|	is_dual_plane = D != 0;
  132|       |
  133|  13.4k|	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  134|  13.4k|	return (weight_count <= BLOCK_MAX_WEIGHTS &&
  ------------------
  |  Branch (134:10): [True: 10.8k, False: 2.64k]
  ------------------
  135|  13.4k|	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  ------------------
  |  Branch (135:10): [True: 10.0k, False: 728]
  ------------------
  136|  13.4k|	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  ------------------
  |  Branch (136:10): [True: 4.68k, False: 5.40k]
  ------------------
  137|  14.8k|}
astcenc_block_sizes.cpp:_ZL21construct_dt_entry_2djjjjR21block_size_descriptorR23dt_init_working_buffersj:
  746|     96|) {
  747|     96|	unsigned int weight_count = x_weights * y_weights;
  748|     96|	assert(weight_count <= BLOCK_MAX_WEIGHTS);
  749|       |
  750|     96|	bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
  751|       |
  752|     96|	decimation_info& di = bsd.decimation_tables[index];
  753|     96|	init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
  754|       |
  755|     96|	int maxprec_1plane = -1;
  756|     96|	int maxprec_2planes = -1;
  757|  1.24k|	for (int i = 0; i < 12; i++)
  ------------------
  |  Branch (757:18): [True: 1.15k, False: 96]
  ------------------
  758|  1.15k|	{
  759|  1.15k|		unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
  760|  1.15k|		if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (760:7): [True: 1.02k, False: 124]
  |  Branch (760:47): [True: 590, False: 438]
  ------------------
  761|    590|		{
  762|    590|			maxprec_1plane = i;
  763|    590|		}
  764|       |
  765|  1.15k|		if (try_2planes)
  ------------------
  |  Branch (765:7): [True: 684, False: 468]
  ------------------
  766|    684|		{
  767|    684|			unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
  768|    684|			if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (768:8): [True: 658, False: 26]
  |  Branch (768:49): [True: 330, False: 328]
  ------------------
  769|    330|			{
  770|    330|				maxprec_2planes = i;
  771|    330|			}
  772|    684|		}
  773|  1.15k|	}
  774|       |
  775|       |	// At least one of the two should be valid ...
  776|     96|	assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
  777|     96|	bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
  778|     96|	bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
  779|     96|	bsd.decimation_modes[index].refprec_1plane = 0;
  780|     96|	bsd.decimation_modes[index].refprec_2planes = 0;
  781|     96|}
astcenc_block_sizes.cpp:_ZL23init_decimation_info_2djjjjR15decimation_infoR23dt_init_working_buffers:
  259|     96|) {
  260|     96|	unsigned int texels_per_block = x_texels * y_texels;
  261|     96|	unsigned int weights_per_block = x_weights * y_weights;
  262|       |
  263|     96|	uint8_t max_texel_count_of_weight = 0;
  264|       |
  265|     96|	promise(weights_per_block > 0);
  ------------------
  |  |   61|     96|	#define promise(cond) assert(cond)
  ------------------
  266|     96|	promise(texels_per_block > 0);
  ------------------
  |  |   61|     96|	#define promise(cond) assert(cond)
  ------------------
  267|     96|	promise(x_texels > 0);
  ------------------
  |  |   61|     96|	#define promise(cond) assert(cond)
  ------------------
  268|     96|	promise(y_texels > 0);
  ------------------
  |  |   61|     96|	#define promise(cond) assert(cond)
  ------------------
  269|       |
  270|  2.93k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (270:27): [True: 2.83k, False: 96]
  ------------------
  271|  2.83k|	{
  272|  2.83k|		wb.texel_count_of_weight[i] = 0;
  273|  2.83k|	}
  274|       |
  275|  12.7k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (275:27): [True: 12.6k, False: 96]
  ------------------
  276|  12.6k|	{
  277|  12.6k|		wb.weight_count_of_texel[i] = 0;
  278|  12.6k|	}
  279|       |
  280|  1.17k|	for (unsigned int y = 0; y < y_texels; y++)
  ------------------
  |  Branch (280:27): [True: 1.08k, False: 96]
  ------------------
  281|  1.08k|	{
  282|  13.7k|		for (unsigned int x = 0; x < x_texels; x++)
  ------------------
  |  Branch (282:28): [True: 12.6k, False: 1.08k]
  ------------------
  283|  12.6k|		{
  284|  12.6k|			unsigned int texel = y * x_texels + x;
  285|       |
  286|  12.6k|			unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  287|  12.6k|			unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  288|       |
  289|  12.6k|			unsigned int x_weight_frac = x_weight & 0xF;
  290|  12.6k|			unsigned int y_weight_frac = y_weight & 0xF;
  291|  12.6k|			unsigned int x_weight_int = x_weight >> 4;
  292|  12.6k|			unsigned int y_weight_int = y_weight >> 4;
  293|       |
  294|  12.6k|			unsigned int qweight[4];
  295|  12.6k|			qweight[0] = x_weight_int + y_weight_int * x_weights;
  296|  12.6k|			qweight[1] = qweight[0] + 1;
  297|  12.6k|			qweight[2] = qweight[0] + x_weights;
  298|  12.6k|			qweight[3] = qweight[2] + 1;
  299|       |
  300|       |			// Truncated-precision bilinear interpolation
  301|  12.6k|			unsigned int prod = x_weight_frac * y_weight_frac;
  302|       |
  303|  12.6k|			unsigned int weight[4];
  304|  12.6k|			weight[3] = (prod + 8) >> 4;
  305|  12.6k|			weight[1] = x_weight_frac - weight[3];
  306|  12.6k|			weight[2] = y_weight_frac - weight[3];
  307|  12.6k|			weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
  308|       |
  309|  63.3k|			for (unsigned int i = 0; i < 4; i++)
  ------------------
  |  Branch (309:29): [True: 50.6k, False: 12.6k]
  ------------------
  310|  50.6k|			{
  311|  50.6k|				if (weight[i] != 0)
  ------------------
  |  Branch (311:9): [True: 37.8k, False: 12.7k]
  ------------------
  312|  37.8k|				{
  313|  37.8k|					wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  314|  37.8k|					wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  315|  37.8k|					wb.weight_count_of_texel[texel]++;
  316|  37.8k|					wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  317|  37.8k|					wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  318|  37.8k|					wb.texel_count_of_weight[qweight[i]]++;
  319|  37.8k|					max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  320|  37.8k|				}
  321|  50.6k|			}
  322|  12.6k|		}
  323|  1.08k|	}
  324|       |
  325|     96|	uint8_t max_texel_weight_count = 0;
  326|  12.7k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (326:27): [True: 12.6k, False: 96]
  ------------------
  327|  12.6k|	{
  328|  12.6k|		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  329|  12.6k|		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  330|       |
  331|  50.5k|		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  ------------------
  |  Branch (331:28): [True: 37.8k, False: 12.6k]
  ------------------
  332|  37.8k|		{
  333|  37.8k|			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
  334|  37.8k|			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  335|  37.8k|			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
  336|  37.8k|		}
  337|       |
  338|       |		// Init all 4 entries so we can rely on zeros for vectorization
  339|  25.4k|		for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
  ------------------
  |  Branch (339:54): [True: 12.7k, False: 12.6k]
  ------------------
  340|  12.7k|		{
  341|  12.7k|			di.texel_weight_contribs_int_tr[j][i] = 0;
  342|  12.7k|			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
  343|  12.7k|			di.texel_weights_tr[j][i] = 0;
  344|  12.7k|		}
  345|  12.6k|	}
  346|       |
  347|     96|	di.max_texel_weight_count = max_texel_weight_count;
  348|       |
  349|  2.93k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (349:27): [True: 2.83k, False: 96]
  ------------------
  350|  2.83k|	{
  351|  2.83k|		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  352|  2.83k|		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  353|       |
  354|  40.7k|		for (unsigned int j = 0; j < texel_count_wt; j++)
  ------------------
  |  Branch (354:28): [True: 37.8k, False: 2.83k]
  ------------------
  355|  37.8k|		{
  356|  37.8k|			uint8_t texel = wb.texels_of_weight[i][j];
  357|       |
  358|       |			// Create transposed versions of these for better vectorization
  359|  37.8k|			di.weight_texels_tr[j][i] = texel;
  360|  37.8k|			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  361|       |
  362|       |			// Store the per-texel contribution of this weight for each texel it contributes to
  363|  37.8k|			di.texel_contrib_for_weight[j][i] = 0.0f;
  364|  81.4k|			for (unsigned int k = 0; k < 4; k++)
  ------------------
  |  Branch (364:29): [True: 81.4k, False: 0]
  ------------------
  365|  81.4k|			{
  366|  81.4k|				uint8_t dttw = di.texel_weights_tr[k][texel];
  367|  81.4k|				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
  368|  81.4k|				if (dttw == i && dttwf != 0.0f)
  ------------------
  |  Branch (368:9): [True: 37.8k, False: 43.5k]
  |  Branch (368:22): [True: 37.8k, False: 0]
  ------------------
  369|  37.8k|				{
  370|  37.8k|					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
  371|  37.8k|					break;
  372|  37.8k|				}
  373|  81.4k|			}
  374|  37.8k|		}
  375|       |
  376|       |		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  377|       |		// Match last texel in active lane in SIMD group, for better gathers
  378|  2.83k|		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
  379|  20.2k|		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (379:41): [True: 17.4k, False: 2.83k]
  ------------------
  380|  17.4k|		{
  381|  17.4k|			di.weight_texels_tr[j][i] = last_texel;
  382|  17.4k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  383|  17.4k|		}
  384|  2.83k|	}
  385|       |
  386|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  387|     96|	size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  388|     96|	for (size_t i = texels_per_block; i < texels_per_block_simd; i++)
  ------------------
  |  Branch (388:36): [True: 0, False: 96]
  ------------------
  389|      0|	{
  390|      0|		di.texel_weight_count[i] = 0;
  391|       |
  392|      0|		for (size_t j = 0; j < 4; j++)
  ------------------
  |  Branch (392:22): [True: 0, False: 0]
  ------------------
  393|      0|		{
  394|      0|			di.texel_weight_contribs_float_tr[j][i] = 0;
  395|      0|			di.texel_weights_tr[j][i] = 0;
  396|      0|			di.texel_weight_contribs_int_tr[j][i] = 0;
  397|      0|		}
  398|      0|	}
  399|       |
  400|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  401|       |	// Match last texel in active lane in SIMD group, for better gathers
  402|     96|	unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  403|     96|	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
  404|       |
  405|     96|	size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  406|    184|	for (size_t i = weights_per_block; i < weights_per_block_simd; i++)
  ------------------
  |  Branch (406:37): [True: 88, False: 96]
  ------------------
  407|     88|	{
  408|     88|		di.weight_texel_count[i] = 0;
  409|       |
  410|  2.79k|		for (size_t j = 0; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (410:22): [True: 2.70k, False: 88]
  ------------------
  411|  2.70k|		{
  412|  2.70k|			di.weight_texels_tr[j][i] = last_texel;
  413|  2.70k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  414|  2.70k|		}
  415|     88|	}
  416|       |
  417|     96|	di.texel_count = static_cast<uint8_t>(texels_per_block);
  418|     96|	di.weight_count = static_cast<uint8_t>(weights_per_block);
  419|     96|	di.weight_x = static_cast<uint8_t>(x_weights);
  420|     96|	di.weight_y = static_cast<uint8_t>(y_weights);
  421|     96|	di.weight_z = 1;
  422|     96|}

_Z25get_ise_sequence_bitcountj12quant_method:
  422|  20.5k|) {
  423|       |	// Cope with out-of bounds values - input might be invalid
  424|  20.5k|	if (static_cast<size_t>(quant_level) >= ise_sizes.size())
  ------------------
  |  Branch (424:6): [True: 0, False: 20.5k]
  ------------------
  425|      0|	{
  426|       |		// Arbitrary large number that's more than an ASTC block can hold
  427|      0|		return 1024;
  428|      0|	}
  429|       |
  430|  20.5k|	auto& entry = ise_sizes[quant_level];
  431|  20.5k|	unsigned int divisor = (entry.divisor << 1) + 1;
  432|  20.5k|	return (entry.scale * character_count + divisor - 1) / divisor;
  433|  20.5k|}
_Z10decode_ise12quant_methodjPKhPhj:
  657|     75|) {
  658|     75|	promise(character_count > 0);
  ------------------
  |  |   61|     75|	#define promise(cond) assert(cond)
  ------------------
  659|       |
  660|       |	// Note: due to how the trit/quint-block unpacking is done in this function, we may write more
  661|       |	// temporary results than the number of outputs. The maximum actual number of results is 64 bit,
  662|       |	// but we keep 4 additional character_count of padding.
  663|     75|	uint8_t results[68];
  664|     75|	uint8_t tq_blocks[22] { 0 }; // Trit-blocks or quint-blocks, must be zeroed
  665|       |
  666|     75|	unsigned int bits = btq_counts[quant_level].bits;
  667|     75|	unsigned int trits = btq_counts[quant_level].trits;
  668|     75|	unsigned int quints = btq_counts[quant_level].quints;
  669|       |
  670|     75|	unsigned int lcounter = 0;
  671|     75|	unsigned int hcounter = 0;
  672|       |
  673|       |	// Collect bits for each element, as well as bits for any trit-blocks and quint-blocks.
  674|  1.76k|	for (unsigned int i = 0; i < character_count; i++)
  ------------------
  |  Branch (674:27): [True: 1.69k, False: 75]
  ------------------
  675|  1.69k|	{
  676|  1.69k|		results[i] = static_cast<uint8_t>(read_bits(bits, bit_offset, input_data));
  677|  1.69k|		bit_offset += bits;
  678|       |
  679|  1.69k|		if (trits)
  ------------------
  |  Branch (679:7): [True: 761, False: 930]
  ------------------
  680|    761|		{
  681|    761|			static const uint8_t bits_to_read[5]  { 2, 2, 1, 2, 1 };
  682|    761|			static const uint8_t block_shift[5]   { 0, 2, 4, 5, 7 };
  683|    761|			static const uint8_t next_lcounter[5] { 1, 2, 3, 4, 0 };
  684|    761|			static const uint8_t hcounter_incr[5] { 0, 0, 0, 0, 1 };
  685|    761|			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
  686|    761|			bit_offset += bits_to_read[lcounter];
  687|    761|			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
  688|    761|			hcounter += hcounter_incr[lcounter];
  689|    761|			lcounter = next_lcounter[lcounter];
  690|    761|		}
  691|       |
  692|  1.69k|		if (quints)
  ------------------
  |  Branch (692:7): [True: 162, False: 1.52k]
  ------------------
  693|    162|		{
  694|    162|			static const uint8_t bits_to_read[3]  { 3, 2, 2 };
  695|    162|			static const uint8_t block_shift[3]   { 0, 3, 5 };
  696|    162|			static const uint8_t next_lcounter[3] { 1, 2, 0 };
  697|    162|			static const uint8_t hcounter_incr[3] { 0, 0, 1 };
  698|    162|			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
  699|    162|			bit_offset += bits_to_read[lcounter];
  700|    162|			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
  701|    162|			hcounter += hcounter_incr[lcounter];
  702|    162|			lcounter = next_lcounter[lcounter];
  703|    162|		}
  704|  1.69k|	}
  705|       |
  706|       |	// Unpack trit-blocks or quint-blocks as needed
  707|     75|	if (trits)
  ------------------
  |  Branch (707:6): [True: 28, False: 47]
  ------------------
  708|     28|	{
  709|     28|		unsigned int trit_blocks = (character_count + 4) / 5;
  710|     28|		promise(trit_blocks > 0);
  ------------------
  |  |   61|     28|	#define promise(cond) assert(cond)
  ------------------
  711|    190|		for (unsigned int i = 0; i < trit_blocks; i++)
  ------------------
  |  Branch (711:28): [True: 162, False: 28]
  ------------------
  712|    162|		{
  713|    162|			const uint8_t *tritptr = trits_of_integer[tq_blocks[i]];
  714|    162|			results[5 * i    ] |= tritptr[0] << bits;
  715|    162|			results[5 * i + 1] |= tritptr[1] << bits;
  716|    162|			results[5 * i + 2] |= tritptr[2] << bits;
  717|    162|			results[5 * i + 3] |= tritptr[3] << bits;
  718|    162|			results[5 * i + 4] |= tritptr[4] << bits;
  719|    162|		}
  720|     28|	}
  721|       |
  722|     75|	if (quints)
  ------------------
  |  Branch (722:6): [True: 13, False: 62]
  ------------------
  723|     13|	{
  724|     13|		unsigned int quint_blocks = (character_count + 2) / 3;
  725|     13|		promise(quint_blocks > 0);
  ------------------
  |  |   61|     13|	#define promise(cond) assert(cond)
  ------------------
  726|     70|		for (unsigned int i = 0; i < quint_blocks; i++)
  ------------------
  |  Branch (726:28): [True: 57, False: 13]
  ------------------
  727|     57|		{
  728|     57|			const uint8_t *quintptr = quints_of_integer[tq_blocks[i]];
  729|     57|			results[3 * i    ] |= quintptr[0] << bits;
  730|     57|			results[3 * i + 1] |= quintptr[1] << bits;
  731|     57|			results[3 * i + 2] |= quintptr[2] << bits;
  732|     57|		}
  733|     13|	}
  734|       |
  735|  1.76k|	for (unsigned int i = 0; i < character_count; i++)
  ------------------
  |  Branch (735:27): [True: 1.69k, False: 75]
  ------------------
  736|  1.69k|	{
  737|  1.69k|		output_data[i] = results[i];
  738|  1.69k|	}
  739|     75|}
astcenc_integer_sequence.cpp:_ZL9read_bitsjjPKh:
  482|  2.61k|) {
  483|  2.61k|	unsigned int mask = (1 << bitcount) - 1;
  484|  2.61k|	ptr += bitoffset >> 3;
  485|  2.61k|	bitoffset &= 7;
  486|  2.61k|	unsigned int value = ptr[0] | (ptr[1] << 8);
  487|  2.61k|	value >>= bitoffset;
  488|  2.61k|	value &= mask;
  489|  2.61k|	return value;
  490|  2.61k|}

_ZNK10block_mode21get_weight_quant_modeEv:
  438|    920|	{
  439|    920|		return static_cast<quant_method>(this->quant_mode);
  440|    920|	}
_ZN15decimation_mode14set_ref_1planeE12quant_method:
  474|    590|	{
  475|    590|		refprec_1plane |= (1 << weight_quant);
  476|    590|	}
_ZN15decimation_mode14set_ref_2planeE12quant_method:
  495|    330|	{
  496|    330|		refprec_2planes |= static_cast<uint16_t>(1 << weight_quant);
  497|    330|	}
_ZNK21block_size_descriptor14get_block_modeEj:
  637|     50|	{
  638|     50|		unsigned int packed_index = this->block_mode_packed_index[block_mode];
  639|     50|		assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count_all);
  640|     50|		return this->block_modes[packed_index];
  641|     50|	}
_ZNK21block_size_descriptor19get_decimation_infoEj:
  671|     50|	{
  672|     50|		return this->decimation_tables[decimation_mode];
  673|     50|	}

_ZN4astc9rand_initEPm:
   33|      2|{
   34|      2|	state[0] = 0xfaf9e171cea1ec6bULL;
   35|      2|	state[1] = 0xf1b318cc06af5d71ULL;
   36|      2|}
_ZN4astc4randEPm:
   40|    153|{
   41|    153|	uint64_t s0 = state[0];
   42|    153|	uint64_t s1 = state[1];
   43|    153|	uint64_t res = s0 + s1;
   44|    153|	s1 ^= s0;
   45|    153|	state[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16);
   46|    153|	state[1] = rotl(s1, 37);
   47|    153|	return res;
   48|    153|}
astcenc_mathlib.cpp:_ZL4rotlmi:
   27|    306|{
   28|    306|	return (val << count) | (val >> (64 - count));
   29|    306|}

astcenc_block_sizes.cpp:_ZN4astcL3maxIhEET_S1_S1_:
  228|   110k|{
  229|   110k|	return p > q ? p : q;
  ------------------
  |  Branch (229:9): [True: 88.7k, False: 21.4k]
  ------------------
  230|   110k|}
astcenc_partition_tables.cpp:_ZN4astcL3minIhEET_S1_S1_:
  177|  13.2k|{
  178|  13.2k|	return p < q ? p : q;
  ------------------
  |  Branch (178:9): [True: 5.05k, False: 8.15k]
  ------------------
  179|  13.2k|}

_Z21init_partition_tablesR21block_size_descriptorbj:
  464|      3|) {
  465|      3|	partition_info* par_tab2 = bsd.partitionings;
  466|      3|	partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
  467|      3|	partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
  468|      3|	partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
  469|       |
  470|      3|	generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
  471|      3|	bsd.partitioning_count_selected[0] = 1;
  472|      3|	bsd.partitioning_count_all[0] = 1;
  473|       |
  474|      3|	uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
  ------------------
  |  |   25|      3|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   68|      3|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  475|       |
  476|      3|	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
  477|      3|	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
  478|      3|	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
  479|       |
  480|      3|	delete[] canonical_patterns;
  481|      3|}
astcenc_partition_tables.cpp:_ZL33generate_one_partition_info_entryR21block_size_descriptorjjjR14partition_info:
  282|  13.2k|) {
  283|  13.2k|	int texels_per_block = bsd.texel_count;
  284|  13.2k|	bool small_block = texels_per_block < 32;
  285|       |
  286|  13.2k|	uint8_t *partition_of_texel = pi.partition_of_texel;
  287|       |
  288|       |	// Assign texels to partitions
  289|  13.2k|	int texel_idx = 0;
  290|  13.2k|	int counts[BLOCK_MAX_PARTITIONS] { 0 };
  291|  47.0k|	for (unsigned int z = 0; z < bsd.zdim; z++)
  ------------------
  |  Branch (291:27): [True: 33.7k, False: 13.2k]
  ------------------
  292|  33.7k|	{
  293|   250k|		for (unsigned int y = 0; y <  bsd.ydim; y++)
  ------------------
  |  Branch (293:28): [True: 216k, False: 33.7k]
  ------------------
  294|   216k|		{
  295|  1.76M|			for (unsigned int x = 0; x <  bsd.xdim; x++)
  ------------------
  |  Branch (295:29): [True: 1.55M, False: 216k]
  ------------------
  296|  1.55M|			{
  297|  1.55M|				uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
  298|  1.55M|				pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
  299|  1.55M|				*partition_of_texel++ = part;
  300|  1.55M|			}
  301|   216k|		}
  302|  33.7k|	}
  303|       |
  304|       |	// Fill loop tail so we can overfetch later
  305|  53.4k|	for (unsigned int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (305:27): [True: 40.2k, False: 13.2k]
  ------------------
  306|  40.2k|	{
  307|  40.2k|		size_t ptex_count = counts[i];
  308|  40.2k|		size_t ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
  309|  78.4k|		for (size_t j = ptex_count; j < ptex_count_simd; j++)
  ------------------
  |  Branch (309:31): [True: 38.2k, False: 40.2k]
  ------------------
  310|  38.2k|		{
  311|  38.2k|			pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
  312|  38.2k|		}
  313|  40.2k|	}
  314|       |
  315|       |	// Populate the actual procedural partition count
  316|  13.2k|	if (counts[0] == 0)
  ------------------
  |  Branch (316:6): [True: 2.40k, False: 10.8k]
  ------------------
  317|  2.40k|	{
  318|  2.40k|		pi.partition_count = 0;
  319|  2.40k|	}
  320|  10.8k|	else if (counts[1] == 0)
  ------------------
  |  Branch (320:11): [True: 2.85k, False: 7.95k]
  ------------------
  321|  2.85k|	{
  322|  2.85k|		pi.partition_count = 1;
  323|  2.85k|	}
  324|  7.95k|	else if (counts[2] == 0)
  ------------------
  |  Branch (324:11): [True: 4.26k, False: 3.69k]
  ------------------
  325|  4.26k|	{
  326|  4.26k|		pi.partition_count = 2;
  327|  4.26k|	}
  328|  3.69k|	else if (counts[3] == 0)
  ------------------
  |  Branch (328:11): [True: 2.14k, False: 1.54k]
  ------------------
  329|  2.14k|	{
  330|  2.14k|		pi.partition_count = 3;
  331|  2.14k|	}
  332|  1.54k|	else
  333|  1.54k|	{
  334|  1.54k|		pi.partition_count = 4;
  335|  1.54k|	}
  336|       |
  337|       |	// Populate the partition index
  338|  13.2k|	pi.partition_index = static_cast<uint16_t>(partition_index);
  339|       |
  340|       |	// Populate the coverage bitmaps for 2/3/4 partitions
  341|  13.2k|	uint64_t* bitmaps { nullptr };
  342|  13.2k|	if (partition_count == 2)
  ------------------
  |  Branch (342:6): [True: 3.99k, False: 9.21k]
  ------------------
  343|  3.99k|	{
  344|  3.99k|		bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
  345|  3.99k|	}
  346|  9.21k|	else if (partition_count == 3)
  ------------------
  |  Branch (346:11): [True: 4.61k, False: 4.60k]
  ------------------
  347|  4.61k|	{
  348|  4.61k|		bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
  349|  4.61k|	}
  350|  4.60k|	else if (partition_count == 4)
  ------------------
  |  Branch (350:11): [True: 4.59k, False: 3]
  ------------------
  351|  4.59k|	{
  352|  4.59k|		bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
  353|  4.59k|	}
  354|       |
  355|  66.0k|	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
  ------------------
  |  Branch (355:27): [True: 52.8k, False: 13.2k]
  ------------------
  356|  52.8k|	{
  357|  52.8k|		pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
  358|  52.8k|	}
  359|       |
  360|       |	// Valid partitionings have texels in all of the requested partitions
  361|  13.2k|	bool valid = pi.partition_count == partition_count;
  362|       |
  363|  13.2k|	if (bitmaps)
  ------------------
  |  Branch (363:6): [True: 13.2k, False: 3]
  ------------------
  364|  13.2k|	{
  365|       |		// Populate the partition coverage bitmap
  366|  53.4k|		for (unsigned int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (366:28): [True: 40.2k, False: 13.2k]
  ------------------
  367|  40.2k|		{
  368|  40.2k|			bitmaps[i] = 0ULL;
  369|  40.2k|		}
  370|       |
  371|  13.2k|		unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
  372|   615k|		for (unsigned int i = 0; i < texels_to_process; i++)
  ------------------
  |  Branch (372:28): [True: 602k, False: 13.2k]
  ------------------
  373|   602k|		{
  374|   602k|			unsigned int idx = bsd.kmeans_texels[i];
  375|   602k|			bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
  376|   602k|		}
  377|  13.2k|	}
  378|       |
  379|  13.2k|	return valid;
  380|  13.2k|}
astcenc_partition_tables.cpp:_ZL16select_partitioniiiiib:
  149|  1.55M|) {
  150|       |	// For small blocks bias the coordinates to get better distribution
  151|  1.55M|	if (small_block)
  ------------------
  |  Branch (151:6): [True: 80.9k, False: 1.47M]
  ------------------
  152|  80.9k|	{
  153|  80.9k|		x <<= 1;
  154|  80.9k|		y <<= 1;
  155|  80.9k|		z <<= 1;
  156|  80.9k|	}
  157|       |
  158|  1.55M|	seed += (partition_count - 1) * 1024;
  159|       |
  160|  1.55M|	uint32_t rnum = hash52(seed);
  161|       |
  162|  1.55M|	uint8_t seed1 = rnum & 0xF;
  163|  1.55M|	uint8_t seed2 = (rnum >> 4) & 0xF;
  164|  1.55M|	uint8_t seed3 = (rnum >> 8) & 0xF;
  165|  1.55M|	uint8_t seed4 = (rnum >> 12) & 0xF;
  166|  1.55M|	uint8_t seed5 = (rnum >> 16) & 0xF;
  167|  1.55M|	uint8_t seed6 = (rnum >> 20) & 0xF;
  168|  1.55M|	uint8_t seed7 = (rnum >> 24) & 0xF;
  169|  1.55M|	uint8_t seed8 = (rnum >> 28) & 0xF;
  170|  1.55M|	uint8_t seed9 = (rnum >> 18) & 0xF;
  171|  1.55M|	uint8_t seed10 = (rnum >> 22) & 0xF;
  172|  1.55M|	uint8_t seed11 = (rnum >> 26) & 0xF;
  173|  1.55M|	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
  174|       |
  175|       |	// Squaring all the seeds in order to bias their distribution towards lower values.
  176|  1.55M|	seed1 *= seed1;
  177|  1.55M|	seed2 *= seed2;
  178|  1.55M|	seed3 *= seed3;
  179|  1.55M|	seed4 *= seed4;
  180|  1.55M|	seed5 *= seed5;
  181|  1.55M|	seed6 *= seed6;
  182|  1.55M|	seed7 *= seed7;
  183|  1.55M|	seed8 *= seed8;
  184|  1.55M|	seed9 *= seed9;
  185|  1.55M|	seed10 *= seed10;
  186|  1.55M|	seed11 *= seed11;
  187|  1.55M|	seed12 *= seed12;
  188|       |
  189|  1.55M|	int sh1, sh2;
  190|  1.55M|	if (seed & 1)
  ------------------
  |  Branch (190:6): [True: 775k, False: 776k]
  ------------------
  191|   775k|	{
  192|   775k|		sh1 = (seed & 2 ? 4 : 5);
  ------------------
  |  Branch (192:10): [True: 357k, False: 417k]
  ------------------
  193|   775k|		sh2 = (partition_count == 3 ? 6 : 5);
  ------------------
  |  Branch (193:10): [True: 274k, False: 500k]
  ------------------
  194|   775k|	}
  195|   776k|	else
  196|   776k|	{
  197|   776k|		sh1 = (partition_count == 3 ? 6 : 5);
  ------------------
  |  Branch (197:10): [True: 275k, False: 500k]
  ------------------
  198|   776k|		sh2 = (seed & 2 ? 4 : 5);
  ------------------
  |  Branch (198:10): [True: 368k, False: 407k]
  ------------------
  199|   776k|	}
  200|       |
  201|  1.55M|	int sh3 = (seed & 0x10) ? sh1 : sh2;
  ------------------
  |  Branch (201:12): [True: 776k, False: 774k]
  ------------------
  202|       |
  203|  1.55M|	seed1 >>= sh1;
  204|  1.55M|	seed2 >>= sh2;
  205|  1.55M|	seed3 >>= sh1;
  206|  1.55M|	seed4 >>= sh2;
  207|  1.55M|	seed5 >>= sh1;
  208|  1.55M|	seed6 >>= sh2;
  209|  1.55M|	seed7 >>= sh1;
  210|  1.55M|	seed8 >>= sh2;
  211|       |
  212|  1.55M|	seed9 >>= sh3;
  213|  1.55M|	seed10 >>= sh3;
  214|  1.55M|	seed11 >>= sh3;
  215|  1.55M|	seed12 >>= sh3;
  216|       |
  217|  1.55M|	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
  218|  1.55M|	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
  219|  1.55M|	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
  220|  1.55M|	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
  221|       |
  222|       |	// Apply the saw
  223|  1.55M|	a &= 0x3F;
  224|  1.55M|	b &= 0x3F;
  225|  1.55M|	c &= 0x3F;
  226|  1.55M|	d &= 0x3F;
  227|       |
  228|       |	// Remove some of the components if we are to output < 4 partitions.
  229|  1.55M|	if (partition_count <= 3)
  ------------------
  |  Branch (229:6): [True: 1.00M, False: 546k]
  ------------------
  230|  1.00M|	{
  231|  1.00M|		d = 0;
  232|  1.00M|	}
  233|       |
  234|  1.55M|	if (partition_count <= 2)
  ------------------
  |  Branch (234:6): [True: 454k, False: 1.09M]
  ------------------
  235|   454k|	{
  236|   454k|		c = 0;
  237|   454k|	}
  238|       |
  239|  1.55M|	if (partition_count <= 1)
  ------------------
  |  Branch (239:6): [True: 376, False: 1.55M]
  ------------------
  240|    376|	{
  241|    376|		b = 0;
  242|    376|	}
  243|       |
  244|  1.55M|	uint8_t partition;
  245|  1.55M|	if (a >= b && a >= c && a >= d)
  ------------------
  |  Branch (245:6): [True: 851k, False: 700k]
  |  Branch (245:16): [True: 647k, False: 203k]
  |  Branch (245:26): [True: 599k, False: 47.7k]
  ------------------
  246|   599k|	{
  247|   599k|		partition = 0;
  248|   599k|	}
  249|   951k|	else if (b >= c && b >= d)
  ------------------
  |  Branch (249:11): [True: 543k, False: 408k]
  |  Branch (249:21): [True: 476k, False: 66.7k]
  ------------------
  250|   476k|	{
  251|   476k|		partition = 1;
  252|   476k|	}
  253|   475k|	else if (c >= d)
  ------------------
  |  Branch (253:11): [True: 339k, False: 135k]
  ------------------
  254|   339k|	{
  255|   339k|		partition = 2;
  256|   339k|	}
  257|   135k|	else
  258|   135k|	{
  259|   135k|		partition = 3;
  260|   135k|	}
  261|       |
  262|  1.55M|	return partition;
  263|  1.55M|}
astcenc_partition_tables.cpp:_ZL6hash52j:
  116|  1.55M|) {
  117|  1.55M|	inp ^= inp >> 15;
  118|       |
  119|       |	// (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
  120|  1.55M|	inp *= 0xEEDE0891;
  121|  1.55M|	inp ^= inp >> 5;
  122|  1.55M|	inp += inp << 16;
  123|  1.55M|	inp ^= inp >> 7;
  124|  1.55M|	inp ^= inp >> 3;
  125|  1.55M|	inp ^= inp << 6;
  126|  1.55M|	inp ^= inp >> 17;
  127|  1.55M|	return inp;
  128|  1.55M|}
astcenc_partition_tables.cpp:_ZL45build_partition_table_for_one_partition_countR21block_size_descriptorbjjP14partition_infoPm:
  389|      9|) {
  390|      9|	unsigned int next_index = 0;
  391|      9|	bsd.partitioning_count_selected[partition_count - 1] = 0;
  392|      9|	bsd.partitioning_count_all[partition_count - 1] = 0;
  393|       |
  394|       |	// Skip tables larger than config max partition count if we can omit modes
  395|      9|	if (can_omit_partitionings && (partition_count > partition_count_cutoff))
  ------------------
  |  Branch (395:6): [True: 0, False: 9]
  |  Branch (395:32): [True: 0, False: 0]
  ------------------
  396|      0|	{
  397|      0|		return;
  398|      0|	}
  399|       |
  400|       |	// Iterate through twice
  401|       |	//   - Pass 0: Keep selected partitionings
  402|       |	//   - Pass 1: Keep non-selected partitionings (skip if in omit mode)
  403|      9|	unsigned int max_iter = can_omit_partitionings ? 1 : 2;
  ------------------
  |  Branch (403:26): [True: 0, False: 9]
  ------------------
  404|       |
  405|       |	// Tracker for things we built in the first iteration
  406|      9|	uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
  407|     27|	for (unsigned int x = 0; x < max_iter; x++)
  ------------------
  |  Branch (407:27): [True: 18, False: 9]
  ------------------
  408|     18|	{
  409|  18.4k|		for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
  ------------------
  |  Branch (409:28): [True: 18.4k, False: 18]
  ------------------
  410|  18.4k|		{
  411|       |			// Don't include things we built in the first pass
  412|  18.4k|			if ((x == 1) && build[i])
  ------------------
  |  Branch (412:8): [True: 9.21k, False: 9.21k]
  |  Branch (412:20): [True: 5.22k, False: 3.99k]
  ------------------
  413|  5.22k|			{
  414|  5.22k|				continue;
  415|  5.22k|			}
  416|       |
  417|  13.2k|			bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
  418|  13.2k|			if ((x == 0) && !keep_useful)
  ------------------
  |  Branch (418:8): [True: 9.21k, False: 3.99k]
  |  Branch (418:20): [True: 3.40k, False: 5.81k]
  ------------------
  419|  3.40k|			{
  420|  3.40k|				continue;
  421|  3.40k|			}
  422|       |
  423|  9.80k|			generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
  ------------------
  |  |   25|  9.80k|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   68|  9.80k|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  424|  9.80k|			bool keep_canonical = true;
  425|  4.04M|			for (unsigned int j = 0; j < next_index; j++)
  ------------------
  |  Branch (425:29): [True: 4.03M, False: 7.24k]
  ------------------
  426|  4.03M|			{
  427|  4.03M|				bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns +  j * BIT_PATTERN_WORDS);
  ------------------
  |  |   25|  4.03M|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   68|  4.03M|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
              				bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns +  j * BIT_PATTERN_WORDS);
  ------------------
  |  |   25|  4.03M|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   68|  4.03M|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  428|  4.03M|				if (match)
  ------------------
  |  Branch (428:9): [True: 2.55k, False: 4.03M]
  ------------------
  429|  2.55k|				{
  430|  2.55k|					keep_canonical = false;
  431|  2.55k|					break;
  432|  2.55k|				}
  433|  4.03M|			}
  434|       |
  435|  9.80k|			if (keep_useful && keep_canonical)
  ------------------
  |  Branch (435:8): [True: 6.40k, False: 3.40k]
  |  Branch (435:23): [True: 5.22k, False: 1.17k]
  ------------------
  436|  5.22k|			{
  437|  5.22k|				if (x == 0)
  ------------------
  |  Branch (437:9): [True: 5.22k, False: 0]
  ------------------
  438|  5.22k|				{
  439|  5.22k|					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
  440|  5.22k|					bsd.partitioning_count_selected[partition_count - 1]++;
  441|  5.22k|					bsd.partitioning_count_all[partition_count - 1]++;
  442|  5.22k|					build[i] = 1;
  443|  5.22k|					next_index++;
  444|  5.22k|				}
  445|  5.22k|			}
  446|  4.58k|			else
  447|  4.58k|			{
  448|  4.58k|				if (x == 1)
  ------------------
  |  Branch (448:9): [True: 3.99k, False: 589]
  ------------------
  449|  3.99k|				{
  450|  3.99k|					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
  451|  3.99k|					bsd.partitioning_count_all[partition_count - 1]++;
  452|  3.99k|					next_index++;
  453|  3.99k|				}
  454|  4.58k|			}
  455|  9.80k|		}
  456|     18|	}
  457|      9|}
astcenc_partition_tables.cpp:_ZL31generate_canonical_partitioningjPKhPm:
   42|  9.80k|) {
   43|       |	// Clear the pattern
   44|  78.4k|	for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
  ------------------
  |  |   25|  78.4k|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   68|  78.4k|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  |  Branch (44:27): [True: 68.6k, False: 9.80k]
  ------------------
   45|  68.6k|	{
   46|  68.6k|		bit_pattern[i] = 0;
   47|  68.6k|	}
   48|       |
   49|       |	// Store a mapping to reorder the raw partitions so that the partitions are ordered such
   50|       |	// that the lowest texel index in partition N is smaller than the lowest texel index in
   51|       |	// partition N + 1.
   52|  9.80k|	int mapped_index[BLOCK_MAX_PARTITIONS];
   53|  9.80k|	int map_weight_count = 0;
   54|       |
   55|  49.0k|	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
  ------------------
  |  Branch (55:27): [True: 39.2k, False: 9.80k]
  ------------------
   56|  39.2k|	{
   57|  39.2k|		mapped_index[i] = -1;
   58|  39.2k|	}
   59|       |
   60|  1.19M|	for (unsigned int i = 0; i < texel_count; i++)
  ------------------
  |  Branch (60:27): [True: 1.18M, False: 9.80k]
  ------------------
   61|  1.18M|	{
   62|  1.18M|		int index = partition_of_texel[i];
   63|  1.18M|		if (mapped_index[index] < 0)
  ------------------
  |  Branch (63:7): [True: 24.4k, False: 1.16M]
  ------------------
   64|  24.4k|		{
   65|  24.4k|			mapped_index[index] = map_weight_count++;
   66|  24.4k|		}
   67|       |
   68|  1.18M|		uint64_t xlat_index = mapped_index[index];
   69|  1.18M|		bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
   70|  1.18M|	}
   71|  9.80k|}
astcenc_partition_tables.cpp:_ZL31compare_canonical_partitioningsPKmS0_:
   84|  4.03M|) {
   85|  4.03M|	return (part1[0] == part2[0])
  ------------------
  |  Branch (85:9): [True: 79.2k, False: 3.95M]
  ------------------
   86|  4.03M|#if BIT_PATTERN_WORDS > 1
   87|  4.03M|	    && (part1[1] == part2[1])
  ------------------
  |  Branch (87:9): [True: 36.8k, False: 42.3k]
  ------------------
   88|  4.03M|#endif
   89|  4.03M|#if BIT_PATTERN_WORDS > 2
   90|  4.03M|	    && (part1[2] == part2[2])
  ------------------
  |  Branch (90:9): [True: 20.6k, False: 16.1k]
  ------------------
   91|  4.03M|#endif
   92|  4.03M|#if BIT_PATTERN_WORDS > 3
   93|  4.03M|	    && (part1[3] == part2[3])
  ------------------
  |  Branch (93:9): [True: 9.89k, False: 10.7k]
  ------------------
   94|  4.03M|#endif
   95|  4.03M|#if BIT_PATTERN_WORDS > 4
   96|  4.03M|	    && (part1[4] == part2[4])
  ------------------
  |  Branch (96:9): [True: 5.44k, False: 4.44k]
  ------------------
   97|  4.03M|#endif
   98|  4.03M|#if BIT_PATTERN_WORDS > 5
   99|  4.03M|	    && (part1[5] == part2[5])
  ------------------
  |  Branch (99:9): [True: 3.05k, False: 2.39k]
  ------------------
  100|  4.03M|#endif
  101|  4.03M|#if BIT_PATTERN_WORDS > 6
  102|  4.03M|	    && (part1[6] == part2[6])
  ------------------
  |  Branch (102:9): [True: 2.55k, False: 497]
  ------------------
  103|  4.03M|#endif
  104|  4.03M|	    ;
  105|  4.03M|}

_Z23get_2d_percentile_tablejj:
 1168|      2|) {
 1169|      2|	float* unpacked_table = new float[WEIGHTS_MAX_BLOCK_MODES];
 1170|      2|	const packed_percentile_table *apt = get_packed_table(xdim, ydim);
 1171|       |
 1172|       |	// Set the default percentile
 1173|  4.09k|	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (1173:27): [True: 4.09k, False: 2]
  ------------------
 1174|  4.09k|	{
 1175|  4.09k|		unpacked_table[i] = 1.0f;
 1176|  4.09k|	}
 1177|       |
 1178|       |	// Populate the unpacked percentile values
 1179|      6|	for (int i = 0; i < 2; i++)
  ------------------
  |  Branch (1179:18): [True: 4, False: 2]
  ------------------
 1180|      4|	{
 1181|      4|		unsigned int itemcount = apt->item_count[i];
 1182|      4|		unsigned int difscale = apt->difscales[i];
 1183|      4|		unsigned int accum = apt->initial_percs[i];
 1184|      4|		const uint16_t *item_ptr = apt->items[i];
 1185|       |
 1186|    924|		for (unsigned int j = 0; j < itemcount; j++)
  ------------------
  |  Branch (1186:28): [True: 920, False: 4]
  ------------------
 1187|    920|		{
 1188|    920|			uint16_t item = item_ptr[j];
 1189|    920|			unsigned int idx = item & 0x7FF;
 1190|    920|			unsigned int weight = (item >> 11) & 0x1F;
 1191|    920|			accum += weight;
 1192|    920|			unpacked_table[idx] = static_cast<float>(accum) / static_cast<float>(difscale);
 1193|    920|		}
 1194|      4|	}
 1195|       |
 1196|      2|	return unpacked_table;
 1197|      2|}
astcenc_percentile_tables.cpp:_ZL16get_packed_tableii:
 1112|      2|) {
 1113|      2|	int idx = (ydim << 8) | xdim;
 1114|      2|	switch (idx)
  ------------------
  |  Branch (1114:10): [True: 0, False: 2]
  ------------------
 1115|      2|	{
 1116|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (4 * 4)
 1117|      1|		case 0x0404: return &block_pcd_4x4;
  ------------------
  |  Branch (1117:3): [True: 1, False: 1]
  ------------------
 1118|      0|#endif
 1119|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (5 * 4)
 1120|      0|		case 0x0405: return &block_pcd_5x4;
  ------------------
  |  Branch (1120:3): [True: 0, False: 2]
  ------------------
 1121|      0|#endif
 1122|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (5 * 5)
 1123|      0|		case 0x0505: return &block_pcd_5x5;
  ------------------
  |  Branch (1123:3): [True: 0, False: 2]
  ------------------
 1124|      0|#endif
 1125|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (6 * 5)
 1126|      0|		case 0x0506: return &block_pcd_6x5;
  ------------------
  |  Branch (1126:3): [True: 0, False: 2]
  ------------------
 1127|      0|#endif
 1128|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (6 * 6)
 1129|      0|		case 0x0606: return &block_pcd_6x6;
  ------------------
  |  Branch (1129:3): [True: 0, False: 2]
  ------------------
 1130|      0|#endif
 1131|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (8 * 5)
 1132|      0|		case 0x0508: return &block_pcd_8x5;
  ------------------
  |  Branch (1132:3): [True: 0, False: 2]
  ------------------
 1133|      0|#endif
 1134|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (8 * 6)
 1135|      0|		case 0x0608: return &block_pcd_8x6;
  ------------------
  |  Branch (1135:3): [True: 0, False: 2]
  ------------------
 1136|      0|#endif
 1137|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (8 * 8)
 1138|      0|		case 0x0808: return &block_pcd_8x8;
  ------------------
  |  Branch (1138:3): [True: 0, False: 2]
  ------------------
 1139|      0|#endif
 1140|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 5)
 1141|      0|		case 0x050A: return &block_pcd_10x5;
  ------------------
  |  Branch (1141:3): [True: 0, False: 2]
  ------------------
 1142|      0|#endif
 1143|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 6)
 1144|      0|		case 0x060A: return &block_pcd_10x6;
  ------------------
  |  Branch (1144:3): [True: 0, False: 2]
  ------------------
 1145|      0|#endif
 1146|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 8)
 1147|      0|		case 0x080A: return &block_pcd_10x8;
  ------------------
  |  Branch (1147:3): [True: 0, False: 2]
  ------------------
 1148|      0|#endif
 1149|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 10)
 1150|      0|		case 0x0A0A: return &block_pcd_10x10;
  ------------------
  |  Branch (1150:3): [True: 0, False: 2]
  ------------------
 1151|      0|#endif
 1152|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (12 * 10)
 1153|      0|		case 0x0A0C: return &block_pcd_12x10;
  ------------------
  |  Branch (1153:3): [True: 0, False: 2]
  ------------------
 1154|      0|#endif
 1155|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (12 * 12)
 1156|      1|		case 0x0C0C: return &block_pcd_12x12;
  ------------------
  |  Branch (1156:3): [True: 1, False: 1]
  ------------------
 1157|      2|#endif
 1158|      2|	}
 1159|       |
 1160|       |	// Should never hit this with a valid 2D block size
 1161|      0|	return nullptr;
 1162|      2|}

_Z20physical_to_symbolicRK21block_size_descriptorPKhR25symbolic_compressed_block:
  295|    308|) {
  296|    308|	uint8_t bswapped[16];
  297|       |
  298|    308|	scb.block_type = SYM_BTYPE_NONCONST;
  299|       |
  300|       |	// Extract header fields
  301|    308|	int block_mode = read_bits(11, 0, pcb);
  302|    308|	if ((block_mode & 0x1FF) == 0x1FC)
  ------------------
  |  Branch (302:6): [True: 233, False: 75]
  ------------------
  303|    233|	{
  304|       |		// Constant color block
  305|       |
  306|       |		// Check what format the data has
  307|    233|		if (block_mode & 0x200)
  ------------------
  |  Branch (307:7): [True: 209, False: 24]
  ------------------
  308|    209|		{
  309|    209|			scb.block_type = SYM_BTYPE_CONST_F16;
  310|    209|		}
  311|     24|		else
  312|     24|		{
  313|     24|			scb.block_type = SYM_BTYPE_CONST_U16;
  314|     24|		}
  315|       |
  316|    233|		scb.partition_count = 0;
  317|  1.16k|		for (int i = 0; i < 4; i++)
  ------------------
  |  Branch (317:19): [True: 932, False: 233]
  ------------------
  318|    932|		{
  319|    932|			scb.constant_color[i] = pcb[2 * i + 8] | (pcb[2 * i + 9] << 8);
  320|    932|		}
  321|       |
  322|       |		// Additionally, check that the void-extent
  323|    233|		if (bsd.zdim == 1)
  ------------------
  |  Branch (323:7): [True: 121, False: 112]
  ------------------
  324|    121|		{
  325|       |			// 2D void-extent
  326|    121|			int rsvbits = read_bits(2, 10, pcb);
  327|    121|			if (rsvbits != 3)
  ------------------
  |  Branch (327:8): [True: 4, False: 117]
  ------------------
  328|      4|			{
  329|      4|				scb.block_type = SYM_BTYPE_ERROR;
  330|      4|				return;
  331|      4|			}
  332|       |
  333|       |			// Low values span 3 bytes so need two read_bits calls
  334|    117|			int vx_low_s = read_bits(8, 12, pcb) | (read_bits(5, 12 + 8, pcb) << 8);
  335|    117|			int vx_high_s = read_bits(13, 25, pcb);
  336|    117|			int vx_low_t = read_bits(8, 38, pcb) | (read_bits(5, 38 + 8, pcb) << 8);
  337|    117|			int vx_high_t = read_bits(13, 51, pcb);
  338|       |
  339|    117|			int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF &&
  ------------------
  |  Branch (339:19): [True: 77, False: 40]
  |  Branch (339:41): [True: 53, False: 24]
  ------------------
  340|    117|			               vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;
  ------------------
  |  Branch (340:19): [True: 27, False: 26]
  |  Branch (340:41): [True: 1, False: 26]
  ------------------
  341|       |
  342|    117|			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
  ------------------
  |  Branch (342:9): [True: 90, False: 27]
  |  Branch (342:34): [True: 22, False: 5]
  |  Branch (342:60): [True: 111, False: 1]
  ------------------
  343|    111|			{
  344|    111|				scb.block_type = SYM_BTYPE_ERROR;
  345|    111|				return;
  346|    111|			}
  347|    117|		}
  348|    112|		else
  349|    112|		{
  350|       |			// 3D void-extent
  351|    112|			int vx_low_s = read_bits(9, 10, pcb);
  352|    112|			int vx_high_s = read_bits(9, 19, pcb);
  353|    112|			int vx_low_t = read_bits(9, 28, pcb);
  354|    112|			int vx_high_t = read_bits(9, 37, pcb);
  355|    112|			int vx_low_r = read_bits(9, 46, pcb);
  356|    112|			int vx_high_r = read_bits(9, 55, pcb);
  357|       |
  358|    112|			int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF &&
  ------------------
  |  Branch (358:19): [True: 72, False: 40]
  |  Branch (358:40): [True: 58, False: 14]
  ------------------
  359|    112|			               vx_low_t == 0x1FF && vx_high_t == 0x1FF &&
  ------------------
  |  Branch (359:19): [True: 42, False: 16]
  |  Branch (359:40): [True: 32, False: 10]
  ------------------
  360|    112|			               vx_low_r == 0x1FF && vx_high_r == 0x1FF;
  ------------------
  |  Branch (360:19): [True: 18, False: 14]
  |  Branch (360:40): [True: 3, False: 15]
  ------------------
  361|       |
  362|    112|			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_r >= vx_high_r) && !all_ones)
  ------------------
  |  Branch (362:9): [True: 80, False: 32]
  |  Branch (362:34): [True: 13, False: 19]
  |  Branch (362:59): [True: 13, False: 6]
  |  Branch (362:85): [True: 103, False: 3]
  ------------------
  363|    103|			{
  364|    103|				scb.block_type = SYM_BTYPE_ERROR;
  365|    103|				return;
  366|    103|			}
  367|    112|		}
  368|       |
  369|     15|		return;
  370|    233|	}
  371|       |
  372|     75|	unsigned int packed_index = bsd.block_mode_packed_index[block_mode];
  373|     75|	if (packed_index == BLOCK_BAD_BLOCK_MODE)
  ------------------
  |  Branch (373:6): [True: 25, False: 50]
  ------------------
  374|     25|	{
  375|     25|		scb.block_type = SYM_BTYPE_ERROR;
  376|     25|		return;
  377|     25|	}
  378|       |
  379|     50|	const auto& bm = bsd.get_block_mode(block_mode);
  380|     50|	const auto& di = bsd.get_decimation_info(bm.decimation_mode);
  381|       |
  382|     50|	int weight_count = di.weight_count;
  383|     50|	promise(weight_count > 0);
  ------------------
  |  |   61|     50|	#define promise(cond) assert(cond)
  ------------------
  384|       |
  385|     50|	quant_method weight_quant_method = static_cast<quant_method>(bm.quant_mode);
  386|     50|	int is_dual_plane = bm.is_dual_plane;
  387|       |
  388|     50|	int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
  ------------------
  |  Branch (388:26): [True: 18, False: 32]
  ------------------
  389|       |
  390|     50|	int partition_count = read_bits(2, 11, pcb) + 1;
  391|     50|	promise(partition_count > 0);
  ------------------
  |  |   61|     50|	#define promise(cond) assert(cond)
  ------------------
  392|       |
  393|     50|	scb.block_mode = static_cast<uint16_t>(block_mode);
  394|     50|	scb.partition_count = static_cast<uint8_t>(partition_count);
  395|       |
  396|    850|	for (int i = 0; i < 16; i++)
  ------------------
  |  Branch (396:18): [True: 800, False: 50]
  ------------------
  397|    800|	{
  398|    800|		bswapped[i] = static_cast<uint8_t>(bitrev8(pcb[15 - i]));
  399|    800|	}
  400|       |
  401|     50|	int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant_method);
  402|       |
  403|     50|	int below_weights_pos = 128 - bits_for_weights;
  404|       |
  405|     50|	uint8_t indices[64];
  406|     50|	const auto& qat = quant_and_xfer_tables[weight_quant_method];
  407|       |
  408|     50|	decode_ise(weight_quant_method, real_weight_count, bswapped, indices, 0);
  409|       |
  410|     50|	if (is_dual_plane)
  ------------------
  |  Branch (410:6): [True: 18, False: 32]
  ------------------
  411|     18|	{
  412|    280|		for (int i = 0; i < weight_count; i++)
  ------------------
  |  Branch (412:19): [True: 262, False: 18]
  ------------------
  413|    262|		{
  414|    262|			scb.weights[i] = qat.unscramble_and_unquant_map[indices[2 * i]];
  415|    262|			scb.weights[i + WEIGHTS_PLANE2_OFFSET] = qat.unscramble_and_unquant_map[indices[2 * i + 1]];
  416|    262|		}
  417|     18|	}
  418|     32|	else
  419|     32|	{
  420|  1.02k|		for (int i = 0; i < weight_count; i++)
  ------------------
  |  Branch (420:19): [True: 991, False: 32]
  ------------------
  421|    991|		{
  422|    991|			scb.weights[i] = qat.unscramble_and_unquant_map[indices[i]];
  423|    991|		}
  424|     32|	}
  425|       |
  426|     50|	if (is_dual_plane && partition_count == 4)
  ------------------
  |  Branch (426:6): [True: 18, False: 32]
  |  Branch (426:23): [True: 11, False: 7]
  ------------------
  427|     11|	{
  428|     11|		scb.block_type = SYM_BTYPE_ERROR;
  429|     11|		return;
  430|     11|	}
  431|       |
  432|     39|	scb.color_formats_matched = 0;
  433|       |
  434|       |	// Determine the format of each endpoint pair
  435|     39|	int color_formats[BLOCK_MAX_PARTITIONS];
  436|     39|	int encoded_type_highpart_size = 0;
  437|     39|	if (partition_count == 1)
  ------------------
  |  Branch (437:6): [True: 11, False: 28]
  ------------------
  438|     11|	{
  439|     11|		color_formats[0] = read_bits(4, 13, pcb);
  440|     11|		scb.partition_index = 0;
  441|     11|	}
  442|     28|	else
  443|     28|	{
  444|     28|		encoded_type_highpart_size = (3 * partition_count) - 4;
  445|     28|		below_weights_pos -= encoded_type_highpart_size;
  446|     28|		int encoded_type = read_bits(6, 13 + PARTITION_INDEX_BITS, pcb) |
  447|     28|		                  (read_bits(encoded_type_highpart_size, below_weights_pos, pcb) << 6);
  448|     28|		int baseclass = encoded_type & 0x3;
  449|     28|		if (baseclass == 0)
  ------------------
  |  Branch (449:7): [True: 7, False: 21]
  ------------------
  450|      7|		{
  451|     25|			for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (451:20): [True: 18, False: 7]
  ------------------
  452|     18|			{
  453|     18|				color_formats[i] = (encoded_type >> 2) & 0xF;
  454|     18|			}
  455|       |
  456|      7|			below_weights_pos += encoded_type_highpart_size;
  457|      7|			scb.color_formats_matched = 1;
  458|      7|			encoded_type_highpart_size = 0;
  459|      7|		}
  460|     21|		else
  461|     21|		{
  462|     21|			int bitpos = 2;
  463|     21|			baseclass--;
  464|       |
  465|     91|			for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (465:20): [True: 70, False: 21]
  ------------------
  466|     70|			{
  467|     70|				color_formats[i] = (((encoded_type >> bitpos) & 1) + baseclass) << 2;
  468|     70|				bitpos++;
  469|     70|			}
  470|       |
  471|     91|			for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (471:20): [True: 70, False: 21]
  ------------------
  472|     70|			{
  473|     70|				color_formats[i] |= (encoded_type >> bitpos) & 3;
  474|     70|				bitpos += 2;
  475|     70|			}
  476|     21|		}
  477|     28|		scb.partition_index = static_cast<uint16_t>(read_bits(10, 13, pcb));
  478|     28|	}
  479|       |
  480|    138|	for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (480:18): [True: 99, False: 39]
  ------------------
  481|     99|	{
  482|     99|		scb.color_formats[i] = static_cast<uint8_t>(color_formats[i]);
  483|     99|	}
  484|       |
  485|       |	// Determine number of color endpoint integers
  486|     39|	int color_integer_count = 0;
  487|    138|	for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (487:18): [True: 99, False: 39]
  ------------------
  488|     99|	{
  489|     99|		int endpoint_class = color_formats[i] >> 2;
  490|     99|		color_integer_count += (endpoint_class + 1) * 2;
  491|     99|	}
  492|       |
  493|     39|	if (color_integer_count > 18)
  ------------------
  |  Branch (493:6): [True: 4, False: 35]
  ------------------
  494|      4|	{
  495|      4|		scb.block_type = SYM_BTYPE_ERROR;
  496|      4|		return;
  497|      4|	}
  498|       |
  499|       |	// Determine the color endpoint format to use
  500|     35|	static const int color_bits_arr[5] { -1, 115 - 4, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS };
  501|     35|	int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
  502|     35|	if (is_dual_plane)
  ------------------
  |  Branch (502:6): [True: 7, False: 28]
  ------------------
  503|      7|	{
  504|      7|		color_bits -= 2;
  505|      7|	}
  506|       |
  507|     35|	if (color_bits < 0)
  ------------------
  |  Branch (507:6): [True: 4, False: 31]
  ------------------
  508|      4|	{
  509|      4|		color_bits = 0;
  510|      4|	}
  511|       |
  512|     35|	int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits];
  513|     35|	if (color_quant_level < QUANT_6)
  ------------------
  |  Branch (513:6): [True: 10, False: 25]
  ------------------
  514|     10|	{
  515|     10|		scb.block_type = SYM_BTYPE_ERROR;
  516|     10|		return;
  517|     10|	}
  518|       |
  519|       |	// Unpack the integer color values and assign to endpoints
  520|     25|	scb.quant_mode = static_cast<quant_method>(color_quant_level);
  521|       |
  522|     25|	uint8_t values_to_decode[32];
  523|     25|	decode_ise(static_cast<quant_method>(color_quant_level), color_integer_count, pcb,
  524|     25|	           values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS));
  ------------------
  |  Branch (524:32): [True: 11, False: 14]
  ------------------
  525|       |
  526|     25|	int valuecount_to_decode = 0;
  527|     25|	const uint8_t* unpack_table = color_scrambled_pquant_to_uquant_tables[scb.quant_mode - QUANT_6];
  528|     77|	for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (528:18): [True: 52, False: 25]
  ------------------
  529|     52|	{
  530|     52|		int vals = 2 * (color_formats[i] >> 2) + 2;
  531|    228|		for (int j = 0; j < vals; j++)
  ------------------
  |  Branch (531:19): [True: 176, False: 52]
  ------------------
  532|    176|		{
  533|    176|			scb.color_values[i][j] = unpack_table[values_to_decode[j + valuecount_to_decode]];
  534|    176|		}
  535|     52|		valuecount_to_decode += vals;
  536|     52|	}
  537|       |
  538|       |	// Fetch component for second-plane in the case of dual plane of weights.
  539|     25|	scb.plane2_component = -1;
  540|     25|	if (is_dual_plane)
  ------------------
  |  Branch (540:6): [True: 5, False: 20]
  ------------------
  541|      5|	{
  542|      5|		scb.plane2_component = static_cast<int8_t>(read_bits(2, below_weights_pos - 2, pcb));
  543|      5|	}
  544|     25|}
astcenc_symbolic_physical.cpp:_ZL7bitrev8i:
   34|    800|{
   35|    800|	p = ((p & 0x0F) << 4) | ((p >> 4) & 0x0F);
   36|    800|	p = ((p & 0x33) << 2) | ((p >> 2) & 0x33);
   37|    800|	p = ((p & 0x55) << 1) | ((p >> 1) & 0x55);
   38|    800|	return p;
   39|    800|}
astcenc_symbolic_physical.cpp:_ZL9read_bitsiiPKh:
   58|  1.95k|) {
   59|  1.95k|	int mask = (1 << bitcount) - 1;
   60|  1.95k|	ptr += bitoffset >> 3;
   61|  1.95k|	bitoffset &= 7;
   62|  1.95k|	int value = ptr[0] | (ptr[1] << 8);
   63|  1.95k|	value >>= bitoffset;
   64|  1.95k|	value &= mask;
   65|  1.95k|	return value;
   66|  1.95k|}

_Z29round_up_to_simd_multiple_vlam:
  243|  40.5k|{
  244|  40.5k|	size_t multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH;
  ------------------
  |  |  203|  40.5k|	#define ASTCENC_SIMD_WIDTH 4
  ------------------
              	size_t multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH;
  ------------------
  |  |  203|  40.5k|	#define ASTCENC_SIMD_WIDTH 4
  ------------------
  245|  40.5k|	return multiples * ASTCENC_SIMD_WIDTH;
  ------------------
  |  |  203|  40.5k|	#define ASTCENC_SIMD_WIDTH 4
  ------------------
  246|  40.5k|}

