_Z15bsd_initializerv:
   63|      1|{
   64|      4|	for (int i = 0; i < testSz.size(); i++)
  ------------------
  |  Branch (64:18): [True: 3, False: 1]
  ------------------
   65|      3|	{
   66|      3|		init_block_size_descriptor(
   67|      3|		    testSz[i].x,
   68|      3|		    testSz[i].y,
   69|      3|		    testSz[i].z,
   70|      3|		    false,
   71|      3|		    4,
   72|      3|		    1.0f,
   73|      3|		    testBSD[i]);
   74|      3|	}
   75|       |
   76|      1|	return true;
   77|      1|}
LLVMFuzzerTestOneInput:
   82|    298|) {
   83|       |	// Preinitialize the block size descriptors we need
   84|    298|	static bool init = bsd_initializer();
   85|       |
   86|       |	// Must have 4 (select block size) and 16 (payload) bytes
   87|    298|	if (size < 4 + 16)
  ------------------
  |  Branch (87:6): [True: 9, False: 289]
  ------------------
   88|      9|	{
   89|      9|		return 0;
   90|      9|	}
   91|       |
   92|    289|	FuzzedDataProvider stream(data, size);
   93|       |
   94|       |	// Select a block size to test
   95|    289|	int i = stream.ConsumeIntegralInRange<int>(0, testSz.size() - 1);
   96|       |
   97|       |	// Populate the physical block
   98|    289|	uint8_t pcb[16];
   99|    289|	std::vector<uint8_t> buffer = stream.ConsumeBytes<uint8_t>(16);
  100|    289|	std::memcpy(pcb, buffer.data(), 16);
  101|       |
  102|       |	// Call the function under test
  103|    289|	symbolic_compressed_block scb;
  104|    289|	physical_to_symbolic(testBSD[i], pcb, scb);
  105|       |
  106|    289|	return 0;
  107|    298|}

_Z26init_block_size_descriptorjjjbjfR21block_size_descriptor:
 1207|      3|) {
 1208|      3|	if (z_texels > 1)
  ------------------
  |  Branch (1208:6): [True: 1, False: 2]
  ------------------
 1209|      1|	{
 1210|      1|		construct_block_size_descriptor_3d(x_texels, y_texels, z_texels, bsd);
 1211|      1|	}
 1212|      2|	else
 1213|      2|	{
 1214|      2|		construct_block_size_descriptor_2d(x_texels, y_texels, can_omit_modes, mode_cutoff, bsd);
 1215|      2|	}
 1216|       |
 1217|      3|	init_partition_tables(bsd, can_omit_modes, partition_count_cutoff);
 1218|      3|}
astcenc_block_sizes.cpp:_ZL34construct_block_size_descriptor_3djjjR21block_size_descriptor:
 1030|      1|) {
 1031|       |	// Store a remap table for storing packed decimation modes.
 1032|       |	// Indexing uses [Z * 64 + Y *  8 + X] and max size for each axis is 6.
 1033|      1|	static constexpr unsigned int MAX_DMI = 6 * 64 + 6 * 8 + 6;
 1034|      1|	int decimation_mode_index[MAX_DMI];
 1035|      1|	unsigned int decimation_mode_count = 0;
 1036|       |
 1037|      1|	dt_init_working_buffers* wb = new dt_init_working_buffers;
 1038|       |
 1039|      1|	bsd.xdim = static_cast<uint8_t>(x_texels);
 1040|      1|	bsd.ydim = static_cast<uint8_t>(y_texels);
 1041|      1|	bsd.zdim = static_cast<uint8_t>(z_texels);
 1042|      1|	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels * z_texels);
 1043|       |
 1044|    439|	for (unsigned int i = 0; i < MAX_DMI; i++)
  ------------------
  |  Branch (1044:27): [True: 438, False: 1]
  ------------------
 1045|    438|	{
 1046|    438|		decimation_mode_index[i] = -1;
 1047|    438|	}
 1048|       |
 1049|       |	// gather all the infill-modes that can be used with the current block size
 1050|      6|	for (unsigned int x_weights = 2; x_weights <= x_texels; x_weights++)
  ------------------
  |  Branch (1050:35): [True: 5, False: 1]
  ------------------
 1051|      5|	{
 1052|     30|		for (unsigned int y_weights = 2; y_weights <= y_texels; y_weights++)
  ------------------
  |  Branch (1052:36): [True: 25, False: 5]
  ------------------
 1053|     25|		{
 1054|    150|			for (unsigned int z_weights = 2; z_weights <= z_texels; z_weights++)
  ------------------
  |  Branch (1054:37): [True: 125, False: 25]
  ------------------
 1055|    125|			{
 1056|    125|				unsigned int weight_count = x_weights * y_weights * z_weights;
 1057|    125|				if (weight_count > BLOCK_MAX_WEIGHTS)
  ------------------
  |  Branch (1057:9): [True: 47, False: 78]
  ------------------
 1058|     47|				{
 1059|     47|					continue;
 1060|     47|				}
 1061|       |
 1062|     78|				decimation_info& di = bsd.decimation_tables[decimation_mode_count];
 1063|     78|				decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
 1064|     78|				init_decimation_info_3d(x_texels, y_texels, z_texels, x_weights, y_weights, z_weights, di, *wb);
 1065|       |
 1066|     78|				int maxprec_1plane = -1;
 1067|     78|				int maxprec_2planes = -1;
 1068|  1.01k|				for (unsigned int i = 0; i < 12; i++)
  ------------------
  |  Branch (1068:30): [True: 936, False: 78]
  ------------------
 1069|    936|				{
 1070|    936|					unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
 1071|    936|					if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (1071:10): [True: 916, False: 20]
  |  Branch (1071:50): [True: 417, False: 499]
  ------------------
 1072|    417|					{
 1073|    417|						maxprec_1plane = i;
 1074|    417|					}
 1075|       |
 1076|    936|					unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
 1077|    936|					if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (1077:10): [True: 935, False: 1]
  |  Branch (1077:51): [True: 154, False: 781]
  ------------------
 1078|    154|					{
 1079|    154|						maxprec_2planes = i;
 1080|    154|					}
 1081|    936|				}
 1082|       |
 1083|     78|				if ((2 * weight_count) > BLOCK_MAX_WEIGHTS)
  ------------------
  |  Branch (1083:9): [True: 46, False: 32]
  ------------------
 1084|     46|				{
 1085|     46|					maxprec_2planes = -1;
 1086|     46|				}
 1087|       |
 1088|     78|				bsd.decimation_modes[decimation_mode_count].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
 1089|     78|				bsd.decimation_modes[decimation_mode_count].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
 1090|     78|				bsd.decimation_modes[decimation_mode_count].refprec_1plane = maxprec_1plane == -1 ? 0 : 0xFFFF;
  ------------------
  |  Branch (1090:66): [True: 0, False: 78]
  ------------------
 1091|     78|				bsd.decimation_modes[decimation_mode_count].refprec_2planes = maxprec_2planes == -1 ? 0 : 0xFFFF;
  ------------------
  |  Branch (1091:67): [True: 46, False: 32]
  ------------------
 1092|     78|				decimation_mode_count++;
 1093|     78|			}
 1094|     25|		}
 1095|      5|	}
 1096|       |
 1097|       |	// Ensure the end of the array contains valid data (should never get read)
 1098|     10|	for (unsigned int i = decimation_mode_count; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  ------------------
  |  Branch (1098:47): [True: 9, False: 1]
  ------------------
 1099|      9|	{
 1100|      9|		bsd.decimation_modes[i].maxprec_1plane = -1;
 1101|      9|		bsd.decimation_modes[i].maxprec_2planes = -1;
 1102|      9|		bsd.decimation_modes[i].refprec_1plane = 0;
 1103|      9|		bsd.decimation_modes[i].refprec_2planes = 0;
 1104|      9|	}
 1105|       |
 1106|      1|	bsd.decimation_mode_count_always = 0; // Skipped for 3D modes
 1107|      1|	bsd.decimation_mode_count_selected = decimation_mode_count;
 1108|      1|	bsd.decimation_mode_count_all = decimation_mode_count;
 1109|       |
 1110|       |	// Construct the list of block formats referencing the decimation tables
 1111|       |
 1112|       |	// Clear the list to a known-bad value
 1113|  2.04k|	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (1113:27): [True: 2.04k, False: 1]
  ------------------
 1114|  2.04k|	{
 1115|  2.04k|		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
 1116|  2.04k|	}
 1117|       |
 1118|      1|	unsigned int packed_idx = 0;
 1119|      1|	unsigned int bm_counts[2] { 0 };
 1120|       |
 1121|       |	// Iterate two times to build a usefully ordered list:
 1122|       |	//   - Pass 0 - keep valid single plane block modes
 1123|       |	//   - Pass 1 - keep valid dual plane block modes
 1124|      3|	for (unsigned int j = 0; j < 2; j++)
  ------------------
  |  Branch (1124:27): [True: 2, False: 1]
  ------------------
 1125|      2|	{
 1126|  4.09k|		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (1126:28): [True: 4.09k, False: 2]
  ------------------
 1127|  4.09k|		{
 1128|       |			// Skip modes we've already included in a previous pass
 1129|  4.09k|			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  ------------------
  |  Branch (1129:8): [True: 435, False: 3.66k]
  ------------------
 1130|    435|			{
 1131|    435|				continue;
 1132|    435|			}
 1133|       |
 1134|  3.66k|			unsigned int x_weights;
 1135|  3.66k|			unsigned int y_weights;
 1136|  3.66k|			unsigned int z_weights;
 1137|  3.66k|			bool is_dual_plane;
 1138|  3.66k|			unsigned int quant_mode;
 1139|  3.66k|			unsigned int weight_bits;
 1140|       |
 1141|  3.66k|			bool valid = decode_block_mode_3d(i, x_weights, y_weights, z_weights, is_dual_plane, quant_mode, weight_bits);
 1142|       |			// Skip invalid encodings
 1143|  3.66k|			if (!valid || x_weights > x_texels || y_weights > y_texels || z_weights > z_texels)
  ------------------
  |  Branch (1143:8): [True: 2.97k, False: 689]
  |  Branch (1143:18): [True: 0, False: 689]
  |  Branch (1143:42): [True: 0, False: 689]
  |  Branch (1143:66): [True: 0, False: 689]
  ------------------
 1144|  2.97k|			{
 1145|  2.97k|				continue;
 1146|  2.97k|			}
 1147|       |
 1148|       |			// Skip encodings in the wrong iteration
 1149|    689|			if ((j == 0 && is_dual_plane) || (j == 1 && !is_dual_plane))
  ------------------
  |  Branch (1149:9): [True: 562, False: 127]
  |  Branch (1149:19): [True: 127, False: 435]
  |  Branch (1149:38): [True: 127, False: 435]
  |  Branch (1149:48): [True: 0, False: 127]
  ------------------
 1150|    127|			{
 1151|    127|				continue;
 1152|    127|			}
 1153|       |
 1154|       |			// Always skip encodings we can't physically encode based on bit availability
 1155|    562|			if (is_dual_plane)
  ------------------
  |  Branch (1155:8): [True: 127, False: 435]
  ------------------
 1156|    127|			{
 1157|       |				 // This is the only check we need as only support 1 partition
 1158|    127|				 if ((109 - weight_bits) <= 0)
  ------------------
  |  Branch (1158:10): [True: 0, False: 127]
  ------------------
 1159|      0|				 {
 1160|      0|					continue;
 1161|      0|				 }
 1162|    127|			}
 1163|    435|			else
 1164|    435|			{
 1165|       |				// This is conservative - fewer bits may be available for > 1 partition
 1166|    435|				 if ((111 - weight_bits) <= 0)
  ------------------
  |  Branch (1166:10): [True: 0, False: 435]
  ------------------
 1167|      0|				 {
 1168|      0|					continue;
 1169|      0|				 }
 1170|    435|			}
 1171|       |
 1172|    562|			int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
 1173|    562|			bsd.block_modes[packed_idx].decimation_mode = static_cast<uint8_t>(decimation_mode);
 1174|    562|			bsd.block_modes[packed_idx].quant_mode = static_cast<uint8_t>(quant_mode);
 1175|    562|			bsd.block_modes[packed_idx].weight_bits = static_cast<uint8_t>(weight_bits);
 1176|    562|			bsd.block_modes[packed_idx].is_dual_plane = static_cast<uint8_t>(is_dual_plane);
 1177|    562|			bsd.block_modes[packed_idx].mode_index = static_cast<uint16_t>(i);
 1178|       |
 1179|    562|			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_idx);
 1180|    562|			bm_counts[j]++;
 1181|    562|			packed_idx++;
 1182|    562|		}
 1183|      2|	}
 1184|       |
 1185|      1|	bsd.block_mode_count_1plane_always = 0;  // Skipped for 3D modes
 1186|      1|	bsd.block_mode_count_1plane_selected = bm_counts[0];
 1187|      1|	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1];
 1188|      1|	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1];
 1189|       |
 1190|      1|#if !defined(ASTCENC_DECOMPRESS_ONLY)
 1191|       |	// Determine the texels to use for kmeans clustering.
 1192|      1|	assign_kmeans_texels(bsd);
 1193|      1|#endif
 1194|       |
 1195|      1|	delete wb;
 1196|      1|}
astcenc_block_sizes.cpp:_ZL23init_decimation_info_3djjjjjjR15decimation_infoR23dt_init_working_buffers:
  459|     78|) {
  460|     78|	unsigned int texels_per_block = x_texels * y_texels * z_texels;
  461|     78|	unsigned int weights_per_block = x_weights * y_weights * z_weights;
  462|       |
  463|     78|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  464|     78|	uint8_t max_texel_count_of_weight = 0;
  465|     78|#endif
  466|       |
  467|     78|	promise(weights_per_block > 0);
  ------------------
  |  |   62|     78|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (467:2): [True: 78, False: 0]
  ------------------
  468|     78|	promise(texels_per_block > 0);
  ------------------
  |  |   62|     78|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (468:2): [True: 78, False: 0]
  ------------------
  469|       |
  470|  3.03k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (470:27): [True: 2.95k, False: 78]
  ------------------
  471|  2.95k|	{
  472|  2.95k|		wb.texel_count_of_weight[i] = 0;
  473|  2.95k|	}
  474|       |
  475|  16.9k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (475:27): [True: 16.8k, False: 78]
  ------------------
  476|  16.8k|	{
  477|  16.8k|		wb.weight_count_of_texel[i] = 0;
  478|  16.8k|	}
  479|       |
  480|    546|	for (unsigned int z = 0; z < z_texels; z++)
  ------------------
  |  Branch (480:27): [True: 468, False: 78]
  ------------------
  481|    468|	{
  482|  3.27k|		for (unsigned int y = 0; y < y_texels; y++)
  ------------------
  |  Branch (482:28): [True: 2.80k, False: 468]
  ------------------
  483|  2.80k|		{
  484|  19.6k|			for (unsigned int x = 0; x < x_texels; x++)
  ------------------
  |  Branch (484:29): [True: 16.8k, False: 2.80k]
  ------------------
  485|  16.8k|			{
  486|  16.8k|				int texel = (z * y_texels + y) * x_texels + x;
  487|       |
  488|  16.8k|				int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  489|  16.8k|				int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  490|  16.8k|				int z_weight = (((1024 + z_texels / 2) / (z_texels - 1)) * z * (z_weights - 1) + 32) >> 6;
  491|       |
  492|  16.8k|				int x_weight_frac = x_weight & 0xF;
  493|  16.8k|				int y_weight_frac = y_weight & 0xF;
  494|  16.8k|				int z_weight_frac = z_weight & 0xF;
  495|  16.8k|				int x_weight_int = x_weight >> 4;
  496|  16.8k|				int y_weight_int = y_weight >> 4;
  497|  16.8k|				int z_weight_int = z_weight >> 4;
  498|  16.8k|				int qweight[4];
  499|  16.8k|				int weight[4];
  500|  16.8k|				qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
  501|  16.8k|				qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
  502|       |
  503|       |				// simplex interpolation
  504|  16.8k|				int fs = x_weight_frac;
  505|  16.8k|				int ft = y_weight_frac;
  506|  16.8k|				int fp = z_weight_frac;
  507|       |
  508|  16.8k|				int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
  509|  16.8k|				int N = x_weights;
  510|  16.8k|				int NM = x_weights * y_weights;
  511|       |
  512|  16.8k|				int s1, s2, w0, w1, w2, w3;
  513|  16.8k|				switch (cas)
  514|  16.8k|				{
  515|  1.15k|				case 7:
  ------------------
  |  Branch (515:5): [True: 1.15k, False: 15.6k]
  ------------------
  516|  1.15k|					s1 = 1;
  517|  1.15k|					s2 = N;
  518|  1.15k|					w0 = 16 - fs;
  519|  1.15k|					w1 = fs - ft;
  520|  1.15k|					w2 = ft - fp;
  521|  1.15k|					w3 = fp;
  522|  1.15k|					break;
  523|  2.10k|				case 3:
  ------------------
  |  Branch (523:5): [True: 2.10k, False: 14.7k]
  ------------------
  524|  2.10k|					s1 = N;
  525|  2.10k|					s2 = 1;
  526|  2.10k|					w0 = 16 - ft;
  527|  2.10k|					w1 = ft - fs;
  528|  2.10k|					w2 = fs - fp;
  529|  2.10k|					w3 = fp;
  530|  2.10k|					break;
  531|  3.10k|				case 5:
  ------------------
  |  Branch (531:5): [True: 3.10k, False: 13.7k]
  ------------------
  532|  3.10k|					s1 = 1;
  533|  3.10k|					s2 = NM;
  534|  3.10k|					w0 = 16 - fs;
  535|  3.10k|					w1 = fs - fp;
  536|  3.10k|					w2 = fp - ft;
  537|  3.10k|					w3 = ft;
  538|  3.10k|					break;
  539|  2.10k|				case 4:
  ------------------
  |  Branch (539:5): [True: 2.10k, False: 14.7k]
  ------------------
  540|  2.10k|					s1 = NM;
  541|  2.10k|					s2 = 1;
  542|  2.10k|					w0 = 16 - fp;
  543|  2.10k|					w1 = fp - fs;
  544|  2.10k|					w2 = fs - ft;
  545|  2.10k|					w3 = ft;
  546|  2.10k|					break;
  547|  3.10k|				case 2:
  ------------------
  |  Branch (547:5): [True: 3.10k, False: 13.7k]
  ------------------
  548|  3.10k|					s1 = N;
  549|  3.10k|					s2 = NM;
  550|  3.10k|					w0 = 16 - ft;
  551|  3.10k|					w1 = ft - fp;
  552|  3.10k|					w2 = fp - fs;
  553|  3.10k|					w3 = fs;
  554|  3.10k|					break;
  555|  5.28k|				case 0:
  ------------------
  |  Branch (555:5): [True: 5.28k, False: 11.5k]
  ------------------
  556|  5.28k|					s1 = NM;
  557|  5.28k|					s2 = N;
  558|  5.28k|					w0 = 16 - fp;
  559|  5.28k|					w1 = fp - ft;
  560|  5.28k|					w2 = ft - fs;
  561|  5.28k|					w3 = fs;
  562|  5.28k|					break;
  563|      0|				default:
  ------------------
  |  Branch (563:5): [True: 0, False: 16.8k]
  ------------------
  564|      0|					s1 = NM;
  565|      0|					s2 = N;
  566|      0|					w0 = 16 - fp;
  567|      0|					w1 = fp - ft;
  568|      0|					w2 = ft - fs;
  569|      0|					w3 = fs;
  570|      0|					break;
  571|  16.8k|				}
  572|       |
  573|  16.8k|				qweight[1] = qweight[0] + s1;
  574|  16.8k|				qweight[2] = qweight[1] + s2;
  575|  16.8k|				weight[0] = w0;
  576|  16.8k|				weight[1] = w1;
  577|  16.8k|				weight[2] = w2;
  578|  16.8k|				weight[3] = w3;
  579|       |
  580|  84.2k|				for (unsigned int i = 0; i < 4; i++)
  ------------------
  |  Branch (580:30): [True: 67.3k, False: 16.8k]
  ------------------
  581|  67.3k|				{
  582|  67.3k|					if (weight[i] != 0)
  ------------------
  |  Branch (582:10): [True: 42.8k, False: 24.5k]
  ------------------
  583|  42.8k|					{
  584|  42.8k|						wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  585|  42.8k|						wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  586|  42.8k|						wb.weight_count_of_texel[texel]++;
  587|  42.8k|						wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  588|  42.8k|						wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  589|  42.8k|						wb.texel_count_of_weight[qweight[i]]++;
  590|  42.8k|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  591|  42.8k|						max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  592|  42.8k|#endif
  593|  42.8k|					}
  594|  67.3k|				}
  595|  16.8k|			}
  596|  2.80k|		}
  597|    468|	}
  598|       |
  599|     78|	uint8_t max_texel_weight_count = 0;
  600|  16.9k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (600:27): [True: 16.8k, False: 78]
  ------------------
  601|  16.8k|	{
  602|  16.8k|		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  603|  16.8k|		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  604|       |
  605|       |		// Init all 4 entries so we can rely on zeros for vectorization
  606|  84.2k|		for (unsigned int j = 0; j < 4; j++)
  ------------------
  |  Branch (606:28): [True: 67.3k, False: 16.8k]
  ------------------
  607|  67.3k|		{
  608|  67.3k|			di.texel_weight_contribs_int_tr[j][i] = 0;
  609|  67.3k|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  610|  67.3k|			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
  611|  67.3k|#endif
  612|  67.3k|			di.texel_weights_tr[j][i] = 0;
  613|  67.3k|		}
  614|       |
  615|  59.6k|		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  ------------------
  |  Branch (615:28): [True: 42.8k, False: 16.8k]
  ------------------
  616|  42.8k|		{
  617|  42.8k|			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
  618|  42.8k|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  619|  42.8k|			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  620|  42.8k|#endif
  621|  42.8k|			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
  622|  42.8k|		}
  623|  16.8k|	}
  624|       |
  625|     78|	di.max_texel_weight_count = max_texel_weight_count;
  626|       |
  627|     78|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  628|  3.03k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (628:27): [True: 2.95k, False: 78]
  ------------------
  629|  2.95k|	{
  630|  2.95k|		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  631|  2.95k|		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  632|       |
  633|  45.7k|		for (unsigned int j = 0; j < texel_count_wt; j++)
  ------------------
  |  Branch (633:28): [True: 42.8k, False: 2.95k]
  ------------------
  634|  42.8k|		{
  635|  42.8k|			unsigned int texel = wb.texels_of_weight[i][j];
  636|       |
  637|       |			// Create transposed versions of these for better vectorization
  638|  42.8k|			di.weight_texels_tr[j][i] = static_cast<uint8_t>(texel);
  639|  42.8k|			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  640|       |
  641|       |			// Store the per-texel contribution of this weight for each texel it contributes to
  642|  42.8k|			di.texel_contrib_for_weight[j][i] = 0.0f;
  643|  80.2k|			for (unsigned int k = 0; k < 4; k++)
  ------------------
  |  Branch (643:29): [True: 80.2k, False: 0]
  ------------------
  644|  80.2k|			{
  645|  80.2k|				uint8_t dttw = di.texel_weights_tr[k][texel];
  646|  80.2k|				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
  647|  80.2k|				if (dttw == i && dttwf != 0.0f)
  ------------------
  |  Branch (647:9): [True: 42.8k, False: 37.4k]
  |  Branch (647:22): [True: 42.8k, False: 0]
  ------------------
  648|  42.8k|				{
  649|  42.8k|					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
  650|  42.8k|					break;
  651|  42.8k|				}
  652|  80.2k|			}
  653|  42.8k|		}
  654|       |
  655|       |		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  656|       |		// Match last texel in active lane in SIMD group, for better gathers
  657|  2.95k|		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
  658|  25.4k|		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (658:41): [True: 22.5k, False: 2.95k]
  ------------------
  659|  22.5k|		{
  660|  22.5k|			di.weight_texels_tr[j][i] = last_texel;
  661|  22.5k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  662|  22.5k|		}
  663|  2.95k|	}
  664|     78|#endif
  665|       |
  666|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  667|     78|	size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  668|     78|	for (size_t i = texels_per_block; i < texels_per_block_simd; i++)
  ------------------
  |  Branch (668:36): [True: 0, False: 78]
  ------------------
  669|      0|	{
  670|      0|		di.texel_weight_count[i] = 0;
  671|       |
  672|      0|		for (size_t j = 0; j < 4; j++)
  ------------------
  |  Branch (672:22): [True: 0, False: 0]
  ------------------
  673|      0|		{
  674|      0|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  675|      0|			di.texel_weight_contribs_float_tr[j][i] = 0;
  676|      0|#endif
  677|      0|			di.texel_weights_tr[j][i] = 0;
  678|      0|			di.texel_weight_contribs_int_tr[j][i] = 0;
  679|      0|		}
  680|      0|	}
  681|       |
  682|     78|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  683|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  684|       |	// Match last texel in active lane in SIMD group, for better gathers
  685|     78|	int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  686|     78|	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
  687|       |
  688|     78|	size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  689|    118|	for (size_t i = weights_per_block; i < weights_per_block_simd; i++)
  ------------------
  |  Branch (689:37): [True: 40, False: 78]
  ------------------
  690|     40|	{
  691|     40|		di.weight_texel_count[i] = 0;
  692|       |
  693|  1.15k|		for (size_t j = 0; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (693:22): [True: 1.11k, False: 40]
  ------------------
  694|  1.11k|		{
  695|  1.11k|			di.weight_texels_tr[j][i] = last_texel;
  696|  1.11k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  697|  1.11k|		}
  698|     40|	}
  699|     78|#endif
  700|       |
  701|     78|	di.texel_count = static_cast<uint8_t>(texels_per_block);
  702|     78|	di.weight_count = static_cast<uint8_t>(weights_per_block);
  703|     78|	di.weight_x = static_cast<uint8_t>(x_weights);
  704|     78|	di.weight_y = static_cast<uint8_t>(y_weights);
  705|     78|	di.weight_z = static_cast<uint8_t>(z_weights);
  706|     78|}
astcenc_block_sizes.cpp:_ZL20decode_block_mode_3djRjS_S_RbS_S_:
  160|  3.66k|) {
  161|  3.66k|	unsigned int base_quant_mode = (block_mode >> 4) & 1;
  162|  3.66k|	unsigned int H = (block_mode >> 9) & 1;
  163|  3.66k|	unsigned int D = (block_mode >> 10) & 1;
  164|  3.66k|	unsigned int A = (block_mode >> 5) & 0x3;
  165|       |
  166|  3.66k|	x_weights = 0;
  167|  3.66k|	y_weights = 0;
  168|  3.66k|	z_weights = 0;
  169|       |
  170|  3.66k|	if ((block_mode & 3) != 0)
  ------------------
  |  Branch (170:6): [True: 2.74k, False: 913]
  ------------------
  171|  2.74k|	{
  172|  2.74k|		base_quant_mode |= (block_mode & 3) << 1;
  173|  2.74k|		unsigned int B = (block_mode >> 7) & 3;
  174|  2.74k|		unsigned int C = (block_mode >> 2) & 0x3;
  175|  2.74k|		x_weights = A + 2;
  176|  2.74k|		y_weights = B + 2;
  177|  2.74k|		z_weights = C + 2;
  178|  2.74k|	}
  179|    913|	else
  180|    913|	{
  181|    913|		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
  182|    913|		if (((block_mode >> 2) & 3) == 0)
  ------------------
  |  Branch (182:7): [True: 256, False: 657]
  ------------------
  183|    256|		{
  184|    256|			return false;
  185|    256|		}
  186|       |
  187|    657|		int B = (block_mode >> 9) & 3;
  188|    657|		if (((block_mode >> 7) & 3) != 3)
  ------------------
  |  Branch (188:7): [True: 492, False: 165]
  ------------------
  189|    492|		{
  190|    492|			D = 0;
  191|    492|			H = 0;
  192|    492|		}
  193|    657|		switch ((block_mode >> 7) & 3)
  ------------------
  |  Branch (193:11): [True: 657, False: 0]
  ------------------
  194|    657|		{
  195|    164|		case 0:
  ------------------
  |  Branch (195:3): [True: 164, False: 493]
  ------------------
  196|    164|			x_weights = 6;
  197|    164|			y_weights = B + 2;
  198|    164|			z_weights = A + 2;
  199|    164|			break;
  200|    164|		case 1:
  ------------------
  |  Branch (200:3): [True: 164, False: 493]
  ------------------
  201|    164|			x_weights = A + 2;
  202|    164|			y_weights = 6;
  203|    164|			z_weights = B + 2;
  204|    164|			break;
  205|    164|		case 2:
  ------------------
  |  Branch (205:3): [True: 164, False: 493]
  ------------------
  206|    164|			x_weights = A + 2;
  207|    164|			y_weights = B + 2;
  208|    164|			z_weights = 6;
  209|    164|			break;
  210|    165|		case 3:
  ------------------
  |  Branch (210:3): [True: 165, False: 492]
  ------------------
  211|    165|			x_weights = 2;
  212|    165|			y_weights = 2;
  213|    165|			z_weights = 2;
  214|    165|			switch ((block_mode >> 5) & 3)
  ------------------
  |  Branch (214:12): [True: 165, False: 0]
  ------------------
  215|    165|			{
  216|     39|			case 0:
  ------------------
  |  Branch (216:4): [True: 39, False: 126]
  ------------------
  217|     39|				x_weights = 6;
  218|     39|				break;
  219|     39|			case 1:
  ------------------
  |  Branch (219:4): [True: 39, False: 126]
  ------------------
  220|     39|				y_weights = 6;
  221|     39|				break;
  222|     39|			case 2:
  ------------------
  |  Branch (222:4): [True: 39, False: 126]
  ------------------
  223|     39|				z_weights = 6;
  224|     39|				break;
  225|     48|			case 3:
  ------------------
  |  Branch (225:4): [True: 48, False: 117]
  ------------------
  226|     48|				return false;
  227|    165|			}
  228|    117|			break;
  229|    657|		}
  230|    657|	}
  231|       |
  232|  3.35k|	unsigned int weight_count = x_weights * y_weights * z_weights * (D + 1);
  233|  3.35k|	quant_mode = (base_quant_mode - 2) + 6 * H;
  234|  3.35k|	is_dual_plane = D != 0;
  235|       |
  236|  3.35k|	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  237|  3.35k|	return (weight_count <= BLOCK_MAX_WEIGHTS &&
  ------------------
  |  Branch (237:10): [True: 1.98k, False: 1.36k]
  ------------------
  238|  1.98k|	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  ------------------
  |  Branch (238:10): [True: 1.94k, False: 42]
  ------------------
  239|  1.94k|	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  ------------------
  |  Branch (239:10): [True: 689, False: 1.25k]
  ------------------
  240|  3.66k|}
astcenc_block_sizes.cpp:_ZL20assign_kmeans_texelsR21block_size_descriptor:
  719|      3|) {
  720|       |	// Use all texels for kmeans on a small block
  721|      3|	if (bsd.texel_count <= BLOCK_MAX_KMEANS_TEXELS)
  ------------------
  |  Branch (721:6): [True: 1, False: 2]
  ------------------
  722|      1|	{
  723|     17|		for (uint8_t i = 0; i < bsd.texel_count; i++)
  ------------------
  |  Branch (723:23): [True: 16, False: 1]
  ------------------
  724|     16|		{
  725|     16|			bsd.kmeans_texels[i] = i;
  726|     16|		}
  727|       |
  728|      1|		return;
  729|      1|	}
  730|       |
  731|       |	// Select a random subset of BLOCK_MAX_KMEANS_TEXELS for kmeans on a large block
  732|      2|	uint64_t rng_state[2];
  733|      2|	astc::rand_init(rng_state);
  734|       |
  735|       |	// Initialize array used for tracking used indices
  736|      2|	bool seen[BLOCK_MAX_TEXELS];
  737|    362|	for (uint8_t i = 0; i < bsd.texel_count; i++)
  ------------------
  |  Branch (737:22): [True: 360, False: 2]
  ------------------
  738|    360|	{
  739|    360|		seen[i] = false;
  740|    360|	}
  741|       |
  742|       |	// Assign 64 random indices, retrying if we see repeats
  743|      2|	unsigned int arr_elements_set = 0;
  744|    155|	while (arr_elements_set < BLOCK_MAX_KMEANS_TEXELS)
  ------------------
  |  Branch (744:9): [True: 153, False: 2]
  ------------------
  745|    153|	{
  746|    153|		uint8_t texel = static_cast<uint8_t>(astc::rand(rng_state));
  747|    153|		texel = texel % bsd.texel_count;
  748|    153|		if (!seen[texel])
  ------------------
  |  Branch (748:7): [True: 128, False: 25]
  ------------------
  749|    128|		{
  750|    128|			bsd.kmeans_texels[arr_elements_set++] = texel;
  751|    128|			seen[texel] = true;
  752|    128|		}
  753|    153|	}
  754|      2|}
astcenc_block_sizes.cpp:_ZL34construct_block_size_descriptor_2djjbfR21block_size_descriptor:
  828|      2|) {
  829|       |	// Store a remap table for storing packed decimation modes.
  830|       |	// Indexing uses [Y * 16 + X] and max size for each axis is 12.
  831|      2|	static const unsigned int MAX_DMI = 12 * 16 + 12;
  832|      2|	int decimation_mode_index[MAX_DMI];
  833|       |
  834|      2|	dt_init_working_buffers* wb = new dt_init_working_buffers;
  835|       |
  836|      2|	bsd.xdim = static_cast<uint8_t>(x_texels);
  837|      2|	bsd.ydim = static_cast<uint8_t>(y_texels);
  838|      2|	bsd.zdim = 1;
  839|      2|	bsd.texel_count = static_cast<uint8_t>(x_texels * y_texels);
  840|       |
  841|    410|	for (unsigned int i = 0; i < MAX_DMI; i++)
  ------------------
  |  Branch (841:27): [True: 408, False: 2]
  ------------------
  842|    408|	{
  843|    408|		decimation_mode_index[i] = -1;
  844|    408|	}
  845|       |
  846|       |	// Gather all the decimation grids that can be used with the current block
  847|      2|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  848|      2|	const float *percentiles = get_2d_percentile_table(x_texels, y_texels);
  849|      2|	float always_cutoff = 0.0f;
  850|       |#else
  851|       |	// Unused in decompress-only builds
  852|       |	(void)can_omit_modes;
  853|       |	(void)mode_cutoff;
  854|       |#endif
  855|       |
  856|       |	// Construct the list of block formats referencing the decimation tables
  857|      2|	unsigned int packed_bm_idx = 0;
  858|      2|	unsigned int packed_dm_idx = 0;
  859|       |
  860|       |	// Trackers
  861|      2|	unsigned int bm_counts[4] { 0 };
  862|      2|	unsigned int dm_counts[4] { 0 };
  863|       |
  864|       |	// Clear the list to a known-bad value
  865|  4.09k|	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (865:27): [True: 4.09k, False: 2]
  ------------------
  866|  4.09k|	{
  867|  4.09k|		bsd.block_mode_packed_index[i] = BLOCK_BAD_BLOCK_MODE;
  868|  4.09k|	}
  869|       |
  870|       |	// Iterate four times to build a usefully ordered list:
  871|       |	//   - Pass 0 - keep selected single plane "always" block modes
  872|       |	//   - Pass 1 - keep selected single plane "non-always" block modes
  873|       |	//   - Pass 2 - keep select dual plane block modes
  874|       |	//   - Pass 3 - keep everything else that's legal
  875|      2|	unsigned int limit = can_omit_modes ? 3 : 4;
  ------------------
  |  Branch (875:23): [True: 0, False: 2]
  ------------------
  876|     10|	for (unsigned int j = 0; j < limit; j ++)
  ------------------
  |  Branch (876:27): [True: 8, False: 2]
  ------------------
  877|      8|	{
  878|  16.3k|		for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (878:28): [True: 16.3k, False: 8]
  ------------------
  879|  16.3k|		{
  880|       |			// Skip modes we've already included in a previous pass
  881|  16.3k|			if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE)
  ------------------
  |  Branch (881:8): [True: 1.51k, False: 14.8k]
  ------------------
  882|  1.51k|			{
  883|  1.51k|				continue;
  884|  1.51k|			}
  885|       |
  886|       |			// Decode parameters
  887|  14.8k|			unsigned int x_weights;
  888|  14.8k|			unsigned int y_weights;
  889|  14.8k|			bool is_dual_plane;
  890|  14.8k|			unsigned int quant_mode;
  891|  14.8k|			unsigned int weight_bits;
  892|  14.8k|			bool valid = decode_block_mode_2d(i, x_weights, y_weights, is_dual_plane, quant_mode, weight_bits);
  893|       |
  894|       |			// Always skip invalid encodings for the current block size
  895|  14.8k|			if (!valid || (x_weights > x_texels) || (y_weights > y_texels))
  ------------------
  |  Branch (895:8): [True: 10.1k, False: 4.68k]
  |  Branch (895:18): [True: 1.45k, False: 3.23k]
  |  Branch (895:44): [True: 1.06k, False: 2.16k]
  ------------------
  896|  12.7k|			{
  897|  12.7k|				continue;
  898|  12.7k|			}
  899|       |
  900|       |			// Selectively skip dual plane encodings
  901|  2.16k|			if (((j <= 1) && is_dual_plane) || (j == 2 && !is_dual_plane))
  ------------------
  |  Branch (901:9): [True: 1.83k, False: 330]
  |  Branch (901:21): [True: 660, False: 1.17k]
  |  Branch (901:40): [True: 330, False: 1.17k]
  |  Branch (901:50): [True: 0, False: 330]
  ------------------
  902|    660|			{
  903|    660|				continue;
  904|    660|			}
  905|       |
  906|       |			// Always skip encodings we can't physically encode based on
  907|       |			// generic encoding bit availability
  908|  1.50k|			if (is_dual_plane)
  ------------------
  |  Branch (908:8): [True: 330, False: 1.17k]
  ------------------
  909|    330|			{
  910|       |				 // This is the only check we need as only support 1 partition
  911|    330|				 if ((109 - weight_bits) <= 0)
  ------------------
  |  Branch (911:10): [True: 0, False: 330]
  ------------------
  912|      0|				 {
  913|      0|					continue;
  914|      0|				 }
  915|    330|			}
  916|  1.17k|			else
  917|  1.17k|			{
  918|       |				// This is conservative - fewer bits may be available for > 1 partition
  919|  1.17k|				 if ((111 - weight_bits) <= 0)
  ------------------
  |  Branch (919:10): [True: 0, False: 1.17k]
  ------------------
  920|      0|				 {
  921|      0|					continue;
  922|      0|				 }
  923|  1.17k|			}
  924|       |
  925|       |			// Selectively skip encodings based on percentile
  926|  1.50k|			bool percentile_hit = false;
  927|  1.50k|	#if !defined(ASTCENC_DECOMPRESS_ONLY)
  928|  1.50k|			if (j == 0)
  ------------------
  |  Branch (928:8): [True: 590, False: 918]
  ------------------
  929|    590|			{
  930|    590|				percentile_hit = percentiles[i] <= always_cutoff;
  931|    590|			}
  932|    918|			else
  933|    918|			{
  934|    918|				percentile_hit = percentiles[i] <= mode_cutoff;
  935|    918|			}
  936|  1.50k|	#endif
  937|       |
  938|  1.50k|			if (j != 3 && !percentile_hit)
  ------------------
  |  Branch (938:8): [True: 1.50k, False: 0]
  |  Branch (938:18): [True: 588, False: 920]
  ------------------
  939|    588|			{
  940|    588|				continue;
  941|    588|			}
  942|       |
  943|       |			// Allocate and initialize the decimation table entry if we've not used it yet
  944|    920|			int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
  945|    920|			if (decimation_mode < 0)
  ------------------
  |  Branch (945:8): [True: 96, False: 824]
  ------------------
  946|     96|			{
  947|     96|				construct_dt_entry_2d(x_texels, y_texels, x_weights, y_weights, bsd, *wb, packed_dm_idx);
  948|     96|				decimation_mode_index[y_weights * 16 + x_weights] = packed_dm_idx;
  949|     96|				decimation_mode = packed_dm_idx;
  950|       |
  951|     96|				dm_counts[j]++;
  952|     96|				packed_dm_idx++;
  953|     96|			}
  954|       |
  955|    920|			auto& bm = bsd.block_modes[packed_bm_idx];
  956|       |
  957|    920|			bm.decimation_mode = static_cast<uint8_t>(decimation_mode);
  958|    920|			bm.quant_mode = static_cast<uint8_t>(quant_mode);
  959|    920|			bm.is_dual_plane = static_cast<uint8_t>(is_dual_plane);
  960|    920|			bm.weight_bits = static_cast<uint8_t>(weight_bits);
  961|    920|			bm.mode_index = static_cast<uint16_t>(i);
  962|       |
  963|    920|			auto& dm = bsd.decimation_modes[decimation_mode];
  964|       |
  965|    920|			if (is_dual_plane)
  ------------------
  |  Branch (965:8): [True: 330, False: 590]
  ------------------
  966|    330|			{
  967|    330|				dm.set_ref_2plane(bm.get_weight_quant_mode());
  968|    330|			}
  969|    590|			else
  970|    590|			{
  971|    590|				dm.set_ref_1plane(bm.get_weight_quant_mode());
  972|    590|			}
  973|       |
  974|    920|			bsd.block_mode_packed_index[i] = static_cast<uint16_t>(packed_bm_idx);
  975|       |
  976|    920|			packed_bm_idx++;
  977|    920|			bm_counts[j]++;
  978|    920|		}
  979|      8|	}
  980|       |
  981|      2|	bsd.block_mode_count_1plane_always = bm_counts[0];
  982|      2|	bsd.block_mode_count_1plane_selected = bm_counts[0] + bm_counts[1];
  983|      2|	bsd.block_mode_count_1plane_2plane_selected = bm_counts[0] + bm_counts[1] + bm_counts[2];
  984|      2|	bsd.block_mode_count_all = bm_counts[0] + bm_counts[1] + bm_counts[2] + bm_counts[3];
  985|       |
  986|      2|	bsd.decimation_mode_count_always = dm_counts[0];
  987|      2|	bsd.decimation_mode_count_selected = dm_counts[0] + dm_counts[1] + dm_counts[2];
  988|      2|	bsd.decimation_mode_count_all = dm_counts[0] + dm_counts[1] + dm_counts[2] + dm_counts[3];
  989|       |
  990|      2|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  991|      2|	assert(bsd.block_mode_count_1plane_always > 0);
  ------------------
  |  Branch (991:2): [True: 2, False: 0]
  ------------------
  992|      2|	assert(bsd.decimation_mode_count_always > 0);
  ------------------
  |  Branch (992:2): [True: 2, False: 0]
  ------------------
  993|       |
  994|      2|	delete[] percentiles;
  995|      2|#endif
  996|       |
  997|       |	// Ensure the end of the array contains valid data (should never get read)
  998|     80|	for (unsigned int i = bsd.decimation_mode_count_all; i < WEIGHTS_MAX_DECIMATION_MODES; i++)
  ------------------
  |  Branch (998:55): [True: 78, False: 2]
  ------------------
  999|     78|	{
 1000|     78|		bsd.decimation_modes[i].maxprec_1plane = -1;
 1001|     78|		bsd.decimation_modes[i].maxprec_2planes = -1;
 1002|     78|		bsd.decimation_modes[i].refprec_1plane = 0;
 1003|     78|		bsd.decimation_modes[i].refprec_2planes = 0;
 1004|     78|	}
 1005|       |
 1006|      2|#if !defined(ASTCENC_DECOMPRESS_ONLY)
 1007|       |	// Determine the texels to use for kmeans clustering.
 1008|      2|	assign_kmeans_texels(bsd);
 1009|      2|#endif
 1010|       |
 1011|      2|	delete wb;
 1012|      2|}
astcenc_block_sizes.cpp:_ZL20decode_block_mode_2djRjS_RbS_S_:
   43|  14.8k|) {
   44|  14.8k|	unsigned int base_quant_mode = (block_mode >> 4) & 1;
   45|  14.8k|	unsigned int H = (block_mode >> 9) & 1;
   46|  14.8k|	unsigned int D = (block_mode >> 10) & 1;
   47|  14.8k|	unsigned int A = (block_mode >> 5) & 0x3;
   48|       |
   49|  14.8k|	x_weights = 0;
   50|  14.8k|	y_weights = 0;
   51|       |
   52|  14.8k|	if ((block_mode & 3) != 0)
  ------------------
  |  Branch (52:6): [True: 10.9k, False: 3.94k]
  ------------------
   53|  10.9k|	{
   54|  10.9k|		base_quant_mode |= (block_mode & 3) << 1;
   55|  10.9k|		unsigned int B = (block_mode >> 7) & 3;
   56|  10.9k|		switch ((block_mode >> 2) & 3)
  ------------------
  |  Branch (56:11): [True: 10.9k, False: 0]
  ------------------
   57|  10.9k|		{
   58|  2.62k|		case 0:
  ------------------
  |  Branch (58:3): [True: 2.62k, False: 8.30k]
  ------------------
   59|  2.62k|			x_weights = B + 4;
   60|  2.62k|			y_weights = A + 2;
   61|  2.62k|			break;
   62|  2.85k|		case 1:
  ------------------
  |  Branch (62:3): [True: 2.85k, False: 8.07k]
  ------------------
   63|  2.85k|			x_weights = B + 8;
   64|  2.85k|			y_weights = A + 2;
   65|  2.85k|			break;
   66|  2.85k|		case 2:
  ------------------
  |  Branch (66:3): [True: 2.85k, False: 8.07k]
  ------------------
   67|  2.85k|			x_weights = A + 2;
   68|  2.85k|			y_weights = B + 8;
   69|  2.85k|			break;
   70|  2.60k|		case 3:
  ------------------
  |  Branch (70:3): [True: 2.60k, False: 8.32k]
  ------------------
   71|  2.60k|			B &= 1;
   72|  2.60k|			if (block_mode & 0x100)
  ------------------
  |  Branch (72:8): [True: 1.23k, False: 1.37k]
  ------------------
   73|  1.23k|			{
   74|  1.23k|				x_weights = B + 2;
   75|  1.23k|				y_weights = A + 2;
   76|  1.23k|			}
   77|  1.37k|			else
   78|  1.37k|			{
   79|  1.37k|				x_weights = A + 2;
   80|  1.37k|				y_weights = B + 6;
   81|  1.37k|			}
   82|  2.60k|			break;
   83|  10.9k|		}
   84|  10.9k|	}
   85|  3.94k|	else
   86|  3.94k|	{
   87|  3.94k|		base_quant_mode |= ((block_mode >> 2) & 3) << 1;
   88|  3.94k|		if (((block_mode >> 2) & 3) == 0)
  ------------------
  |  Branch (88:7): [True: 1.02k, False: 2.92k]
  ------------------
   89|  1.02k|		{
   90|  1.02k|			return false;
   91|  1.02k|		}
   92|       |
   93|  2.92k|		unsigned int B = (block_mode >> 9) & 3;
   94|  2.92k|		switch ((block_mode >> 7) & 3)
  ------------------
  |  Branch (94:11): [True: 2.92k, False: 0]
  ------------------
   95|  2.92k|		{
   96|    727|		case 0:
  ------------------
  |  Branch (96:3): [True: 727, False: 2.19k]
  ------------------
   97|    727|			x_weights = 12;
   98|    727|			y_weights = A + 2;
   99|    727|			break;
  100|    727|		case 1:
  ------------------
  |  Branch (100:3): [True: 727, False: 2.19k]
  ------------------
  101|    727|			x_weights = A + 2;
  102|    727|			y_weights = 12;
  103|    727|			break;
  104|    707|		case 2:
  ------------------
  |  Branch (104:3): [True: 707, False: 2.21k]
  ------------------
  105|    707|			x_weights = A + 6;
  106|    707|			y_weights = B + 6;
  107|    707|			D = 0;
  108|    707|			H = 0;
  109|    707|			break;
  110|    760|		case 3:
  ------------------
  |  Branch (110:3): [True: 760, False: 2.16k]
  ------------------
  111|    760|			switch ((block_mode >> 5) & 3)
  ------------------
  |  Branch (111:12): [True: 760, False: 0]
  ------------------
  112|    760|			{
  113|    188|			case 0:
  ------------------
  |  Branch (113:4): [True: 188, False: 572]
  ------------------
  114|    188|				x_weights = 6;
  115|    188|				y_weights = 10;
  116|    188|				break;
  117|    188|			case 1:
  ------------------
  |  Branch (117:4): [True: 188, False: 572]
  ------------------
  118|    188|				x_weights = 10;
  119|    188|				y_weights = 6;
  120|    188|				break;
  121|    192|			case 2:
  ------------------
  |  Branch (121:4): [True: 192, False: 568]
  ------------------
  122|    384|			case 3:
  ------------------
  |  Branch (122:4): [True: 192, False: 568]
  ------------------
  123|    384|				return false;
  124|    760|			}
  125|    376|			break;
  126|  2.92k|		}
  127|  2.92k|	}
  128|       |
  129|  13.4k|	unsigned int weight_count = x_weights * y_weights * (D + 1);
  130|  13.4k|	quant_mode = (base_quant_mode - 2) + 6 * H;
  131|  13.4k|	is_dual_plane = D != 0;
  132|       |
  133|  13.4k|	weight_bits = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(quant_mode));
  134|  13.4k|	return (weight_count <= BLOCK_MAX_WEIGHTS &&
  ------------------
  |  Branch (134:10): [True: 10.8k, False: 2.64k]
  ------------------
  135|  10.8k|	        weight_bits >= BLOCK_MIN_WEIGHT_BITS &&
  ------------------
  |  Branch (135:10): [True: 10.0k, False: 728]
  ------------------
  136|  10.0k|	        weight_bits <= BLOCK_MAX_WEIGHT_BITS);
  ------------------
  |  Branch (136:10): [True: 4.68k, False: 5.40k]
  ------------------
  137|  14.8k|}
astcenc_block_sizes.cpp:_ZL21construct_dt_entry_2djjjjR21block_size_descriptorR23dt_init_working_buffersj:
  776|     96|) {
  777|     96|	unsigned int weight_count = x_weights * y_weights;
  778|     96|	assert(weight_count <= BLOCK_MAX_WEIGHTS);
  ------------------
  |  Branch (778:2): [True: 96, False: 0]
  ------------------
  779|       |
  780|     96|	bool try_2planes = (2 * weight_count) <= BLOCK_MAX_WEIGHTS;
  781|       |
  782|     96|	decimation_info& di = bsd.decimation_tables[index];
  783|     96|	init_decimation_info_2d(x_texels, y_texels, x_weights, y_weights, di, wb);
  784|       |
  785|     96|	int maxprec_1plane = -1;
  786|     96|	int maxprec_2planes = -1;
  787|  1.24k|	for (int i = 0; i < 12; i++)
  ------------------
  |  Branch (787:18): [True: 1.15k, False: 96]
  ------------------
  788|  1.15k|	{
  789|  1.15k|		unsigned int bits_1plane = get_ise_sequence_bitcount(weight_count, static_cast<quant_method>(i));
  790|  1.15k|		if (bits_1plane >= BLOCK_MIN_WEIGHT_BITS && bits_1plane <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (790:7): [True: 1.02k, False: 124]
  |  Branch (790:47): [True: 590, False: 438]
  ------------------
  791|    590|		{
  792|    590|			maxprec_1plane = i;
  793|    590|		}
  794|       |
  795|  1.15k|		if (try_2planes)
  ------------------
  |  Branch (795:7): [True: 684, False: 468]
  ------------------
  796|    684|		{
  797|    684|			unsigned int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, static_cast<quant_method>(i));
  798|    684|			if (bits_2planes >= BLOCK_MIN_WEIGHT_BITS && bits_2planes <= BLOCK_MAX_WEIGHT_BITS)
  ------------------
  |  Branch (798:8): [True: 658, False: 26]
  |  Branch (798:49): [True: 330, False: 328]
  ------------------
  799|    330|			{
  800|    330|				maxprec_2planes = i;
  801|    330|			}
  802|    684|		}
  803|  1.15k|	}
  804|       |
  805|       |	// At least one of the two should be valid ...
  806|     96|	assert(maxprec_1plane >= 0 || maxprec_2planes >= 0);
  ------------------
  |  Branch (806:2): [True: 94, False: 2]
  |  Branch (806:2): [True: 2, False: 0]
  |  Branch (806:2): [True: 96, False: 0]
  ------------------
  807|     96|	bsd.decimation_modes[index].maxprec_1plane = static_cast<int8_t>(maxprec_1plane);
  808|     96|	bsd.decimation_modes[index].maxprec_2planes = static_cast<int8_t>(maxprec_2planes);
  809|     96|	bsd.decimation_modes[index].refprec_1plane = 0;
  810|     96|	bsd.decimation_modes[index].refprec_2planes = 0;
  811|     96|}
astcenc_block_sizes.cpp:_ZL23init_decimation_info_2djjjjR15decimation_infoR23dt_init_working_buffers:
  259|     96|) {
  260|     96|	unsigned int texels_per_block = x_texels * y_texels;
  261|     96|	unsigned int weights_per_block = x_weights * y_weights;
  262|       |
  263|     96|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  264|     96|	uint8_t max_texel_count_of_weight = 0;
  265|     96|#endif
  266|       |
  267|     96|	promise(weights_per_block > 0);
  ------------------
  |  |   62|     96|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (267:2): [True: 96, False: 0]
  ------------------
  268|     96|	promise(texels_per_block > 0);
  ------------------
  |  |   62|     96|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (268:2): [True: 96, False: 0]
  ------------------
  269|     96|	promise(x_texels > 0);
  ------------------
  |  |   62|     96|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (269:2): [True: 96, False: 0]
  ------------------
  270|     96|	promise(y_texels > 0);
  ------------------
  |  |   62|     96|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (270:2): [True: 96, False: 0]
  ------------------
  271|       |
  272|  2.93k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (272:27): [True: 2.83k, False: 96]
  ------------------
  273|  2.83k|	{
  274|  2.83k|		wb.texel_count_of_weight[i] = 0;
  275|  2.83k|	}
  276|       |
  277|  12.7k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (277:27): [True: 12.6k, False: 96]
  ------------------
  278|  12.6k|	{
  279|  12.6k|		wb.weight_count_of_texel[i] = 0;
  280|  12.6k|	}
  281|       |
  282|  1.17k|	for (unsigned int y = 0; y < y_texels; y++)
  ------------------
  |  Branch (282:27): [True: 1.08k, False: 96]
  ------------------
  283|  1.08k|	{
  284|  13.7k|		for (unsigned int x = 0; x < x_texels; x++)
  ------------------
  |  Branch (284:28): [True: 12.6k, False: 1.08k]
  ------------------
  285|  12.6k|		{
  286|  12.6k|			unsigned int texel = y * x_texels + x;
  287|       |
  288|  12.6k|			unsigned int x_weight = (((1024 + x_texels / 2) / (x_texels - 1)) * x * (x_weights - 1) + 32) >> 6;
  289|  12.6k|			unsigned int y_weight = (((1024 + y_texels / 2) / (y_texels - 1)) * y * (y_weights - 1) + 32) >> 6;
  290|       |
  291|  12.6k|			unsigned int x_weight_frac = x_weight & 0xF;
  292|  12.6k|			unsigned int y_weight_frac = y_weight & 0xF;
  293|  12.6k|			unsigned int x_weight_int = x_weight >> 4;
  294|  12.6k|			unsigned int y_weight_int = y_weight >> 4;
  295|       |
  296|  12.6k|			unsigned int qweight[4];
  297|  12.6k|			qweight[0] = x_weight_int + y_weight_int * x_weights;
  298|  12.6k|			qweight[1] = qweight[0] + 1;
  299|  12.6k|			qweight[2] = qweight[0] + x_weights;
  300|  12.6k|			qweight[3] = qweight[2] + 1;
  301|       |
  302|       |			// Truncated-precision bilinear interpolation
  303|  12.6k|			unsigned int prod = x_weight_frac * y_weight_frac;
  304|       |
  305|  12.6k|			unsigned int weight[4];
  306|  12.6k|			weight[3] = (prod + 8) >> 4;
  307|  12.6k|			weight[1] = x_weight_frac - weight[3];
  308|  12.6k|			weight[2] = y_weight_frac - weight[3];
  309|  12.6k|			weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
  310|       |
  311|  63.3k|			for (unsigned int i = 0; i < 4; i++)
  ------------------
  |  Branch (311:29): [True: 50.6k, False: 12.6k]
  ------------------
  312|  50.6k|			{
  313|  50.6k|				if (weight[i] != 0)
  ------------------
  |  Branch (313:9): [True: 37.8k, False: 12.7k]
  ------------------
  314|  37.8k|				{
  315|  37.8k|					wb.grid_weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(qweight[i]);
  316|  37.8k|					wb.weights_of_texel[texel][wb.weight_count_of_texel[texel]] = static_cast<uint8_t>(weight[i]);
  317|  37.8k|					wb.weight_count_of_texel[texel]++;
  318|  37.8k|					wb.texels_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(texel);
  319|  37.8k|					wb.texel_weights_of_weight[qweight[i]][wb.texel_count_of_weight[qweight[i]]] = static_cast<uint8_t>(weight[i]);
  320|  37.8k|					wb.texel_count_of_weight[qweight[i]]++;
  321|  37.8k|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  322|  37.8k|					max_texel_count_of_weight = astc::max(max_texel_count_of_weight, wb.texel_count_of_weight[qweight[i]]);
  323|  37.8k|#endif
  324|  37.8k|				}
  325|  50.6k|			}
  326|  12.6k|		}
  327|  1.08k|	}
  328|       |
  329|     96|	uint8_t max_texel_weight_count = 0;
  330|  12.7k|	for (unsigned int i = 0; i < texels_per_block; i++)
  ------------------
  |  Branch (330:27): [True: 12.6k, False: 96]
  ------------------
  331|  12.6k|	{
  332|  12.6k|		di.texel_weight_count[i] = wb.weight_count_of_texel[i];
  333|  12.6k|		max_texel_weight_count = astc::max(max_texel_weight_count, di.texel_weight_count[i]);
  334|       |
  335|  50.5k|		for (unsigned int j = 0; j < wb.weight_count_of_texel[i]; j++)
  ------------------
  |  Branch (335:28): [True: 37.8k, False: 12.6k]
  ------------------
  336|  37.8k|		{
  337|  37.8k|			di.texel_weight_contribs_int_tr[j][i] = wb.weights_of_texel[i][j];
  338|  37.8k|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  339|  37.8k|			di.texel_weight_contribs_float_tr[j][i] = static_cast<float>(wb.weights_of_texel[i][j]) * (1.0f / WEIGHTS_TEXEL_SUM);
  340|  37.8k|#endif
  341|  37.8k|			di.texel_weights_tr[j][i] = wb.grid_weights_of_texel[i][j];
  342|  37.8k|		}
  343|       |
  344|       |		// Init all 4 entries so we can rely on zeros for vectorization
  345|  25.4k|		for (unsigned int j = wb.weight_count_of_texel[i]; j < 4; j++)
  ------------------
  |  Branch (345:54): [True: 12.7k, False: 12.6k]
  ------------------
  346|  12.7k|		{
  347|  12.7k|			di.texel_weight_contribs_int_tr[j][i] = 0;
  348|  12.7k|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  349|  12.7k|			di.texel_weight_contribs_float_tr[j][i] = 0.0f;
  350|  12.7k|#endif
  351|  12.7k|			di.texel_weights_tr[j][i] = 0;
  352|  12.7k|		}
  353|  12.6k|	}
  354|       |
  355|     96|	di.max_texel_weight_count = max_texel_weight_count;
  356|       |
  357|     96|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  358|  2.93k|	for (unsigned int i = 0; i < weights_per_block; i++)
  ------------------
  |  Branch (358:27): [True: 2.83k, False: 96]
  ------------------
  359|  2.83k|	{
  360|  2.83k|		unsigned int texel_count_wt = wb.texel_count_of_weight[i];
  361|  2.83k|		di.weight_texel_count[i] = static_cast<uint8_t>(texel_count_wt);
  362|       |
  363|  40.7k|		for (unsigned int j = 0; j < texel_count_wt; j++)
  ------------------
  |  Branch (363:28): [True: 37.8k, False: 2.83k]
  ------------------
  364|  37.8k|		{
  365|  37.8k|			uint8_t texel = wb.texels_of_weight[i][j];
  366|       |
  367|       |			// Create transposed versions of these for better vectorization
  368|  37.8k|			di.weight_texels_tr[j][i] = texel;
  369|  37.8k|			di.weights_texel_contribs_tr[j][i] = static_cast<float>(wb.texel_weights_of_weight[i][j]);
  370|       |
  371|       |			// Store the per-texel contribution of this weight for each texel it contributes to
  372|  37.8k|			di.texel_contrib_for_weight[j][i] = 0.0f;
  373|  81.4k|			for (unsigned int k = 0; k < 4; k++)
  ------------------
  |  Branch (373:29): [True: 81.4k, False: 0]
  ------------------
  374|  81.4k|			{
  375|  81.4k|				uint8_t dttw = di.texel_weights_tr[k][texel];
  376|  81.4k|				float dttwf = di.texel_weight_contribs_float_tr[k][texel];
  377|  81.4k|				if (dttw == i && dttwf != 0.0f)
  ------------------
  |  Branch (377:9): [True: 37.8k, False: 43.5k]
  |  Branch (377:22): [True: 37.8k, False: 0]
  ------------------
  378|  37.8k|				{
  379|  37.8k|					di.texel_contrib_for_weight[j][i] = di.texel_weight_contribs_float_tr[k][texel];
  380|  37.8k|					break;
  381|  37.8k|				}
  382|  81.4k|			}
  383|  37.8k|		}
  384|       |
  385|       |		// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  386|       |		// Match last texel in active lane in SIMD group, for better gathers
  387|  2.83k|		uint8_t last_texel = di.weight_texels_tr[texel_count_wt - 1][i];
  388|  20.2k|		for (unsigned int j = texel_count_wt; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (388:41): [True: 17.4k, False: 2.83k]
  ------------------
  389|  17.4k|		{
  390|  17.4k|			di.weight_texels_tr[j][i] = last_texel;
  391|  17.4k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  392|  17.4k|		}
  393|  2.83k|	}
  394|     96|#endif
  395|       |
  396|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  397|     96|	size_t texels_per_block_simd = round_up_to_simd_multiple_vla(texels_per_block);
  398|     96|	for (size_t i = texels_per_block; i < texels_per_block_simd; i++)
  ------------------
  |  Branch (398:36): [True: 0, False: 96]
  ------------------
  399|      0|	{
  400|      0|		di.texel_weight_count[i] = 0;
  401|       |
  402|      0|		for (size_t j = 0; j < 4; j++)
  ------------------
  |  Branch (402:22): [True: 0, False: 0]
  ------------------
  403|      0|		{
  404|      0|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  405|      0|			di.texel_weight_contribs_float_tr[j][i] = 0;
  406|      0|#endif
  407|      0|			di.texel_weights_tr[j][i] = 0;
  408|      0|			di.texel_weight_contribs_int_tr[j][i] = 0;
  409|      0|		}
  410|      0|	}
  411|       |
  412|     96|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  413|       |	// Initialize array tail so we can over-fetch with SIMD later to avoid loop tails
  414|       |	// Match last texel in active lane in SIMD group, for better gathers
  415|     96|	unsigned int last_texel_count_wt = wb.texel_count_of_weight[weights_per_block - 1];
  416|     96|	uint8_t last_texel = di.weight_texels_tr[last_texel_count_wt - 1][weights_per_block - 1];
  417|       |
  418|     96|	size_t weights_per_block_simd = round_up_to_simd_multiple_vla(weights_per_block);
  419|    184|	for (size_t i = weights_per_block; i < weights_per_block_simd; i++)
  ------------------
  |  Branch (419:37): [True: 88, False: 96]
  ------------------
  420|     88|	{
  421|     88|		di.weight_texel_count[i] = 0;
  422|       |
  423|  2.79k|		for (size_t j = 0; j < max_texel_count_of_weight; j++)
  ------------------
  |  Branch (423:22): [True: 2.70k, False: 88]
  ------------------
  424|  2.70k|		{
  425|  2.70k|			di.weight_texels_tr[j][i] = last_texel;
  426|  2.70k|			di.weights_texel_contribs_tr[j][i] = 0.0f;
  427|  2.70k|		}
  428|     88|	}
  429|     96|#endif
  430|       |
  431|     96|	di.texel_count = static_cast<uint8_t>(texels_per_block);
  432|     96|	di.weight_count = static_cast<uint8_t>(weights_per_block);
  433|     96|	di.weight_x = static_cast<uint8_t>(x_weights);
  434|     96|	di.weight_y = static_cast<uint8_t>(y_weights);
  435|     96|	di.weight_z = 1;
  436|     96|}

_Z25get_ise_sequence_bitcountj12quant_method:
  422|  20.5k|) {
  423|       |	// Cope with out-of bounds values - input might be invalid
  424|  20.5k|	if (static_cast<size_t>(quant_level) >= ise_sizes.size())
  ------------------
  |  Branch (424:6): [True: 0, False: 20.5k]
  ------------------
  425|      0|	{
  426|       |		// Arbitrary large number that's more than an ASTC block can hold
  427|      0|		return 1024;
  428|      0|	}
  429|       |
  430|  20.5k|	auto& entry = ise_sizes[quant_level];
  431|  20.5k|	unsigned int divisor = (entry.divisor << 1) + 1;
  432|  20.5k|	return (entry.scale * character_count + divisor - 1) / divisor;
  433|  20.5k|}
_Z10decode_ise12quant_methodjPKhPhj:
  657|     70|) {
  658|     70|	promise(character_count > 0);
  ------------------
  |  |   62|     70|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (658:2): [True: 70, False: 0]
  ------------------
  659|       |
  660|       |	// Note: due to how the trit/quint-block unpacking is done in this function, we may write more
  661|       |	// temporary results than the number of outputs. The maximum actual number of results is 64 bit,
  662|       |	// but we keep 4 additional character_count of padding.
  663|     70|	uint8_t results[68];
  664|     70|	uint8_t tq_blocks[22] { 0 }; // Trit-blocks or quint-blocks, must be zeroed
  665|       |
  666|     70|	unsigned int bits = btq_counts[quant_level].bits;
  667|     70|	unsigned int trits = btq_counts[quant_level].trits;
  668|     70|	unsigned int quints = btq_counts[quant_level].quints;
  669|       |
  670|     70|	unsigned int lcounter = 0;
  671|     70|	unsigned int hcounter = 0;
  672|       |
  673|       |	// Collect bits for each element, as well as bits for any trit-blocks and quint-blocks.
  674|  1.53k|	for (unsigned int i = 0; i < character_count; i++)
  ------------------
  |  Branch (674:27): [True: 1.46k, False: 70]
  ------------------
  675|  1.46k|	{
  676|  1.46k|		results[i] = static_cast<uint8_t>(read_bits(bits, bit_offset, input_data));
  677|  1.46k|		bit_offset += bits;
  678|       |
  679|  1.46k|		if (trits)
  ------------------
  |  Branch (679:7): [True: 613, False: 855]
  ------------------
  680|    613|		{
  681|    613|			static const uint8_t bits_to_read[5]  { 2, 2, 1, 2, 1 };
  682|    613|			static const uint8_t block_shift[5]   { 0, 2, 4, 5, 7 };
  683|    613|			static const uint8_t next_lcounter[5] { 1, 2, 3, 4, 0 };
  684|    613|			static const uint8_t hcounter_incr[5] { 0, 0, 0, 0, 1 };
  685|    613|			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
  686|    613|			bit_offset += bits_to_read[lcounter];
  687|    613|			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
  688|    613|			hcounter += hcounter_incr[lcounter];
  689|    613|			lcounter = next_lcounter[lcounter];
  690|    613|		}
  691|       |
  692|  1.46k|		if (quints)
  ------------------
  |  Branch (692:7): [True: 199, False: 1.26k]
  ------------------
  693|    199|		{
  694|    199|			static const uint8_t bits_to_read[3]  { 3, 2, 2 };
  695|    199|			static const uint8_t block_shift[3]   { 0, 3, 5 };
  696|    199|			static const uint8_t next_lcounter[3] { 1, 2, 0 };
  697|    199|			static const uint8_t hcounter_incr[3] { 0, 0, 1 };
  698|    199|			unsigned int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data);
  699|    199|			bit_offset += bits_to_read[lcounter];
  700|    199|			tq_blocks[hcounter] |= tdata << block_shift[lcounter];
  701|    199|			hcounter += hcounter_incr[lcounter];
  702|    199|			lcounter = next_lcounter[lcounter];
  703|    199|		}
  704|  1.46k|	}
  705|       |
  706|       |	// Unpack trit-blocks or quint-blocks as needed
  707|     70|	if (trits)
  ------------------
  |  Branch (707:6): [True: 25, False: 45]
  ------------------
  708|     25|	{
  709|     25|		unsigned int trit_blocks = (character_count + 4) / 5;
  710|     25|		promise(trit_blocks > 0);
  ------------------
  |  |   62|     25|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (710:3): [True: 25, False: 0]
  ------------------
  711|    160|		for (unsigned int i = 0; i < trit_blocks; i++)
  ------------------
  |  Branch (711:28): [True: 135, False: 25]
  ------------------
  712|    135|		{
  713|    135|			const uint8_t *tritptr = trits_of_integer[tq_blocks[i]];
  714|    135|			results[5 * i    ] |= tritptr[0] << bits;
  715|    135|			results[5 * i + 1] |= tritptr[1] << bits;
  716|    135|			results[5 * i + 2] |= tritptr[2] << bits;
  717|    135|			results[5 * i + 3] |= tritptr[3] << bits;
  718|    135|			results[5 * i + 4] |= tritptr[4] << bits;
  719|    135|		}
  720|     25|	}
  721|       |
  722|     70|	if (quints)
  ------------------
  |  Branch (722:6): [True: 15, False: 55]
  ------------------
  723|     15|	{
  724|     15|		unsigned int quint_blocks = (character_count + 2) / 3;
  725|     15|		promise(quint_blocks > 0);
  ------------------
  |  |   62|     15|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (725:3): [True: 15, False: 0]
  ------------------
  726|     85|		for (unsigned int i = 0; i < quint_blocks; i++)
  ------------------
  |  Branch (726:28): [True: 70, False: 15]
  ------------------
  727|     70|		{
  728|     70|			const uint8_t *quintptr = quints_of_integer[tq_blocks[i]];
  729|     70|			results[3 * i    ] |= quintptr[0] << bits;
  730|     70|			results[3 * i + 1] |= quintptr[1] << bits;
  731|     70|			results[3 * i + 2] |= quintptr[2] << bits;
  732|     70|		}
  733|     15|	}
  734|       |
  735|  1.53k|	for (unsigned int i = 0; i < character_count; i++)
  ------------------
  |  Branch (735:27): [True: 1.46k, False: 70]
  ------------------
  736|  1.46k|	{
  737|  1.46k|		output_data[i] = results[i];
  738|  1.46k|	}
  739|     70|}
astcenc_integer_sequence.cpp:_ZL9read_bitsjjPKh:
  482|  2.28k|) {
  483|  2.28k|	unsigned int mask = (1 << bitcount) - 1;
  484|  2.28k|	ptr += bitoffset >> 3;
  485|  2.28k|	bitoffset &= 7;
  486|  2.28k|	unsigned int value = ptr[0] | (ptr[1] << 8);
  487|  2.28k|	value >>= bitoffset;
  488|  2.28k|	value &= mask;
  489|  2.28k|	return value;
  490|  2.28k|}

_ZNK10block_mode21get_weight_quant_modeEv:
  441|    920|	{
  442|    920|		return static_cast<quant_method>(this->quant_mode);
  443|    920|	}
_ZN15decimation_mode14set_ref_1planeE12quant_method:
  477|    590|	{
  478|    590|		refprec_1plane |= (1 << weight_quant);
  479|    590|	}
_ZN15decimation_mode14set_ref_2planeE12quant_method:
  498|    330|	{
  499|    330|		refprec_2planes |= static_cast<uint16_t>(1 << weight_quant);
  500|    330|	}
_ZNK21block_size_descriptor14get_block_modeEj:
  642|     45|	{
  643|     45|		unsigned int packed_index = this->block_mode_packed_index[block_mode];
  644|     45|		assert(packed_index != BLOCK_BAD_BLOCK_MODE && packed_index < this->block_mode_count_all);
  ------------------
  |  Branch (644:3): [True: 45, False: 0]
  |  Branch (644:3): [True: 45, False: 0]
  |  Branch (644:3): [True: 45, False: 0]
  ------------------
  645|     45|		return this->block_modes[packed_index];
  646|     45|	}
_ZNK21block_size_descriptor19get_decimation_infoEj:
  676|     45|	{
  677|     45|		return this->decimation_tables[decimation_mode];
  678|     45|	}

_ZN4astc9rand_initEPm:
   33|      2|{
   34|      2|	state[0] = 0xfaf9e171cea1ec6bULL;
   35|      2|	state[1] = 0xf1b318cc06af5d71ULL;
   36|      2|}
_ZN4astc4randEPm:
   40|    153|{
   41|    153|	uint64_t s0 = state[0];
   42|    153|	uint64_t s1 = state[1];
   43|    153|	uint64_t res = s0 + s1;
   44|    153|	s1 ^= s0;
   45|    153|	state[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16);
   46|    153|	state[1] = rotl(s1, 37);
   47|    153|	return res;
   48|    153|}
astcenc_mathlib.cpp:_ZL4rotlmi:
   27|    306|{
   28|    306|	return (val << count) | (val >> (64 - count));
   29|    306|}

astcenc_block_sizes.cpp:_ZN4astcL3maxIhEET_S1_S1_:
  221|   110k|{
  222|   110k|	return p > q ? p : q;
  ------------------
  |  Branch (222:9): [True: 88.7k, False: 21.4k]
  ------------------
  223|   110k|}
astcenc_partition_tables.cpp:_ZN4astcL3minIhEET_S1_S1_:
  170|  13.2k|{
  171|  13.2k|	return p < q ? p : q;
  ------------------
  |  Branch (171:9): [True: 5.05k, False: 8.15k]
  ------------------
  172|  13.2k|}

_Z21init_partition_tablesR21block_size_descriptorbj:
  471|      3|) {
  472|      3|	partition_info* par_tab2 = bsd.partitionings;
  473|      3|	partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
  474|      3|	partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
  475|      3|	partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
  476|       |
  477|      3|	generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
  478|      3|	bsd.partitioning_count_selected[0] = 1;
  479|      3|	bsd.partitioning_count_all[0] = 1;
  480|       |
  481|      3|	uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS];
  ------------------
  |  |   25|      3|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   69|      3|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  482|       |
  483|      3|	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
  484|      3|	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
  485|      3|	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
  486|       |
  487|      3|	delete[] canonical_patterns;
  488|      3|}
astcenc_partition_tables.cpp:_ZL33generate_one_partition_info_entryR21block_size_descriptorjjjR14partition_info:
  282|  13.2k|) {
  283|       |#if defined(ASTCENC_DECOMPRESS_ONLY)
  284|       |	// Suppress unused parameter warning
  285|       |	(void)partition_remap_index;
  286|       |#endif
  287|       |
  288|  13.2k|	int texels_per_block = bsd.texel_count;
  289|  13.2k|	bool small_block = texels_per_block < 32;
  290|       |
  291|  13.2k|	uint8_t *partition_of_texel = pi.partition_of_texel;
  292|       |
  293|       |	// Assign texels to partitions
  294|  13.2k|	int texel_idx = 0;
  295|  13.2k|	int counts[BLOCK_MAX_PARTITIONS] { 0 };
  296|  47.0k|	for (unsigned int z = 0; z < bsd.zdim; z++)
  ------------------
  |  Branch (296:27): [True: 33.7k, False: 13.2k]
  ------------------
  297|  33.7k|	{
  298|   250k|		for (unsigned int y = 0; y <  bsd.ydim; y++)
  ------------------
  |  Branch (298:28): [True: 216k, False: 33.7k]
  ------------------
  299|   216k|		{
  300|  1.76M|			for (unsigned int x = 0; x <  bsd.xdim; x++)
  ------------------
  |  Branch (300:29): [True: 1.55M, False: 216k]
  ------------------
  301|  1.55M|			{
  302|  1.55M|				uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
  303|  1.55M|				pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
  304|  1.55M|				*partition_of_texel++ = part;
  305|  1.55M|			}
  306|   216k|		}
  307|  33.7k|	}
  308|       |
  309|       |	// Fill loop tail so we can overfetch later
  310|  53.4k|	for (unsigned int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (310:27): [True: 40.2k, False: 13.2k]
  ------------------
  311|  40.2k|	{
  312|  40.2k|		size_t ptex_count = counts[i];
  313|  40.2k|		size_t ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
  314|  78.4k|		for (size_t j = ptex_count; j < ptex_count_simd; j++)
  ------------------
  |  Branch (314:31): [True: 38.2k, False: 40.2k]
  ------------------
  315|  38.2k|		{
  316|  38.2k|			pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
  317|  38.2k|		}
  318|  40.2k|	}
  319|       |
  320|       |	// Populate the actual procedural partition count
  321|  13.2k|	if (counts[0] == 0)
  ------------------
  |  Branch (321:6): [True: 2.40k, False: 10.8k]
  ------------------
  322|  2.40k|	{
  323|  2.40k|		pi.partition_count = 0;
  324|  2.40k|	}
  325|  10.8k|	else if (counts[1] == 0)
  ------------------
  |  Branch (325:11): [True: 2.85k, False: 7.95k]
  ------------------
  326|  2.85k|	{
  327|  2.85k|		pi.partition_count = 1;
  328|  2.85k|	}
  329|  7.95k|	else if (counts[2] == 0)
  ------------------
  |  Branch (329:11): [True: 4.26k, False: 3.69k]
  ------------------
  330|  4.26k|	{
  331|  4.26k|		pi.partition_count = 2;
  332|  4.26k|	}
  333|  3.69k|	else if (counts[3] == 0)
  ------------------
  |  Branch (333:11): [True: 2.14k, False: 1.54k]
  ------------------
  334|  2.14k|	{
  335|  2.14k|		pi.partition_count = 3;
  336|  2.14k|	}
  337|  1.54k|	else
  338|  1.54k|	{
  339|  1.54k|		pi.partition_count = 4;
  340|  1.54k|	}
  341|       |
  342|       |	// Populate the partition index
  343|  13.2k|	pi.partition_index = static_cast<uint16_t>(partition_index);
  344|       |
  345|  66.0k|	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
  ------------------
  |  Branch (345:27): [True: 52.8k, False: 13.2k]
  ------------------
  346|  52.8k|	{
  347|  52.8k|		pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
  348|  52.8k|	}
  349|       |
  350|       |	// Valid partitionings have texels in all of the requested partitions
  351|  13.2k|	bool valid = pi.partition_count == partition_count;
  352|       |
  353|  13.2k|#if !defined(ASTCENC_DECOMPRESS_ONLY)
  354|       |	// Populate the coverage bitmaps for 2/3/4 partitions
  355|  13.2k|	uint64_t* bitmaps { nullptr };
  356|  13.2k|	if (partition_count == 2)
  ------------------
  |  Branch (356:6): [True: 3.99k, False: 9.21k]
  ------------------
  357|  3.99k|	{
  358|  3.99k|		bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
  359|  3.99k|	}
  360|  9.21k|	else if (partition_count == 3)
  ------------------
  |  Branch (360:11): [True: 4.61k, False: 4.60k]
  ------------------
  361|  4.61k|	{
  362|  4.61k|		bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
  363|  4.61k|	}
  364|  4.60k|	else if (partition_count == 4)
  ------------------
  |  Branch (364:11): [True: 4.59k, False: 3]
  ------------------
  365|  4.59k|	{
  366|  4.59k|		bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
  367|  4.59k|	}
  368|       |
  369|  13.2k|	if (bitmaps)
  ------------------
  |  Branch (369:6): [True: 13.2k, False: 3]
  ------------------
  370|  13.2k|	{
  371|       |		// Populate the partition coverage bitmap
  372|  53.4k|		for (unsigned int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (372:28): [True: 40.2k, False: 13.2k]
  ------------------
  373|  40.2k|		{
  374|  40.2k|			bitmaps[i] = 0ULL;
  375|  40.2k|		}
  376|       |
  377|  13.2k|		unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
  378|   615k|		for (unsigned int i = 0; i < texels_to_process; i++)
  ------------------
  |  Branch (378:28): [True: 602k, False: 13.2k]
  ------------------
  379|   602k|		{
  380|   602k|			unsigned int idx = bsd.kmeans_texels[i];
  381|   602k|			bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
  382|   602k|		}
  383|  13.2k|	}
  384|  13.2k|#endif
  385|       |
  386|  13.2k|	return valid;
  387|  13.2k|}
astcenc_partition_tables.cpp:_ZL16select_partitioniiiiib:
  149|  1.55M|) {
  150|       |	// For small blocks bias the coordinates to get better distribution
  151|  1.55M|	if (small_block)
  ------------------
  |  Branch (151:6): [True: 80.9k, False: 1.47M]
  ------------------
  152|  80.9k|	{
  153|  80.9k|		x <<= 1;
  154|  80.9k|		y <<= 1;
  155|  80.9k|		z <<= 1;
  156|  80.9k|	}
  157|       |
  158|  1.55M|	seed += (partition_count - 1) * 1024;
  159|       |
  160|  1.55M|	uint32_t rnum = hash52(seed);
  161|       |
  162|  1.55M|	uint8_t seed1 = rnum & 0xF;
  163|  1.55M|	uint8_t seed2 = (rnum >> 4) & 0xF;
  164|  1.55M|	uint8_t seed3 = (rnum >> 8) & 0xF;
  165|  1.55M|	uint8_t seed4 = (rnum >> 12) & 0xF;
  166|  1.55M|	uint8_t seed5 = (rnum >> 16) & 0xF;
  167|  1.55M|	uint8_t seed6 = (rnum >> 20) & 0xF;
  168|  1.55M|	uint8_t seed7 = (rnum >> 24) & 0xF;
  169|  1.55M|	uint8_t seed8 = (rnum >> 28) & 0xF;
  170|  1.55M|	uint8_t seed9 = (rnum >> 18) & 0xF;
  171|  1.55M|	uint8_t seed10 = (rnum >> 22) & 0xF;
  172|  1.55M|	uint8_t seed11 = (rnum >> 26) & 0xF;
  173|  1.55M|	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
  174|       |
  175|       |	// Squaring all the seeds in order to bias their distribution towards lower values.
  176|  1.55M|	seed1 *= seed1;
  177|  1.55M|	seed2 *= seed2;
  178|  1.55M|	seed3 *= seed3;
  179|  1.55M|	seed4 *= seed4;
  180|  1.55M|	seed5 *= seed5;
  181|  1.55M|	seed6 *= seed6;
  182|  1.55M|	seed7 *= seed7;
  183|  1.55M|	seed8 *= seed8;
  184|  1.55M|	seed9 *= seed9;
  185|  1.55M|	seed10 *= seed10;
  186|  1.55M|	seed11 *= seed11;
  187|  1.55M|	seed12 *= seed12;
  188|       |
  189|  1.55M|	int sh1, sh2;
  190|  1.55M|	if (seed & 1)
  ------------------
  |  Branch (190:6): [True: 775k, False: 776k]
  ------------------
  191|   775k|	{
  192|   775k|		sh1 = (seed & 2 ? 4 : 5);
  ------------------
  |  Branch (192:10): [True: 357k, False: 417k]
  ------------------
  193|   775k|		sh2 = (partition_count == 3 ? 6 : 5);
  ------------------
  |  Branch (193:10): [True: 274k, False: 500k]
  ------------------
  194|   775k|	}
  195|   776k|	else
  196|   776k|	{
  197|   776k|		sh1 = (partition_count == 3 ? 6 : 5);
  ------------------
  |  Branch (197:10): [True: 275k, False: 500k]
  ------------------
  198|   776k|		sh2 = (seed & 2 ? 4 : 5);
  ------------------
  |  Branch (198:10): [True: 368k, False: 407k]
  ------------------
  199|   776k|	}
  200|       |
  201|  1.55M|	int sh3 = (seed & 0x10) ? sh1 : sh2;
  ------------------
  |  Branch (201:12): [True: 776k, False: 774k]
  ------------------
  202|       |
  203|  1.55M|	seed1 >>= sh1;
  204|  1.55M|	seed2 >>= sh2;
  205|  1.55M|	seed3 >>= sh1;
  206|  1.55M|	seed4 >>= sh2;
  207|  1.55M|	seed5 >>= sh1;
  208|  1.55M|	seed6 >>= sh2;
  209|  1.55M|	seed7 >>= sh1;
  210|  1.55M|	seed8 >>= sh2;
  211|       |
  212|  1.55M|	seed9 >>= sh3;
  213|  1.55M|	seed10 >>= sh3;
  214|  1.55M|	seed11 >>= sh3;
  215|  1.55M|	seed12 >>= sh3;
  216|       |
  217|  1.55M|	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
  218|  1.55M|	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
  219|  1.55M|	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
  220|  1.55M|	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
  221|       |
  222|       |	// Apply the saw
  223|  1.55M|	a &= 0x3F;
  224|  1.55M|	b &= 0x3F;
  225|  1.55M|	c &= 0x3F;
  226|  1.55M|	d &= 0x3F;
  227|       |
  228|       |	// Remove some of the components if we are to output < 4 partitions.
  229|  1.55M|	if (partition_count <= 3)
  ------------------
  |  Branch (229:6): [True: 1.00M, False: 546k]
  ------------------
  230|  1.00M|	{
  231|  1.00M|		d = 0;
  232|  1.00M|	}
  233|       |
  234|  1.55M|	if (partition_count <= 2)
  ------------------
  |  Branch (234:6): [True: 454k, False: 1.09M]
  ------------------
  235|   454k|	{
  236|   454k|		c = 0;
  237|   454k|	}
  238|       |
  239|  1.55M|	if (partition_count <= 1)
  ------------------
  |  Branch (239:6): [True: 376, False: 1.55M]
  ------------------
  240|    376|	{
  241|    376|		b = 0;
  242|    376|	}
  243|       |
  244|  1.55M|	uint8_t partition;
  245|  1.55M|	if (a >= b && a >= c && a >= d)
  ------------------
  |  Branch (245:6): [True: 851k, False: 700k]
  |  Branch (245:16): [True: 647k, False: 203k]
  |  Branch (245:26): [True: 599k, False: 47.7k]
  ------------------
  246|   599k|	{
  247|   599k|		partition = 0;
  248|   599k|	}
  249|   951k|	else if (b >= c && b >= d)
  ------------------
  |  Branch (249:11): [True: 543k, False: 408k]
  |  Branch (249:21): [True: 476k, False: 66.7k]
  ------------------
  250|   476k|	{
  251|   476k|		partition = 1;
  252|   476k|	}
  253|   475k|	else if (c >= d)
  ------------------
  |  Branch (253:11): [True: 339k, False: 135k]
  ------------------
  254|   339k|	{
  255|   339k|		partition = 2;
  256|   339k|	}
  257|   135k|	else
  258|   135k|	{
  259|   135k|		partition = 3;
  260|   135k|	}
  261|       |
  262|  1.55M|	return partition;
  263|  1.55M|}
astcenc_partition_tables.cpp:_ZL6hash52j:
  116|  1.55M|) {
  117|  1.55M|	inp ^= inp >> 15;
  118|       |
  119|       |	// (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
  120|  1.55M|	inp *= 0xEEDE0891;
  121|  1.55M|	inp ^= inp >> 5;
  122|  1.55M|	inp += inp << 16;
  123|  1.55M|	inp ^= inp >> 7;
  124|  1.55M|	inp ^= inp >> 3;
  125|  1.55M|	inp ^= inp << 6;
  126|  1.55M|	inp ^= inp >> 17;
  127|  1.55M|	return inp;
  128|  1.55M|}
astcenc_partition_tables.cpp:_ZL45build_partition_table_for_one_partition_countR21block_size_descriptorbjjP14partition_infoPm:
  396|      9|) {
  397|      9|	unsigned int next_index = 0;
  398|      9|	bsd.partitioning_count_selected[partition_count - 1] = 0;
  399|      9|	bsd.partitioning_count_all[partition_count - 1] = 0;
  400|       |
  401|       |	// Skip tables larger than config max partition count if we can omit modes
  402|      9|	if (can_omit_partitionings && (partition_count > partition_count_cutoff))
  ------------------
  |  Branch (402:6): [True: 0, False: 9]
  |  Branch (402:32): [True: 0, False: 0]
  ------------------
  403|      0|	{
  404|      0|		return;
  405|      0|	}
  406|       |
  407|       |	// Iterate through twice
  408|       |	//   - Pass 0: Keep selected partitionings
  409|       |	//   - Pass 1: Keep non-selected partitionings (skip if in omit mode)
  410|      9|	unsigned int max_iter = can_omit_partitionings ? 1 : 2;
  ------------------
  |  Branch (410:26): [True: 0, False: 9]
  ------------------
  411|       |
  412|       |	// Tracker for things we built in the first iteration
  413|      9|	uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
  414|     27|	for (unsigned int x = 0; x < max_iter; x++)
  ------------------
  |  Branch (414:27): [True: 18, False: 9]
  ------------------
  415|     18|	{
  416|  18.4k|		for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
  ------------------
  |  Branch (416:28): [True: 18.4k, False: 18]
  ------------------
  417|  18.4k|		{
  418|       |			// Don't include things we built in the first pass
  419|  18.4k|			if ((x == 1) && build[i])
  ------------------
  |  Branch (419:8): [True: 9.21k, False: 9.21k]
  |  Branch (419:20): [True: 5.22k, False: 3.99k]
  ------------------
  420|  5.22k|			{
  421|  5.22k|				continue;
  422|  5.22k|			}
  423|       |
  424|  13.2k|			bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
  425|  13.2k|			if ((x == 0) && !keep_useful)
  ------------------
  |  Branch (425:8): [True: 9.21k, False: 3.99k]
  |  Branch (425:20): [True: 3.40k, False: 5.81k]
  ------------------
  426|  3.40k|			{
  427|  3.40k|				continue;
  428|  3.40k|			}
  429|       |
  430|  9.80k|			generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS);
  ------------------
  |  |   25|  9.80k|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   69|  9.80k|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  431|  9.80k|			bool keep_canonical = true;
  432|  4.04M|			for (unsigned int j = 0; j < next_index; j++)
  ------------------
  |  Branch (432:29): [True: 4.03M, False: 7.24k]
  ------------------
  433|  4.03M|			{
  434|  4.03M|				bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns +  j * BIT_PATTERN_WORDS);
  ------------------
  |  |   25|  4.03M|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   69|  4.03M|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
              				bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns +  j * BIT_PATTERN_WORDS);
  ------------------
  |  |   25|  4.03M|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   69|  4.03M|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  435|  4.03M|				if (match)
  ------------------
  |  Branch (435:9): [True: 2.55k, False: 4.03M]
  ------------------
  436|  2.55k|				{
  437|  2.55k|					keep_canonical = false;
  438|  2.55k|					break;
  439|  2.55k|				}
  440|  4.03M|			}
  441|       |
  442|  9.80k|			if (keep_useful && keep_canonical)
  ------------------
  |  Branch (442:8): [True: 6.40k, False: 3.40k]
  |  Branch (442:23): [True: 5.22k, False: 1.17k]
  ------------------
  443|  5.22k|			{
  444|  5.22k|				if (x == 0)
  ------------------
  |  Branch (444:9): [True: 5.22k, False: 0]
  ------------------
  445|  5.22k|				{
  446|  5.22k|					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
  447|  5.22k|					bsd.partitioning_count_selected[partition_count - 1]++;
  448|  5.22k|					bsd.partitioning_count_all[partition_count - 1]++;
  449|  5.22k|					build[i] = 1;
  450|  5.22k|					next_index++;
  451|  5.22k|				}
  452|  5.22k|			}
  453|  4.58k|			else
  454|  4.58k|			{
  455|  4.58k|				if (x == 1)
  ------------------
  |  Branch (455:9): [True: 3.99k, False: 589]
  ------------------
  456|  3.99k|				{
  457|  3.99k|					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
  458|  3.99k|					bsd.partitioning_count_all[partition_count - 1]++;
  459|  3.99k|					next_index++;
  460|  3.99k|				}
  461|  4.58k|			}
  462|  9.80k|		}
  463|     18|	}
  464|      9|}
astcenc_partition_tables.cpp:_ZL31generate_canonical_partitioningjPKhPm:
   42|  9.80k|) {
   43|       |	// Clear the pattern
   44|  78.4k|	for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++)
  ------------------
  |  |   25|  78.4k|#define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
  |  |  ------------------
  |  |  |  |   69|  78.4k|	#define ASTCENC_BLOCK_MAX_TEXELS 216 // A 3D 6x6x6 block
  |  |  ------------------
  ------------------
  |  Branch (44:27): [True: 68.6k, False: 9.80k]
  ------------------
   45|  68.6k|	{
   46|  68.6k|		bit_pattern[i] = 0;
   47|  68.6k|	}
   48|       |
   49|       |	// Store a mapping to reorder the raw partitions so that the partitions are ordered such
   50|       |	// that the lowest texel index in partition N is smaller than the lowest texel index in
   51|       |	// partition N + 1.
   52|  9.80k|	int mapped_index[BLOCK_MAX_PARTITIONS];
   53|  9.80k|	int map_weight_count = 0;
   54|       |
   55|  49.0k|	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
  ------------------
  |  Branch (55:27): [True: 39.2k, False: 9.80k]
  ------------------
   56|  39.2k|	{
   57|  39.2k|		mapped_index[i] = -1;
   58|  39.2k|	}
   59|       |
   60|  1.19M|	for (unsigned int i = 0; i < texel_count; i++)
  ------------------
  |  Branch (60:27): [True: 1.18M, False: 9.80k]
  ------------------
   61|  1.18M|	{
   62|  1.18M|		int index = partition_of_texel[i];
   63|  1.18M|		if (mapped_index[index] < 0)
  ------------------
  |  Branch (63:7): [True: 24.4k, False: 1.16M]
  ------------------
   64|  24.4k|		{
   65|  24.4k|			mapped_index[index] = map_weight_count++;
   66|  24.4k|		}
   67|       |
   68|  1.18M|		uint64_t xlat_index = mapped_index[index];
   69|  1.18M|		bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
   70|  1.18M|	}
   71|  9.80k|}
astcenc_partition_tables.cpp:_ZL31compare_canonical_partitioningsPKmS0_:
   84|  4.03M|) {
   85|  4.03M|	return (part1[0] == part2[0])
  ------------------
  |  Branch (85:9): [True: 79.2k, False: 3.95M]
  ------------------
   86|  79.2k|#if BIT_PATTERN_WORDS > 1
   87|  79.2k|	    && (part1[1] == part2[1])
  ------------------
  |  Branch (87:9): [True: 36.8k, False: 42.3k]
  ------------------
   88|  36.8k|#endif
   89|  36.8k|#if BIT_PATTERN_WORDS > 2
   90|  36.8k|	    && (part1[2] == part2[2])
  ------------------
  |  Branch (90:9): [True: 20.6k, False: 16.1k]
  ------------------
   91|  20.6k|#endif
   92|  20.6k|#if BIT_PATTERN_WORDS > 3
   93|  20.6k|	    && (part1[3] == part2[3])
  ------------------
  |  Branch (93:9): [True: 9.89k, False: 10.7k]
  ------------------
   94|  9.89k|#endif
   95|  9.89k|#if BIT_PATTERN_WORDS > 4
   96|  9.89k|	    && (part1[4] == part2[4])
  ------------------
  |  Branch (96:9): [True: 5.44k, False: 4.44k]
  ------------------
   97|  5.44k|#endif
   98|  5.44k|#if BIT_PATTERN_WORDS > 5
   99|  5.44k|	    && (part1[5] == part2[5])
  ------------------
  |  Branch (99:9): [True: 3.05k, False: 2.39k]
  ------------------
  100|  3.05k|#endif
  101|  3.05k|#if BIT_PATTERN_WORDS > 6
  102|  3.05k|	    && (part1[6] == part2[6])
  ------------------
  |  Branch (102:9): [True: 2.55k, False: 497]
  ------------------
  103|  4.03M|#endif
  104|  4.03M|	    ;
  105|  4.03M|}

_Z23get_2d_percentile_tablejj:
 1168|      2|) {
 1169|      2|	float* unpacked_table = new float[WEIGHTS_MAX_BLOCK_MODES];
 1170|      2|	const packed_percentile_table *apt = get_packed_table(xdim, ydim);
 1171|       |
 1172|       |	// Set the default percentile
 1173|  4.09k|	for (unsigned int i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++)
  ------------------
  |  Branch (1173:27): [True: 4.09k, False: 2]
  ------------------
 1174|  4.09k|	{
 1175|  4.09k|		unpacked_table[i] = 1.0f;
 1176|  4.09k|	}
 1177|       |
 1178|       |	// Populate the unpacked percentile values
 1179|      6|	for (int i = 0; i < 2; i++)
  ------------------
  |  Branch (1179:18): [True: 4, False: 2]
  ------------------
 1180|      4|	{
 1181|      4|		unsigned int itemcount = apt->item_count[i];
 1182|      4|		unsigned int difscale = apt->difscales[i];
 1183|      4|		unsigned int accum = apt->initial_percs[i];
 1184|      4|		const uint16_t *item_ptr = apt->items[i];
 1185|       |
 1186|    924|		for (unsigned int j = 0; j < itemcount; j++)
  ------------------
  |  Branch (1186:28): [True: 920, False: 4]
  ------------------
 1187|    920|		{
 1188|    920|			uint16_t item = item_ptr[j];
 1189|    920|			unsigned int idx = item & 0x7FF;
 1190|    920|			unsigned int weight = (item >> 11) & 0x1F;
 1191|    920|			accum += weight;
 1192|    920|			unpacked_table[idx] = static_cast<float>(accum) / static_cast<float>(difscale);
 1193|    920|		}
 1194|      4|	}
 1195|       |
 1196|      2|	return unpacked_table;
 1197|      2|}
astcenc_percentile_tables.cpp:_ZL16get_packed_tableii:
 1112|      2|) {
 1113|      2|	int idx = (ydim << 8) | xdim;
 1114|      2|	switch (idx)
  ------------------
  |  Branch (1114:10): [True: 2, False: 0]
  ------------------
 1115|      2|	{
 1116|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (4 * 4)
 1117|      1|		case 0x0404: return &block_pcd_4x4;
  ------------------
  |  Branch (1117:3): [True: 1, False: 1]
  ------------------
 1118|      0|#endif
 1119|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (5 * 4)
 1120|      0|		case 0x0405: return &block_pcd_5x4;
  ------------------
  |  Branch (1120:3): [True: 0, False: 2]
  ------------------
 1121|      0|#endif
 1122|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (5 * 5)
 1123|      0|		case 0x0505: return &block_pcd_5x5;
  ------------------
  |  Branch (1123:3): [True: 0, False: 2]
  ------------------
 1124|      0|#endif
 1125|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (6 * 5)
 1126|      0|		case 0x0506: return &block_pcd_6x5;
  ------------------
  |  Branch (1126:3): [True: 0, False: 2]
  ------------------
 1127|      0|#endif
 1128|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (6 * 6)
 1129|      0|		case 0x0606: return &block_pcd_6x6;
  ------------------
  |  Branch (1129:3): [True: 0, False: 2]
  ------------------
 1130|      0|#endif
 1131|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (8 * 5)
 1132|      0|		case 0x0508: return &block_pcd_8x5;
  ------------------
  |  Branch (1132:3): [True: 0, False: 2]
  ------------------
 1133|      0|#endif
 1134|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (8 * 6)
 1135|      0|		case 0x0608: return &block_pcd_8x6;
  ------------------
  |  Branch (1135:3): [True: 0, False: 2]
  ------------------
 1136|      0|#endif
 1137|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (8 * 8)
 1138|      0|		case 0x0808: return &block_pcd_8x8;
  ------------------
  |  Branch (1138:3): [True: 0, False: 2]
  ------------------
 1139|      0|#endif
 1140|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 5)
 1141|      0|		case 0x050A: return &block_pcd_10x5;
  ------------------
  |  Branch (1141:3): [True: 0, False: 2]
  ------------------
 1142|      0|#endif
 1143|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 6)
 1144|      0|		case 0x060A: return &block_pcd_10x6;
  ------------------
  |  Branch (1144:3): [True: 0, False: 2]
  ------------------
 1145|      0|#endif
 1146|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 8)
 1147|      0|		case 0x080A: return &block_pcd_10x8;
  ------------------
  |  Branch (1147:3): [True: 0, False: 2]
  ------------------
 1148|      0|#endif
 1149|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (10 * 10)
 1150|      0|		case 0x0A0A: return &block_pcd_10x10;
  ------------------
  |  Branch (1150:3): [True: 0, False: 2]
  ------------------
 1151|      0|#endif
 1152|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (12 * 10)
 1153|      0|		case 0x0A0C: return &block_pcd_12x10;
  ------------------
  |  Branch (1153:3): [True: 0, False: 2]
  ------------------
 1154|      0|#endif
 1155|      0|#if ASTCENC_BLOCK_MAX_TEXELS >= (12 * 12)
 1156|      1|		case 0x0C0C: return &block_pcd_12x12;
  ------------------
  |  Branch (1156:3): [True: 1, False: 1]
  ------------------
 1157|      2|#endif
 1158|      2|	}
 1159|       |
 1160|       |	// Should never hit this with a valid 2D block size
 1161|      0|	return nullptr;
 1162|      2|}

_Z20physical_to_symbolicRK21block_size_descriptorPKhR25symbolic_compressed_block:
  295|    289|) {
  296|    289|	uint8_t bswapped[16];
  297|       |
  298|    289|	scb.block_type = SYM_BTYPE_NONCONST;
  299|       |
  300|       |	// Extract header fields
  301|    289|	int block_mode = read_bits(11, 0, pcb);
  302|    289|	if ((block_mode & 0x1FF) == 0x1FC)
  ------------------
  |  Branch (302:6): [True: 223, False: 66]
  ------------------
  303|    223|	{
  304|       |		// Constant color block
  305|       |
  306|       |		// Check what format the data has
  307|    223|		if (block_mode & 0x200)
  ------------------
  |  Branch (307:7): [True: 199, False: 24]
  ------------------
  308|    199|		{
  309|    199|			scb.block_type = SYM_BTYPE_CONST_F16;
  310|    199|		}
  311|     24|		else
  312|     24|		{
  313|     24|			scb.block_type = SYM_BTYPE_CONST_U16;
  314|     24|		}
  315|       |
  316|    223|		scb.partition_count = 0;
  317|  1.11k|		for (int i = 0; i < 4; i++)
  ------------------
  |  Branch (317:19): [True: 892, False: 223]
  ------------------
  318|    892|		{
  319|    892|			scb.constant_color[i] = pcb[2 * i + 8] | (pcb[2 * i + 9] << 8);
  320|    892|		}
  321|       |
  322|       |		// Additionally, check that the void-extent
  323|    223|		if (bsd.zdim == 1)
  ------------------
  |  Branch (323:7): [True: 123, False: 100]
  ------------------
  324|    123|		{
  325|       |			// 2D void-extent
  326|    123|			int rsvbits = read_bits(2, 10, pcb);
  327|    123|			if (rsvbits != 3)
  ------------------
  |  Branch (327:8): [True: 5, False: 118]
  ------------------
  328|      5|			{
  329|      5|				scb.block_type = SYM_BTYPE_ERROR;
  330|      5|				return;
  331|      5|			}
  332|       |
  333|       |			// Low values span 3 bytes so need two read_bits calls
  334|    118|			int vx_low_s = read_bits(8, 12, pcb) | (read_bits(5, 12 + 8, pcb) << 8);
  335|    118|			int vx_high_s = read_bits(13, 25, pcb);
  336|    118|			int vx_low_t = read_bits(8, 38, pcb) | (read_bits(5, 38 + 8, pcb) << 8);
  337|    118|			int vx_high_t = read_bits(13, 51, pcb);
  338|       |
  339|    118|			int all_ones = vx_low_s == 0x1FFF && vx_high_s == 0x1FFF &&
  ------------------
  |  Branch (339:19): [True: 74, False: 44]
  |  Branch (339:41): [True: 48, False: 26]
  ------------------
  340|     48|			               vx_low_t == 0x1FFF && vx_high_t == 0x1FFF;
  ------------------
  |  Branch (340:19): [True: 27, False: 21]
  |  Branch (340:41): [True: 2, False: 25]
  ------------------
  341|       |
  342|    118|			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t) && !all_ones)
  ------------------
  |  Branch (342:9): [True: 89, False: 29]
  |  Branch (342:34): [True: 21, False: 8]
  |  Branch (342:60): [True: 108, False: 2]
  ------------------
  343|    108|			{
  344|    108|				scb.block_type = SYM_BTYPE_ERROR;
  345|    108|				return;
  346|    108|			}
  347|    118|		}
  348|    100|		else
  349|    100|		{
  350|       |			// 3D void-extent
  351|    100|			int vx_low_s = read_bits(9, 10, pcb);
  352|    100|			int vx_high_s = read_bits(9, 19, pcb);
  353|    100|			int vx_low_t = read_bits(9, 28, pcb);
  354|    100|			int vx_high_t = read_bits(9, 37, pcb);
  355|    100|			int vx_low_r = read_bits(9, 46, pcb);
  356|    100|			int vx_high_r = read_bits(9, 55, pcb);
  357|       |
  358|    100|			int all_ones = vx_low_s == 0x1FF && vx_high_s == 0x1FF &&
  ------------------
  |  Branch (358:19): [True: 62, False: 38]
  |  Branch (358:40): [True: 53, False: 9]
  ------------------
  359|     53|			               vx_low_t == 0x1FF && vx_high_t == 0x1FF &&
  ------------------
  |  Branch (359:19): [True: 40, False: 13]
  |  Branch (359:40): [True: 28, False: 12]
  ------------------
  360|     28|			               vx_low_r == 0x1FF && vx_high_r == 0x1FF;
  ------------------
  |  Branch (360:19): [True: 14, False: 14]
  |  Branch (360:40): [True: 1, False: 13]
  ------------------
  361|       |
  362|    100|			if ((vx_low_s >= vx_high_s || vx_low_t >= vx_high_t || vx_low_r >= vx_high_r) && !all_ones)
  ------------------
  |  Branch (362:9): [True: 68, False: 32]
  |  Branch (362:34): [True: 14, False: 18]
  |  Branch (362:59): [True: 13, False: 5]
  |  Branch (362:85): [True: 94, False: 1]
  ------------------
  363|     94|			{
  364|     94|				scb.block_type = SYM_BTYPE_ERROR;
  365|     94|				return;
  366|     94|			}
  367|    100|		}
  368|       |
  369|     16|		return;
  370|    223|	}
  371|       |
  372|     66|	unsigned int packed_index = bsd.block_mode_packed_index[block_mode];
  373|     66|	if (packed_index == BLOCK_BAD_BLOCK_MODE)
  ------------------
  |  Branch (373:6): [True: 21, False: 45]
  ------------------
  374|     21|	{
  375|     21|		scb.block_type = SYM_BTYPE_ERROR;
  376|     21|		return;
  377|     21|	}
  378|       |
  379|     45|	const auto& bm = bsd.get_block_mode(block_mode);
  380|     45|	const auto& di = bsd.get_decimation_info(bm.decimation_mode);
  381|       |
  382|     45|	int weight_count = di.weight_count;
  383|     45|	promise(weight_count > 0);
  ------------------
  |  |   62|     45|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (383:2): [True: 45, False: 0]
  ------------------
  384|       |
  385|     45|	quant_method weight_quant_method = static_cast<quant_method>(bm.quant_mode);
  386|     45|	int is_dual_plane = bm.is_dual_plane;
  387|       |
  388|     45|	int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count;
  ------------------
  |  Branch (388:26): [True: 16, False: 29]
  ------------------
  389|       |
  390|     45|	int partition_count = read_bits(2, 11, pcb) + 1;
  391|     45|	promise(partition_count > 0);
  ------------------
  |  |   62|     45|	#define promise(cond) assert(cond)
  ------------------
  |  Branch (391:2): [True: 45, False: 0]
  ------------------
  392|       |
  393|     45|	scb.block_mode = static_cast<uint16_t>(block_mode);
  394|     45|	scb.partition_count = static_cast<uint8_t>(partition_count);
  395|       |
  396|    765|	for (int i = 0; i < 16; i++)
  ------------------
  |  Branch (396:18): [True: 720, False: 45]
  ------------------
  397|    720|	{
  398|    720|		bswapped[i] = static_cast<uint8_t>(bitrev8(pcb[15 - i]));
  399|    720|	}
  400|       |
  401|     45|	int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant_method);
  402|       |
  403|     45|	int below_weights_pos = 128 - bits_for_weights;
  404|       |
  405|     45|	uint8_t indices[64];
  406|     45|	const auto& qat = quant_and_xfer_tables[weight_quant_method];
  407|       |
  408|     45|	decode_ise(weight_quant_method, real_weight_count, bswapped, indices, 0);
  409|       |
  410|     45|	if (is_dual_plane)
  ------------------
  |  Branch (410:6): [True: 16, False: 29]
  ------------------
  411|     16|	{
  412|    253|		for (int i = 0; i < weight_count; i++)
  ------------------
  |  Branch (412:19): [True: 237, False: 16]
  ------------------
  413|    237|		{
  414|    237|			scb.weights[i] = qat.unscramble_and_unquant_map[indices[2 * i]];
  415|    237|			scb.weights[i + WEIGHTS_PLANE2_OFFSET] = qat.unscramble_and_unquant_map[indices[2 * i + 1]];
  416|    237|		}
  417|     16|	}
  418|     29|	else
  419|     29|	{
  420|    833|		for (int i = 0; i < weight_count; i++)
  ------------------
  |  Branch (420:19): [True: 804, False: 29]
  ------------------
  421|    804|		{
  422|    804|			scb.weights[i] = qat.unscramble_and_unquant_map[indices[i]];
  423|    804|		}
  424|     29|	}
  425|       |
  426|     45|	if (is_dual_plane && partition_count == 4)
  ------------------
  |  Branch (426:6): [True: 16, False: 29]
  |  Branch (426:23): [True: 8, False: 8]
  ------------------
  427|      8|	{
  428|      8|		scb.block_type = SYM_BTYPE_ERROR;
  429|      8|		return;
  430|      8|	}
  431|       |
  432|     37|	scb.color_formats_matched = 0;
  433|       |
  434|       |	// Determine the format of each endpoint pair
  435|     37|	int color_formats[BLOCK_MAX_PARTITIONS];
  436|     37|	int encoded_type_highpart_size = 0;
  437|     37|	if (partition_count == 1)
  ------------------
  |  Branch (437:6): [True: 8, False: 29]
  ------------------
  438|      8|	{
  439|      8|		color_formats[0] = read_bits(4, 13, pcb);
  440|      8|		scb.partition_index = 0;
  441|      8|	}
  442|     29|	else
  443|     29|	{
  444|     29|		encoded_type_highpart_size = (3 * partition_count) - 4;
  445|     29|		below_weights_pos -= encoded_type_highpart_size;
  446|     29|		int encoded_type = read_bits(6, 13 + PARTITION_INDEX_BITS, pcb) |
  447|     29|		                  (read_bits(encoded_type_highpart_size, below_weights_pos, pcb) << 6);
  448|     29|		int baseclass = encoded_type & 0x3;
  449|     29|		if (baseclass == 0)
  ------------------
  |  Branch (449:7): [True: 9, False: 20]
  ------------------
  450|      9|		{
  451|     32|			for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (451:20): [True: 23, False: 9]
  ------------------
  452|     23|			{
  453|     23|				color_formats[i] = (encoded_type >> 2) & 0xF;
  454|     23|			}
  455|       |
  456|      9|			below_weights_pos += encoded_type_highpart_size;
  457|      9|			scb.color_formats_matched = 1;
  458|      9|			encoded_type_highpart_size = 0;
  459|      9|		}
  460|     20|		else
  461|     20|		{
  462|     20|			int bitpos = 2;
  463|     20|			baseclass--;
  464|       |
  465|     76|			for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (465:20): [True: 56, False: 20]
  ------------------
  466|     56|			{
  467|     56|				color_formats[i] = (((encoded_type >> bitpos) & 1) + baseclass) << 2;
  468|     56|				bitpos++;
  469|     56|			}
  470|       |
  471|     76|			for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (471:20): [True: 56, False: 20]
  ------------------
  472|     56|			{
  473|     56|				color_formats[i] |= (encoded_type >> bitpos) & 3;
  474|     56|				bitpos += 2;
  475|     56|			}
  476|     20|		}
  477|     29|		scb.partition_index = static_cast<uint16_t>(read_bits(10, 13, pcb));
  478|     29|	}
  479|       |
  480|    124|	for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (480:18): [True: 87, False: 37]
  ------------------
  481|     87|	{
  482|     87|		scb.color_formats[i] = static_cast<uint8_t>(color_formats[i]);
  483|     87|	}
  484|       |
  485|       |	// Determine number of color endpoint integers
  486|     37|	int color_integer_count = 0;
  487|    124|	for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (487:18): [True: 87, False: 37]
  ------------------
  488|     87|	{
  489|     87|		int endpoint_class = color_formats[i] >> 2;
  490|     87|		color_integer_count += (endpoint_class + 1) * 2;
  491|     87|	}
  492|       |
  493|     37|	if (color_integer_count > 18)
  ------------------
  |  Branch (493:6): [True: 1, False: 36]
  ------------------
  494|      1|	{
  495|      1|		scb.block_type = SYM_BTYPE_ERROR;
  496|      1|		return;
  497|      1|	}
  498|       |
  499|       |	// Determine the color endpoint format to use
  500|     36|	static const int color_bits_arr[5] { -1, 115 - 4, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS, 113 - 4 - PARTITION_INDEX_BITS };
  501|     36|	int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size;
  502|     36|	if (is_dual_plane)
  ------------------
  |  Branch (502:6): [True: 8, False: 28]
  ------------------
  503|      8|	{
  504|      8|		color_bits -= 2;
  505|      8|	}
  506|       |
  507|     36|	if (color_bits < 0)
  ------------------
  |  Branch (507:6): [True: 4, False: 32]
  ------------------
  508|      4|	{
  509|      4|		color_bits = 0;
  510|      4|	}
  511|       |
  512|     36|	int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits];
  513|     36|	if (color_quant_level < QUANT_6)
  ------------------
  |  Branch (513:6): [True: 11, False: 25]
  ------------------
  514|     11|	{
  515|     11|		scb.block_type = SYM_BTYPE_ERROR;
  516|     11|		return;
  517|     11|	}
  518|       |
  519|       |	// Unpack the integer color values and assign to endpoints
  520|     25|	scb.quant_mode = static_cast<quant_method>(color_quant_level);
  521|       |
  522|     25|	uint8_t values_to_decode[32];
  523|     25|	decode_ise(static_cast<quant_method>(color_quant_level), color_integer_count, pcb,
  524|     25|	           values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS));
  ------------------
  |  Branch (524:32): [True: 8, False: 17]
  ------------------
  525|       |
  526|     25|	int valuecount_to_decode = 0;
  527|     25|	const uint8_t* unpack_table = color_scrambled_pquant_to_uquant_tables[scb.quant_mode - QUANT_6];
  528|     75|	for (int i = 0; i < partition_count; i++)
  ------------------
  |  Branch (528:18): [True: 50, False: 25]
  ------------------
  529|     50|	{
  530|     50|		int vals = 2 * (color_formats[i] >> 2) + 2;
  531|    240|		for (int j = 0; j < vals; j++)
  ------------------
  |  Branch (531:19): [True: 190, False: 50]
  ------------------
  532|    190|		{
  533|    190|			scb.color_values[i][j] = unpack_table[values_to_decode[j + valuecount_to_decode]];
  534|    190|		}
  535|     50|		valuecount_to_decode += vals;
  536|     50|	}
  537|       |
  538|       |	// Fetch component for second-plane in the case of dual plane of weights.
  539|     25|	scb.plane2_component = -1;
  540|     25|	if (is_dual_plane)
  ------------------
  |  Branch (540:6): [True: 4, False: 21]
  ------------------
  541|      4|	{
  542|      4|		scb.plane2_component = static_cast<int8_t>(read_bits(2, below_weights_pos - 2, pcb));
  543|      4|	}
  544|     25|}
astcenc_symbolic_physical.cpp:_ZL7bitrev8i:
   34|    720|{
   35|    720|	p = ((p & 0x0F) << 4) | ((p >> 4) & 0x0F);
   36|    720|	p = ((p & 0x33) << 2) | ((p >> 2) & 0x33);
   37|    720|	p = ((p & 0x55) << 1) | ((p >> 1) & 0x55);
   38|    720|	return p;
   39|    720|}
astcenc_symbolic_physical.cpp:_ZL9read_bitsiiPKh:
   58|  1.86k|) {
   59|  1.86k|	int mask = (1 << bitcount) - 1;
   60|  1.86k|	ptr += bitoffset >> 3;
   61|  1.86k|	bitoffset &= 7;
   62|  1.86k|	int value = ptr[0] | (ptr[1] << 8);
   63|  1.86k|	value >>= bitoffset;
   64|  1.86k|	value &= mask;
   65|  1.86k|	return value;
   66|  1.86k|}

_Z29round_up_to_simd_multiple_vlam:
  267|  40.5k|{
  268|  40.5k|	size_t multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH;
  ------------------
  |  |  227|  40.5k|	#define ASTCENC_SIMD_WIDTH 4
  ------------------
              	size_t multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH;
  ------------------
  |  |  227|  40.5k|	#define ASTCENC_SIMD_WIDTH 4
  ------------------
  269|  40.5k|	return multiples * ASTCENC_SIMD_WIDTH;
  ------------------
  |  |  227|  40.5k|	#define ASTCENC_SIMD_WIDTH 4
  ------------------
  270|  40.5k|}

