Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/lukes.py: 14%

111 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Lukes Algorithm for exact optimal weighted tree partitioning.""" 

2 

3from copy import deepcopy 

4from functools import lru_cache 

5from random import choice 

6 

7import networkx as nx 

8from networkx.utils import not_implemented_for 

9 

__all__ = ["lukes_partitioning"]

# Default edge-weight attribute key and the value assigned to every edge
# when the caller does not supply ``edge_weight``.
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
# Default node-weight attribute key and the value assigned to every node
# when the caller does not supply ``node_weight``.
D_NODE_W = "weight"
D_NODE_VALUE = 1
# Node-attribute key under which the algorithm stores, per node, the best
# partial partition found for each achievable weight.
PKEY = "partitions"
# maxsize for the lru_cache on cluster value/weight evaluation.
CLUSTER_EVAL_CACHE_SIZE = 2048

18 

19 

20def _split_n_from(n, min_size_of_first_part): 

21 # splits j in two parts of which the first is at least 

22 # the second argument 

23 assert n >= min_size_of_first_part 

24 for p1 in range(min_size_of_first_part, n + 1): 

25 yield p1, n - p1 

26 

27 

@nx._dispatch(node_attrs="node_weight", edge_attrs="edge_weight")
def lukes_partitioning(G, max_size, node_weight=None, edge_weight=None):
    """Optimal partitioning of a weighted tree using the Lukes algorithm.

    This algorithm partitions a connected, acyclic graph featuring integer
    node weights and float edge weights. The resulting clusters are such
    that the total weight of the nodes in each cluster does not exceed
    max_size and that the weight of the edges that are cut by the partition
    is minimum. The algorithm is based on [1]_.

    Parameters
    ----------
    G : NetworkX graph

    max_size : int
        Maximum weight a partition can have in terms of sum of
        node_weight for all nodes in the partition

    edge_weight : key
        Edge data key to use as weight. If None, the weights are all
        set to one.

    node_weight : key
        Node data key to use as weight. If None, the weights are all
        set to one. The data must be int.

    Returns
    -------
    partition : list
        A list of sets of nodes representing the clusters of the
        partition.

    Raises
    ------
    NotATree
        If G is not a tree.
    TypeError
        If any of the values of node_weight is not int.

    References
    ----------
    .. [1] Lukes, J. A. (1974).
       "Efficient Algorithm for the Partitioning of Trees."
       IBM Journal of Research and Development, 18(3), 217–224.

    """
    # First sanity check and tree preparation.
    # The algorithm operates on a rooted, directed tree ``t_G``; an
    # undirected input is rooted at an arbitrary node via dfs_tree.
    if not nx.is_tree(G):
        raise nx.NotATree("lukes_partitioning works only on trees")
    else:
        if nx.is_directed(G):
            # A directed tree must have exactly one node with in-degree 0:
            # that node is the root.
            root = [n for n, d in G.in_degree() if d == 0]
            assert len(root) == 1
            root = root[0]
            t_G = deepcopy(G)
        else:
            root = choice(list(G.nodes))
            # this has the desirable side effect of not inheriting attributes
            t_G = nx.dfs_tree(G, root)

    # Since we do not want to screw up the original graph,
    # if we have a blank attribute, we make a deepcopy
    if edge_weight is None or node_weight is None:
        safe_G = deepcopy(G)
        if edge_weight is None:
            nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
            edge_weight = D_EDGE_W
        if node_weight is None:
            nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
            node_weight = D_NODE_W
    else:
        # Both weight keys supplied by the caller: read from G directly.
        safe_G = G

    # Second sanity check
    # The values of node_weight MUST BE int.
    # I cannot see any room for duck typing without incurring serious
    # danger of subtle bugs.
    all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
    for x in all_n_attr:
        if not isinstance(x, int):
            raise TypeError(
                "lukes_partitioning needs integer "
                f"values for node_weight ({node_weight})"
            )

    # SUBROUTINES -----------------------
    # these functions are defined here for two reasons:
    # - brevity: we can leverage global "safe_G"
    # - caching: signatures are hashable

    @not_implemented_for("undirected")
    # this is intended to be called only on t_G
    def _leaves(gr):
        # A node with no descendants in the rooted tree is a leaf.
        for x in gr.nodes:
            if not nx.descendants(gr, x):
                yield x

    @not_implemented_for("undirected")
    def _a_parent_of_leaves_only(gr):
        # Return a node all of whose descendants are leaves; processing
        # such nodes bottom-up drives the dynamic program.
        tleaves = set(_leaves(gr))
        for n in set(gr.nodes) - tleaves:
            if all(x in tleaves for x in nx.descendants(gr, n)):
                return n

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _value_of_cluster(cluster):
        # Sum of the weights of the edges of safe_G internal to `cluster`.
        # Cached: `cluster` must be a frozenset.
        valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
        return sum(safe_G.edges[e][edge_weight] for e in valid_edges)

    def _value_of_partition(partition):
        # Total weight of edges kept inside clusters; maximizing this is
        # equivalent to minimizing the weight of cut edges.
        return sum(_value_of_cluster(frozenset(c)) for c in partition)

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _weight_of_cluster(cluster):
        # Total node weight of `cluster` (a frozenset, for caching).
        return sum(safe_G.nodes[n][node_weight] for n in cluster)

    def _pivot(partition, node):
        # The unique cluster of `partition` containing `node`.
        ccx = [c for c in partition if node in c]
        assert len(ccx) == 1
        return ccx[0]

    def _concatenate_or_merge(partition_1, partition_2, x, i, ref_weight):
        ccx = _pivot(partition_1, x)
        cci = _pivot(partition_2, i)
        merged_xi = ccx.union(cci)

        # We first check if we can do the merge.
        # If so, we do the actual calculations, otherwise we concatenate
        if _weight_of_cluster(frozenset(merged_xi)) <= ref_weight:
            # Merge: the two pivot clusters fuse, every other cluster of
            # both partitions is kept unchanged.
            cp1 = list(filter(lambda x: x != ccx, partition_1))
            cp2 = list(filter(lambda x: x != cci, partition_2))

            option_2 = [merged_xi] + cp1 + cp2
            return option_2, _value_of_partition(option_2)
        else:
            # Concatenate: keep the two partitions side by side.
            option_1 = partition_1 + partition_2
            return option_1, _value_of_partition(option_1)

    # INITIALIZATION -----------------------
    # Every node starts with the trivial partition {node}, stored under
    # its own weight; for leaves it is also stored under slot 0, the slot
    # reserved for "best over all weights".
    leaves = set(_leaves(t_G))
    for lv in leaves:
        t_G.nodes[lv][PKEY] = {}
        slot = safe_G.nodes[lv][node_weight]
        t_G.nodes[lv][PKEY][slot] = [{lv}]
        t_G.nodes[lv][PKEY][0] = [{lv}]

    for inner in [x for x in t_G.nodes if x not in leaves]:
        t_G.nodes[inner][PKEY] = {}
        slot = safe_G.nodes[inner][node_weight]
        t_G.nodes[inner][PKEY][slot] = [{inner}]

    # CORE ALGORITHM -----------------------
    # Repeatedly pick a node whose children are all leaves, absorb the
    # children's partial solutions into it, then delete the children.
    # The tree shrinks until only the root remains.
    while True:
        x_node = _a_parent_of_leaves_only(t_G)
        weight_of_x = safe_G.nodes[x_node][node_weight]
        best_value = 0
        best_partition = None
        bp_buffer = {}
        x_descendants = nx.descendants(t_G, x_node)
        for i_node in x_descendants:
            for j in range(weight_of_x, max_size + 1):
                # Split the weight budget j between x's side (a) and the
                # child's side (b); a must cover at least x's own weight.
                for a, b in _split_n_from(j, weight_of_x):
                    if (
                        a not in t_G.nodes[x_node][PKEY]
                        or b not in t_G.nodes[i_node][PKEY]
                    ):
                        # it's not possible to form this particular weight sum
                        continue

                    part1 = t_G.nodes[x_node][PKEY][a]
                    part2 = t_G.nodes[i_node][PKEY][b]
                    part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)

                    if j not in bp_buffer or bp_buffer[j][1] < value:
                        # we annotate in the buffer the best partition for j
                        bp_buffer[j] = part, value

                        # we also keep track of the overall best partition
                        if best_value <= value:
                            best_value = value
                            best_partition = part

            # as illustrated in Lukes, once we finished a child, we can
            # discharge the partitions we found into the graph
            # (the key phrase is make all x == x')
            # so that they are used by the subsequent children
            for w, (best_part_for_vl, vl) in bp_buffer.items():
                t_G.nodes[x_node][PKEY][w] = best_part_for_vl
            bp_buffer.clear()

        # the absolute best partition for this node
        # across all weights has to be stored at 0
        t_G.nodes[x_node][PKEY][0] = best_partition
        t_G.remove_nodes_from(x_descendants)

        if x_node == root:
            # the 0-labeled partition of root
            # is the optimal one for the whole tree
            return t_G.nodes[root][PKEY][0]