Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/lukes.py: 14%

111 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Lukes Algorithm for exact optimal weighted tree partitioning.""" 

2 

3from copy import deepcopy 

4from functools import lru_cache 

5from random import choice 

6 

7import networkx as nx 

8from networkx.utils import not_implemented_for 

9 

__all__ = ["lukes_partitioning"]

# Default edge-weight attribute key and the value assigned to every edge
# when the caller does not supply ``edge_weight``.
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
# Default node-weight attribute key and the value assigned to every node
# when the caller does not supply ``node_weight``.
D_NODE_W = "weight"
D_NODE_VALUE = 1
# Node-attribute key under which the algorithm stores, per node, the best
# partial partition found for each achievable weight.
PKEY = "partitions"
# maxsize for the lru_cache on cluster value/weight evaluation.
CLUSTER_EVAL_CACHE_SIZE = 2048

18 

19 

20def _split_n_from(n, min_size_of_first_part): 

21 # splits j in two parts of which the first is at least 

22 # the second argument 

23 assert n >= min_size_of_first_part 

24 for p1 in range(min_size_of_first_part, n + 1): 

25 yield p1, n - p1 

26 

27 

@nx._dispatch(node_attrs="node_weight", edge_attrs="edge_weight")
def lukes_partitioning(G, max_size, node_weight=None, edge_weight=None):
    """Optimal partitioning of a weighted tree using the Lukes algorithm.

    This algorithm partitions a connected, acyclic graph featuring integer
    node weights and float edge weights. The resulting clusters are such
    that the total weight of the nodes in each cluster does not exceed
    max_size and that the weight of the edges that are cut by the partition
    is minimum. The algorithm is based on [1]_.

    Parameters
    ----------
    G : NetworkX graph

    max_size : int
        Maximum weight a partition can have in terms of sum of
        node_weight for all nodes in the partition

    edge_weight : key
        Edge data key to use as weight. If None, the weights are all
        set to one.

    node_weight : key
        Node data key to use as weight. If None, the weights are all
        set to one. The data must be int.

    Returns
    -------
    partition : list
        A list of sets of nodes representing the clusters of the
        partition.

    Raises
    ------
    NotATree
        If G is not a tree.
    TypeError
        If any of the values of node_weight is not int.

    References
    ----------
    .. [1] Lukes, J. A. (1974).
       "Efficient Algorithm for the Partitioning of Trees."
       IBM Journal of Research and Development, 18(3), 217–224.

    """
    # First sanity check and tree preparation.
    # The algorithm operates on a rooted, directed tree ``t_G``; an
    # undirected input is rooted at an arbitrary node via dfs_tree.
    if not nx.is_tree(G):
        raise nx.NotATree("lukes_partitioning works only on trees")
    else:
        if nx.is_directed(G):
            # A directed tree must have exactly one node with in-degree 0:
            # that node is the root.
            root = [n for n, d in G.in_degree() if d == 0]
            assert len(root) == 1
            root = root[0]
            t_G = deepcopy(G)
        else:
            root = choice(list(G.nodes))
            # this has the desirable side effect of not inheriting attributes
            t_G = nx.dfs_tree(G, root)

    # Since we do not want to screw up the original graph,
    # if we have a blank attribute, we make a deepcopy
    if edge_weight is None or node_weight is None:
        safe_G = deepcopy(G)
        if edge_weight is None:
            nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
            edge_weight = D_EDGE_W
        if node_weight is None:
            nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
            node_weight = D_NODE_W
    else:
        # Both weight keys supplied by the caller: read from G directly.
        safe_G = G

    # Second sanity check
    # The values of node_weight MUST BE int.
    # I cannot see any room for duck typing without incurring serious
    # danger of subtle bugs.
    all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
    for x in all_n_attr:
        if not isinstance(x, int):
            raise TypeError(
                "lukes_partitioning needs integer "
                f"values for node_weight ({node_weight})"
            )

    # SUBROUTINES -----------------------
    # these functions are defined here for two reasons:
    # - brevity: we can leverage global "safe_G"
    # - caching: signatures are hashable

    @not_implemented_for("undirected")
    # this is intended to be called only on t_G
    def _leaves(gr):
        # A node with no descendants in the rooted tree is a leaf.
        for x in gr.nodes:
            if not nx.descendants(gr, x):
                yield x

    @not_implemented_for("undirected")
    def _a_parent_of_leaves_only(gr):
        # Return a node all of whose descendants are leaves; processing
        # such nodes bottom-up drives the dynamic program.
        tleaves = set(_leaves(gr))
        for n in set(gr.nodes) - tleaves:
            if all(x in tleaves for x in nx.descendants(gr, n)):
                return n

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _value_of_cluster(cluster):
        # Sum of the weights of the edges of safe_G internal to `cluster`.
        # Cached: `cluster` must be a frozenset.
        valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
        return sum(safe_G.edges[e][edge_weight] for e in valid_edges)

    def _value_of_partition(partition):
        # Total weight of edges kept inside clusters; maximizing this is
        # equivalent to minimizing the weight of cut edges.
        return sum(_value_of_cluster(frozenset(c)) for c in partition)

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _weight_of_cluster(cluster):
        # Total node weight of `cluster` (a frozenset, for caching).
        return sum(safe_G.nodes[n][node_weight] for n in cluster)

    def _pivot(partition, node):
        # The unique cluster of `partition` containing `node`.
        ccx = [c for c in partition if node in c]
        assert len(ccx) == 1
        return ccx[0]

    def _concatenate_or_merge(partition_1, partition_2, x, i, ref_weight):
        ccx = _pivot(partition_1, x)
        cci = _pivot(partition_2, i)
        merged_xi = ccx.union(cci)

        # We first check if we can do the merge.
        # If so, we do the actual calculations, otherwise we concatenate
        if _weight_of_cluster(frozenset(merged_xi)) <= ref_weight:
            # Merge: the two pivot clusters fuse, every other cluster of
            # both partitions is kept unchanged.
            cp1 = list(filter(lambda x: x != ccx, partition_1))
            cp2 = list(filter(lambda x: x != cci, partition_2))

            option_2 = [merged_xi] + cp1 + cp2
            return option_2, _value_of_partition(option_2)
        else:
            # Concatenate: keep the two partitions side by side.
            option_1 = partition_1 + partition_2
            return option_1, _value_of_partition(option_1)

    # INITIALIZATION -----------------------
    # Every node starts with the trivial partition {node}, stored under
    # its own weight; for leaves it is also stored under slot 0, the slot
    # reserved for "best over all weights".
    leaves = set(_leaves(t_G))
    for lv in leaves:
        t_G.nodes[lv][PKEY] = {}
        slot = safe_G.nodes[lv][node_weight]
        t_G.nodes[lv][PKEY][slot] = [{lv}]
        t_G.nodes[lv][PKEY][0] = [{lv}]

    for inner in [x for x in t_G.nodes if x not in leaves]:
        t_G.nodes[inner][PKEY] = {}
        slot = safe_G.nodes[inner][node_weight]
        t_G.nodes[inner][PKEY][slot] = [{inner}]

    # CORE ALGORITHM -----------------------
    # Repeatedly pick a node whose children are all leaves, absorb the
    # children's partial solutions into it, then delete the children.
    # The tree shrinks until only the root remains.
    while True:
        x_node = _a_parent_of_leaves_only(t_G)
        weight_of_x = safe_G.nodes[x_node][node_weight]
        best_value = 0
        best_partition = None
        bp_buffer = {}
        x_descendants = nx.descendants(t_G, x_node)
        for i_node in x_descendants:
            for j in range(weight_of_x, max_size + 1):
                # Split the weight budget j between x's side (a) and the
                # child's side (b); a must cover at least x's own weight.
                for a, b in _split_n_from(j, weight_of_x):
                    if (
                        a not in t_G.nodes[x_node][PKEY]
                        or b not in t_G.nodes[i_node][PKEY]
                    ):
                        # it's not possible to form this particular weight sum
                        continue

                    part1 = t_G.nodes[x_node][PKEY][a]
                    part2 = t_G.nodes[i_node][PKEY][b]
                    part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)

                    if j not in bp_buffer or bp_buffer[j][1] < value:
                        # we annotate in the buffer the best partition for j
                        bp_buffer[j] = part, value

                        # we also keep track of the overall best partition
                        if best_value <= value:
                            best_value = value
                            best_partition = part

            # as illustrated in Lukes, once we finished a child, we can
            # discharge the partitions we found into the graph
            # (the key phrase is make all x == x')
            # so that they are used by the subsequent children
            for w, (best_part_for_vl, vl) in bp_buffer.items():
                t_G.nodes[x_node][PKEY][w] = best_part_for_vl
            bp_buffer.clear()

        # the absolute best partition for this node
        # across all weights has to be stored at 0
        t_G.nodes[x_node][PKEY][0] = best_partition
        t_G.remove_nodes_from(x_descendants)

        if x_node == root:
            # the 0-labeled partition of root
            # is the optimal one for the whole tree
            return t_G.nodes[root][PKEY][0]