Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/lukes.py: 14%
111 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1"""Lukes Algorithm for exact optimal weighted tree partitioning."""
3from copy import deepcopy
4from functools import lru_cache
5from random import choice
7import networkx as nx
8from networkx.utils import not_implemented_for
__all__ = ["lukes_partitioning"]

# Default edge-weight attribute key/value applied when the caller does not
# supply ``edge_weight`` (every edge then costs 1.0 to cut).
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
# Default node-weight attribute key/value applied when the caller does not
# supply ``node_weight`` (every node then weighs 1; must stay an int —
# the algorithm indexes partitions by integer weight sums).
D_NODE_W = "weight"
D_NODE_VALUE = 1
# Node-attribute key under which per-weight partial partitions are stored
# on the working tree while the algorithm runs.
PKEY = "partitions"
# maxsize for the lru_cache wrapping cluster value/weight evaluation.
CLUSTER_EVAL_CACHE_SIZE = 2048
20def _split_n_from(n, min_size_of_first_part):
21 # splits j in two parts of which the first is at least
22 # the second argument
23 assert n >= min_size_of_first_part
24 for p1 in range(min_size_of_first_part, n + 1):
25 yield p1, n - p1
@nx._dispatch(node_attrs="node_weight", edge_attrs="edge_weight")
def lukes_partitioning(G, max_size, node_weight=None, edge_weight=None):
    """Optimal partitioning of a weighted tree using the Lukes algorithm.

    This algorithm partitions a connected, acyclic graph featuring integer
    node weights and float edge weights. The resulting clusters are such
    that the total weight of the nodes in each cluster does not exceed
    max_size and that the weight of the edges that are cut by the partition
    is minimum. The algorithm is based on [1]_.

    Parameters
    ----------
    G : NetworkX graph

    max_size : int
        Maximum weight a partition can have in terms of sum of
        node_weight for all nodes in the partition

    edge_weight : key
        Edge data key to use as weight. If None, the weights are all
        set to one.

    node_weight : key
        Node data key to use as weight. If None, the weights are all
        set to one. The data must be int.

    Returns
    -------
    partition : list
        A list of sets of nodes representing the clusters of the
        partition.

    Raises
    ------
    NotATree
        If G is not a tree.
    TypeError
        If any of the values of node_weight is not int.

    References
    ----------
    .. [1] Lukes, J. A. (1974).
       "Efficient Algorithm for the Partitioning of Trees."
       IBM Journal of Research and Development, 18(3), 217–224.

    """
    # First sanity check and tree preparation
    if not nx.is_tree(G):
        raise nx.NotATree("lukes_partitioning works only on trees")
    else:
        if nx.is_directed(G):
            # A directed tree has exactly one node with in-degree 0: the root.
            root = [n for n, d in G.in_degree() if d == 0]
            assert len(root) == 1
            root = root[0]
            t_G = deepcopy(G)
        else:
            # NOTE(review): random root choice makes the returned partition
            # non-deterministic across calls for undirected input.
            root = choice(list(G.nodes))
            # this has the desirable side effect of not inheriting attributes
            t_G = nx.dfs_tree(G, root)

    # Since we do not want to screw up the original graph,
    # if we have a blank attribute, we make a deepcopy
    if edge_weight is None or node_weight is None:
        safe_G = deepcopy(G)
        if edge_weight is None:
            nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
            edge_weight = D_EDGE_W
        if node_weight is None:
            nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
            node_weight = D_NODE_W
    else:
        safe_G = G

    # Second sanity check
    # The values of node_weight MUST BE int.
    # I cannot see any room for duck typing without incurring serious
    # danger of subtle bugs.
    all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
    for x in all_n_attr:
        if not isinstance(x, int):
            raise TypeError(
                "lukes_partitioning needs integer "
                f"values for node_weight ({node_weight})"
            )

    # SUBROUTINES -----------------------
    # these functions are defined here for two reasons:
    # - brevity: we can leverage global "safe_G"
    # - caching: signatures are hashable

    @not_implemented_for("undirected")
    # this is intended to be called only on t_G
    def _leaves(gr):
        # A leaf of the working tree is a node with no descendants.
        for x in gr.nodes:
            if not nx.descendants(gr, x):
                yield x

    @not_implemented_for("undirected")
    def _a_parent_of_leaves_only(gr):
        # Return some node whose entire subtree (besides itself) is leaves;
        # such a node always exists in a nonempty tree, so the main loop
        # below always makes progress.
        tleaves = set(_leaves(gr))
        for n in set(gr.nodes) - tleaves:
            if all(x in tleaves for x in nx.descendants(gr, n)):
                return n

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _value_of_cluster(cluster):
        # Total weight of edges of the ORIGINAL graph internal to `cluster`
        # (i.e. edges NOT cut). Cached: `cluster` is a frozenset.
        valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
        return sum(safe_G.edges[e][edge_weight] for e in valid_edges)

    def _value_of_partition(partition):
        # Maximizing this sum is equivalent to minimizing the cut weight.
        return sum(_value_of_cluster(frozenset(c)) for c in partition)

    @lru_cache(CLUSTER_EVAL_CACHE_SIZE)
    def _weight_of_cluster(cluster):
        # Total node weight of a cluster; compared against max_size.
        return sum(safe_G.nodes[n][node_weight] for n in cluster)

    def _pivot(partition, node):
        # The unique cluster of `partition` containing `node`.
        ccx = [c for c in partition if node in c]
        assert len(ccx) == 1
        return ccx[0]

    def _concatenate_or_merge(partition_1, partition_2, x, i, ref_weight):
        ccx = _pivot(partition_1, x)
        cci = _pivot(partition_2, i)
        merged_xi = ccx.union(cci)

        # We first check if we can do the merge.
        # If so, we do the actual calculations, otherwise we concatenate
        if _weight_of_cluster(frozenset(merged_xi)) <= ref_weight:
            # NOTE(review): the lambda parameter shadows argument `x`; the
            # filters drop the two pivot clusters before adding the merge.
            cp1 = list(filter(lambda x: x != ccx, partition_1))
            cp2 = list(filter(lambda x: x != cci, partition_2))

            option_2 = [merged_xi] + cp1 + cp2
            return option_2, _value_of_partition(option_2)
        else:
            option_1 = partition_1 + partition_2
            return option_1, _value_of_partition(option_1)

    # INITIALIZATION -----------------------
    # Each node carries PKEY: a dict mapping a weight budget to the best
    # partial partition of its (processed) subtree using that budget for
    # the cluster containing the node itself.
    leaves = set(_leaves(t_G))
    for lv in leaves:
        t_G.nodes[lv][PKEY] = {}
        slot = safe_G.nodes[lv][node_weight]
        t_G.nodes[lv][PKEY][slot] = [{lv}]
        # slot 0 stores the best partition regardless of budget
        t_G.nodes[lv][PKEY][0] = [{lv}]

    for inner in [x for x in t_G.nodes if x not in leaves]:
        t_G.nodes[inner][PKEY] = {}
        slot = safe_G.nodes[inner][node_weight]
        t_G.nodes[inner][PKEY][slot] = [{inner}]

    # CORE ALGORITHM -----------------------
    # Repeatedly pick a node whose children are all leaves, absorb the
    # children's tables into it, then delete the children — shrinking the
    # tree until only the root remains.
    while True:
        x_node = _a_parent_of_leaves_only(t_G)
        weight_of_x = safe_G.nodes[x_node][node_weight]
        best_value = 0
        best_partition = None
        bp_buffer = {}
        x_descendants = nx.descendants(t_G, x_node)
        for i_node in x_descendants:
            for j in range(weight_of_x, max_size + 1):
                for a, b in _split_n_from(j, weight_of_x):
                    if (
                        a not in t_G.nodes[x_node][PKEY]
                        or b not in t_G.nodes[i_node][PKEY]
                    ):
                        # it's not possible to form this particular weight sum
                        continue

                    part1 = t_G.nodes[x_node][PKEY][a]
                    part2 = t_G.nodes[i_node][PKEY][b]
                    part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)

                    if j not in bp_buffer or bp_buffer[j][1] < value:
                        # we annotate in the buffer the best partition for j
                        bp_buffer[j] = part, value

                    # we also keep track of the overall best partition
                    if best_value <= value:
                        best_value = value
                        best_partition = part

            # as illustrated in Lukes, once we finished a child, we can
            # discharge the partitions we found into the graph
            # (the key phrase is make all x == x')
            # so that they are used by the subsequent children
            for w, (best_part_for_vl, vl) in bp_buffer.items():
                t_G.nodes[x_node][PKEY][w] = best_part_for_vl
            bp_buffer.clear()

        # the absolute best partition for this node
        # across all weights has to be stored at 0
        t_G.nodes[x_node][PKEY][0] = best_partition
        t_G.remove_nodes_from(x_descendants)

        if x_node == root:
            # the 0-labeled partition of root
            # is the optimal one for the whole tree
            return t_G.nodes[root][PKEY][0]