Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/approximation/kcomponents.py: 26%
148 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-20 07:00 +0000
1""" Fast approximation for k-component structure
2"""
3import itertools
4from collections import defaultdict
5from collections.abc import Mapping
6from functools import cached_property
8import networkx as nx
9from networkx.algorithms.approximation import local_node_connectivity
10from networkx.exception import NetworkXError
11from networkx.utils import not_implemented_for
13__all__ = ["k_components"]
16@not_implemented_for("directed")
17@nx._dispatch(name="approximate_k_components")
18def k_components(G, min_density=0.95):
19 r"""Returns the approximate k-component structure of a graph G.
21 A `k`-component is a maximal subgraph of a graph G that has, at least,
22 node connectivity `k`: we need to remove at least `k` nodes to break it
23 into more components. `k`-components have an inherent hierarchical
24 structure because they are nested in terms of connectivity: a connected
25 graph can contain several 2-components, each of which can contain
26 one or more 3-components, and so forth.
28 This implementation is based on the fast heuristics to approximate
29 the `k`-component structure of a graph [1]_. Which, in turn, it is based on
30 a fast approximation algorithm for finding good lower bounds of the number
31 of node independent paths between two nodes [2]_.
33 Parameters
34 ----------
35 G : NetworkX graph
36 Undirected graph
38 min_density : Float
39 Density relaxation threshold. Default value 0.95
41 Returns
42 -------
43 k_components : dict
44 Dictionary with connectivity level `k` as key and a list of
45 sets of nodes that form a k-component of level `k` as values.
47 Raises
48 ------
49 NetworkXNotImplemented
50 If G is directed.
52 Examples
53 --------
54 >>> # Petersen graph has 10 nodes and it is triconnected, thus all
55 >>> # nodes are in a single component on all three connectivity levels
56 >>> from networkx.algorithms import approximation as apxa
57 >>> G = nx.petersen_graph()
58 >>> k_components = apxa.k_components(G)
60 Notes
61 -----
62 The logic of the approximation algorithm for computing the `k`-component
63 structure [1]_ is based on repeatedly applying simple and fast algorithms
64 for `k`-cores and biconnected components in order to narrow down the
65 number of pairs of nodes over which we have to compute White and Newman's
66 approximation algorithm for finding node independent paths [2]_. More
67 formally, this algorithm is based on Whitney's theorem, which states
68 an inclusion relation among node connectivity, edge connectivity, and
69 minimum degree for any graph G. This theorem implies that every
70 `k`-component is nested inside a `k`-edge-component, which in turn,
71 is contained in a `k`-core. Thus, this algorithm computes node independent
72 paths among pairs of nodes in each biconnected part of each `k`-core,
73 and repeats this procedure for each `k` from 3 to the maximal core number
74 of a node in the input graph.
76 Because, in practice, many nodes of the core of level `k` inside a
77 bicomponent actually are part of a component of level k, the auxiliary
78 graph needed for the algorithm is likely to be very dense. Thus, we use
79 a complement graph data structure (see `AntiGraph`) to save memory.
80 AntiGraph only stores information of the edges that are *not* present
81 in the actual auxiliary graph. When applying algorithms to this
82 complement graph data structure, it behaves as if it were the dense
83 version.
85 See also
86 --------
87 k_components
89 References
90 ----------
91 .. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion:
92 Visualization and Heuristics for Fast Computation.
93 https://arxiv.org/pdf/1503.04476v1
95 .. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for
96 Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035
97 https://www.santafe.edu/research/results/working-papers/fast-approximation-algorithms-for-finding-node-ind
99 .. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness:
100 A hierarchical conception of social groups.
101 American Sociological Review 68(1), 103--28.
102 https://doi.org/10.2307/3088904
104 """
105 # Dictionary with connectivity level (k) as keys and a list of
106 # sets of nodes that form a k-component as values
107 k_components = defaultdict(list)
108 # make a few functions local for speed
109 node_connectivity = local_node_connectivity
110 k_core = nx.k_core
111 core_number = nx.core_number
112 biconnected_components = nx.biconnected_components
113 combinations = itertools.combinations
114 # Exact solution for k = {1,2}
115 # There is a linear time algorithm for triconnectivity, if we had an
116 # implementation available we could start from k = 4.
117 for component in nx.connected_components(G):
118 # isolated nodes have connectivity 0
119 comp = set(component)
120 if len(comp) > 1:
121 k_components[1].append(comp)
122 for bicomponent in nx.biconnected_components(G):
123 # avoid considering dyads as bicomponents
124 bicomp = set(bicomponent)
125 if len(bicomp) > 2:
126 k_components[2].append(bicomp)
127 # There is no k-component of k > maximum core number
128 # \kappa(G) <= \lambda(G) <= \delta(G)
129 g_cnumber = core_number(G)
130 max_core = max(g_cnumber.values())
131 for k in range(3, max_core + 1):
132 C = k_core(G, k, core_number=g_cnumber)
133 for nodes in biconnected_components(C):
134 # Build a subgraph SG induced by the nodes that are part of
135 # each biconnected component of the k-core subgraph C.
136 if len(nodes) < k:
137 continue
138 SG = G.subgraph(nodes)
139 # Build auxiliary graph
140 H = _AntiGraph()
141 H.add_nodes_from(SG.nodes())
142 for u, v in combinations(SG, 2):
143 K = node_connectivity(SG, u, v, cutoff=k)
144 if k > K:
145 H.add_edge(u, v)
146 for h_nodes in biconnected_components(H):
147 if len(h_nodes) <= k:
148 continue
149 SH = H.subgraph(h_nodes)
150 for Gc in _cliques_heuristic(SG, SH, k, min_density):
151 for k_nodes in biconnected_components(Gc):
152 Gk = nx.k_core(SG.subgraph(k_nodes), k)
153 if len(Gk) <= k:
154 continue
155 k_components[k].append(set(Gk))
156 return k_components
159def _cliques_heuristic(G, H, k, min_density):
160 h_cnumber = nx.core_number(H)
161 for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
162 cands = {n for n, c in h_cnumber.items() if c == c_value}
163 # Skip checking for overlap for the highest core value
164 if i == 0:
165 overlap = False
166 else:
167 overlap = set.intersection(
168 *[{x for x in H[n] if x not in cands} for n in cands]
169 )
170 if overlap and len(overlap) < k:
171 SH = H.subgraph(cands | overlap)
172 else:
173 SH = H.subgraph(cands)
174 sh_cnumber = nx.core_number(SH)
175 SG = nx.k_core(G.subgraph(SH), k)
176 while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
177 # This subgraph must be writable => .copy()
178 SH = H.subgraph(SG).copy()
179 if len(SH) <= k:
180 break
181 sh_cnumber = nx.core_number(SH)
182 sh_deg = dict(SH.degree())
183 min_deg = min(sh_deg.values())
184 SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
185 SG = nx.k_core(G.subgraph(SH), k)
186 else:
187 yield SG
190def _same(measure, tol=0):
191 vals = set(measure.values())
192 if (max(vals) - min(vals)) <= tol:
193 return True
194 return False
197class _AntiGraph(nx.Graph):
198 """
199 Class for complement graphs.
201 The main goal is to be able to work with big and dense graphs with
202 a low memory footprint.
204 In this class you add the edges that *do not exist* in the dense graph,
205 the report methods of the class return the neighbors, the edges and
206 the degree as if it was the dense graph. Thus it's possible to use
207 an instance of this class with some of NetworkX functions. In this
208 case we only use k-core, connected_components, and biconnected_components.
209 """
211 all_edge_dict = {"weight": 1}
213 def single_edge_dict(self):
214 return self.all_edge_dict
216 edge_attr_dict_factory = single_edge_dict # type: ignore[assignment]
218 def __getitem__(self, n):
219 """Returns a dict of neighbors of node n in the dense graph.
221 Parameters
222 ----------
223 n : node
224 A node in the graph.
226 Returns
227 -------
228 adj_dict : dictionary
229 The adjacency dictionary for nodes connected to n.
231 """
232 all_edge_dict = self.all_edge_dict
233 return {
234 node: all_edge_dict for node in set(self._adj) - set(self._adj[n]) - {n}
235 }
237 def neighbors(self, n):
238 """Returns an iterator over all neighbors of node n in the
239 dense graph.
240 """
241 try:
242 return iter(set(self._adj) - set(self._adj[n]) - {n})
243 except KeyError as err:
244 raise NetworkXError(f"The node {n} is not in the graph.") from err
246 class AntiAtlasView(Mapping):
247 """An adjacency inner dict for AntiGraph"""
249 def __init__(self, graph, node):
250 self._graph = graph
251 self._atlas = graph._adj[node]
252 self._node = node
254 def __len__(self):
255 return len(self._graph) - len(self._atlas) - 1
257 def __iter__(self):
258 return (n for n in self._graph if n not in self._atlas and n != self._node)
260 def __getitem__(self, nbr):
261 nbrs = set(self._graph._adj) - set(self._atlas) - {self._node}
262 if nbr in nbrs:
263 return self._graph.all_edge_dict
264 raise KeyError(nbr)
266 class AntiAdjacencyView(AntiAtlasView):
267 """An adjacency outer dict for AntiGraph"""
269 def __init__(self, graph):
270 self._graph = graph
271 self._atlas = graph._adj
273 def __len__(self):
274 return len(self._atlas)
276 def __iter__(self):
277 return iter(self._graph)
279 def __getitem__(self, node):
280 if node not in self._graph:
281 raise KeyError(node)
282 return self._graph.AntiAtlasView(self._graph, node)
284 @cached_property
285 def adj(self):
286 return self.AntiAdjacencyView(self)
288 def subgraph(self, nodes):
289 """This subgraph method returns a full AntiGraph. Not a View"""
290 nodes = set(nodes)
291 G = _AntiGraph()
292 G.add_nodes_from(nodes)
293 for n in G:
294 Gnbrs = G.adjlist_inner_dict_factory()
295 G._adj[n] = Gnbrs
296 for nbr, d in self._adj[n].items():
297 if nbr in G._adj:
298 Gnbrs[nbr] = d
299 G._adj[nbr][n] = d
300 G.graph = self.graph
301 return G
303 class AntiDegreeView(nx.reportviews.DegreeView):
304 def __iter__(self):
305 all_nodes = set(self._succ)
306 for n in self._nodes:
307 nbrs = all_nodes - set(self._succ[n]) - {n}
308 yield (n, len(nbrs))
310 def __getitem__(self, n):
311 nbrs = set(self._succ) - set(self._succ[n]) - {n}
312 # AntiGraph is a ThinGraph so all edges have weight 1
313 return len(nbrs) + (n in nbrs)
315 @cached_property
316 def degree(self):
317 """Returns an iterator for (node, degree) and degree for single node.
319 The node degree is the number of edges adjacent to the node.
321 Parameters
322 ----------
323 nbunch : iterable container, optional (default=all nodes)
324 A container of nodes. The container will be iterated
325 through once.
327 weight : string or None, optional (default=None)
328 The edge attribute that holds the numerical value used
329 as a weight. If None, then each edge has weight 1.
330 The degree is the sum of the edge weights adjacent to the node.
332 Returns
333 -------
334 deg:
335 Degree of the node, if a single node is passed as argument.
336 nd_iter : an iterator
337 The iterator returns two-tuples of (node, degree).
339 See Also
340 --------
341 degree
343 Examples
344 --------
345 >>> G = nx.path_graph(4)
346 >>> G.degree(0) # node 0 with degree 1
347 1
348 >>> list(G.degree([0, 1]))
349 [(0, 1), (1, 2)]
351 """
352 return self.AntiDegreeView(self)
354 def adjacency(self):
355 """Returns an iterator of (node, adjacency set) tuples for all nodes
356 in the dense graph.
358 This is the fastest way to look at every edge.
359 For directed graphs, only outgoing adjacencies are included.
361 Returns
362 -------
363 adj_iter : iterator
364 An iterator of (node, adjacency set) for all nodes in
365 the graph.
367 """
368 for n in self._adj:
369 yield (n, set(self._adj) - set(self._adj[n]) - {n})