Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/approximation/kcomponents.py: 26%

148 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1""" Fast approximation for k-component structure 

2""" 

3import itertools 

4from collections import defaultdict 

5from collections.abc import Mapping 

6from functools import cached_property 

7 

8import networkx as nx 

9from networkx.algorithms.approximation import local_node_connectivity 

10from networkx.exception import NetworkXError 

11from networkx.utils import not_implemented_for 

12 

13__all__ = ["k_components"] 

14 

15 

16@not_implemented_for("directed") 

17@nx._dispatch(name="approximate_k_components") 

18def k_components(G, min_density=0.95): 

19 r"""Returns the approximate k-component structure of a graph G. 

20 

21 A `k`-component is a maximal subgraph of a graph G that has, at least, 

22 node connectivity `k`: we need to remove at least `k` nodes to break it 

23 into more components. `k`-components have an inherent hierarchical 

24 structure because they are nested in terms of connectivity: a connected 

25 graph can contain several 2-components, each of which can contain 

26 one or more 3-components, and so forth. 

27 

28 This implementation is based on the fast heuristics to approximate 

29 the `k`-component structure of a graph [1]_. Which, in turn, it is based on 

30 a fast approximation algorithm for finding good lower bounds of the number 

31 of node independent paths between two nodes [2]_. 

32 

33 Parameters 

34 ---------- 

35 G : NetworkX graph 

36 Undirected graph 

37 

38 min_density : Float 

39 Density relaxation threshold. Default value 0.95 

40 

41 Returns 

42 ------- 

43 k_components : dict 

44 Dictionary with connectivity level `k` as key and a list of 

45 sets of nodes that form a k-component of level `k` as values. 

46 

47 Raises 

48 ------ 

49 NetworkXNotImplemented 

50 If G is directed. 

51 

52 Examples 

53 -------- 

54 >>> # Petersen graph has 10 nodes and it is triconnected, thus all 

55 >>> # nodes are in a single component on all three connectivity levels 

56 >>> from networkx.algorithms import approximation as apxa 

57 >>> G = nx.petersen_graph() 

58 >>> k_components = apxa.k_components(G) 

59 

60 Notes 

61 ----- 

62 The logic of the approximation algorithm for computing the `k`-component 

63 structure [1]_ is based on repeatedly applying simple and fast algorithms 

64 for `k`-cores and biconnected components in order to narrow down the 

65 number of pairs of nodes over which we have to compute White and Newman's 

66 approximation algorithm for finding node independent paths [2]_. More 

67 formally, this algorithm is based on Whitney's theorem, which states 

68 an inclusion relation among node connectivity, edge connectivity, and 

69 minimum degree for any graph G. This theorem implies that every 

70 `k`-component is nested inside a `k`-edge-component, which in turn, 

71 is contained in a `k`-core. Thus, this algorithm computes node independent 

72 paths among pairs of nodes in each biconnected part of each `k`-core, 

73 and repeats this procedure for each `k` from 3 to the maximal core number 

74 of a node in the input graph. 

75 

76 Because, in practice, many nodes of the core of level `k` inside a 

77 bicomponent actually are part of a component of level k, the auxiliary 

78 graph needed for the algorithm is likely to be very dense. Thus, we use 

79 a complement graph data structure (see `AntiGraph`) to save memory. 

80 AntiGraph only stores information of the edges that are *not* present 

81 in the actual auxiliary graph. When applying algorithms to this 

82 complement graph data structure, it behaves as if it were the dense 

83 version. 

84 

85 See also 

86 -------- 

87 k_components 

88 

89 References 

90 ---------- 

91 .. [1] Torrents, J. and F. Ferraro (2015) Structural Cohesion: 

92 Visualization and Heuristics for Fast Computation. 

93 https://arxiv.org/pdf/1503.04476v1 

94 

95 .. [2] White, Douglas R., and Mark Newman (2001) A Fast Algorithm for 

96 Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035 

97 https://www.santafe.edu/research/results/working-papers/fast-approximation-algorithms-for-finding-node-ind 

98 

99 .. [3] Moody, J. and D. White (2003). Social cohesion and embeddedness: 

100 A hierarchical conception of social groups. 

101 American Sociological Review 68(1), 103--28. 

102 https://doi.org/10.2307/3088904 

103 

104 """ 

105 # Dictionary with connectivity level (k) as keys and a list of 

106 # sets of nodes that form a k-component as values 

107 k_components = defaultdict(list) 

108 # make a few functions local for speed 

109 node_connectivity = local_node_connectivity 

110 k_core = nx.k_core 

111 core_number = nx.core_number 

112 biconnected_components = nx.biconnected_components 

113 combinations = itertools.combinations 

114 # Exact solution for k = {1,2} 

115 # There is a linear time algorithm for triconnectivity, if we had an 

116 # implementation available we could start from k = 4. 

117 for component in nx.connected_components(G): 

118 # isolated nodes have connectivity 0 

119 comp = set(component) 

120 if len(comp) > 1: 

121 k_components[1].append(comp) 

122 for bicomponent in nx.biconnected_components(G): 

123 # avoid considering dyads as bicomponents 

124 bicomp = set(bicomponent) 

125 if len(bicomp) > 2: 

126 k_components[2].append(bicomp) 

127 # There is no k-component of k > maximum core number 

128 # \kappa(G) <= \lambda(G) <= \delta(G) 

129 g_cnumber = core_number(G) 

130 max_core = max(g_cnumber.values()) 

131 for k in range(3, max_core + 1): 

132 C = k_core(G, k, core_number=g_cnumber) 

133 for nodes in biconnected_components(C): 

134 # Build a subgraph SG induced by the nodes that are part of 

135 # each biconnected component of the k-core subgraph C. 

136 if len(nodes) < k: 

137 continue 

138 SG = G.subgraph(nodes) 

139 # Build auxiliary graph 

140 H = _AntiGraph() 

141 H.add_nodes_from(SG.nodes()) 

142 for u, v in combinations(SG, 2): 

143 K = node_connectivity(SG, u, v, cutoff=k) 

144 if k > K: 

145 H.add_edge(u, v) 

146 for h_nodes in biconnected_components(H): 

147 if len(h_nodes) <= k: 

148 continue 

149 SH = H.subgraph(h_nodes) 

150 for Gc in _cliques_heuristic(SG, SH, k, min_density): 

151 for k_nodes in biconnected_components(Gc): 

152 Gk = nx.k_core(SG.subgraph(k_nodes), k) 

153 if len(Gk) <= k: 

154 continue 

155 k_components[k].append(set(Gk)) 

156 return k_components 

157 

158 

159def _cliques_heuristic(G, H, k, min_density): 

160 h_cnumber = nx.core_number(H) 

161 for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)): 

162 cands = {n for n, c in h_cnumber.items() if c == c_value} 

163 # Skip checking for overlap for the highest core value 

164 if i == 0: 

165 overlap = False 

166 else: 

167 overlap = set.intersection( 

168 *[{x for x in H[n] if x not in cands} for n in cands] 

169 ) 

170 if overlap and len(overlap) < k: 

171 SH = H.subgraph(cands | overlap) 

172 else: 

173 SH = H.subgraph(cands) 

174 sh_cnumber = nx.core_number(SH) 

175 SG = nx.k_core(G.subgraph(SH), k) 

176 while not (_same(sh_cnumber) and nx.density(SH) >= min_density): 

177 # This subgraph must be writable => .copy() 

178 SH = H.subgraph(SG).copy() 

179 if len(SH) <= k: 

180 break 

181 sh_cnumber = nx.core_number(SH) 

182 sh_deg = dict(SH.degree()) 

183 min_deg = min(sh_deg.values()) 

184 SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg) 

185 SG = nx.k_core(G.subgraph(SH), k) 

186 else: 

187 yield SG 

188 

189 

190def _same(measure, tol=0): 

191 vals = set(measure.values()) 

192 if (max(vals) - min(vals)) <= tol: 

193 return True 

194 return False 

195 

196 

197class _AntiGraph(nx.Graph): 

198 """ 

199 Class for complement graphs. 

200 

201 The main goal is to be able to work with big and dense graphs with 

202 a low memory footprint. 

203 

204 In this class you add the edges that *do not exist* in the dense graph, 

205 the report methods of the class return the neighbors, the edges and 

206 the degree as if it was the dense graph. Thus it's possible to use 

207 an instance of this class with some of NetworkX functions. In this 

208 case we only use k-core, connected_components, and biconnected_components. 

209 """ 

210 

211 all_edge_dict = {"weight": 1} 

212 

213 def single_edge_dict(self): 

214 return self.all_edge_dict 

215 

216 edge_attr_dict_factory = single_edge_dict # type: ignore[assignment] 

217 

218 def __getitem__(self, n): 

219 """Returns a dict of neighbors of node n in the dense graph. 

220 

221 Parameters 

222 ---------- 

223 n : node 

224 A node in the graph. 

225 

226 Returns 

227 ------- 

228 adj_dict : dictionary 

229 The adjacency dictionary for nodes connected to n. 

230 

231 """ 

232 all_edge_dict = self.all_edge_dict 

233 return { 

234 node: all_edge_dict for node in set(self._adj) - set(self._adj[n]) - {n} 

235 } 

236 

237 def neighbors(self, n): 

238 """Returns an iterator over all neighbors of node n in the 

239 dense graph. 

240 """ 

241 try: 

242 return iter(set(self._adj) - set(self._adj[n]) - {n}) 

243 except KeyError as err: 

244 raise NetworkXError(f"The node {n} is not in the graph.") from err 

245 

246 class AntiAtlasView(Mapping): 

247 """An adjacency inner dict for AntiGraph""" 

248 

249 def __init__(self, graph, node): 

250 self._graph = graph 

251 self._atlas = graph._adj[node] 

252 self._node = node 

253 

254 def __len__(self): 

255 return len(self._graph) - len(self._atlas) - 1 

256 

257 def __iter__(self): 

258 return (n for n in self._graph if n not in self._atlas and n != self._node) 

259 

260 def __getitem__(self, nbr): 

261 nbrs = set(self._graph._adj) - set(self._atlas) - {self._node} 

262 if nbr in nbrs: 

263 return self._graph.all_edge_dict 

264 raise KeyError(nbr) 

265 

266 class AntiAdjacencyView(AntiAtlasView): 

267 """An adjacency outer dict for AntiGraph""" 

268 

269 def __init__(self, graph): 

270 self._graph = graph 

271 self._atlas = graph._adj 

272 

273 def __len__(self): 

274 return len(self._atlas) 

275 

276 def __iter__(self): 

277 return iter(self._graph) 

278 

279 def __getitem__(self, node): 

280 if node not in self._graph: 

281 raise KeyError(node) 

282 return self._graph.AntiAtlasView(self._graph, node) 

283 

284 @cached_property 

285 def adj(self): 

286 return self.AntiAdjacencyView(self) 

287 

288 def subgraph(self, nodes): 

289 """This subgraph method returns a full AntiGraph. Not a View""" 

290 nodes = set(nodes) 

291 G = _AntiGraph() 

292 G.add_nodes_from(nodes) 

293 for n in G: 

294 Gnbrs = G.adjlist_inner_dict_factory() 

295 G._adj[n] = Gnbrs 

296 for nbr, d in self._adj[n].items(): 

297 if nbr in G._adj: 

298 Gnbrs[nbr] = d 

299 G._adj[nbr][n] = d 

300 G.graph = self.graph 

301 return G 

302 

303 class AntiDegreeView(nx.reportviews.DegreeView): 

304 def __iter__(self): 

305 all_nodes = set(self._succ) 

306 for n in self._nodes: 

307 nbrs = all_nodes - set(self._succ[n]) - {n} 

308 yield (n, len(nbrs)) 

309 

310 def __getitem__(self, n): 

311 nbrs = set(self._succ) - set(self._succ[n]) - {n} 

312 # AntiGraph is a ThinGraph so all edges have weight 1 

313 return len(nbrs) + (n in nbrs) 

314 

315 @cached_property 

316 def degree(self): 

317 """Returns an iterator for (node, degree) and degree for single node. 

318 

319 The node degree is the number of edges adjacent to the node. 

320 

321 Parameters 

322 ---------- 

323 nbunch : iterable container, optional (default=all nodes) 

324 A container of nodes. The container will be iterated 

325 through once. 

326 

327 weight : string or None, optional (default=None) 

328 The edge attribute that holds the numerical value used 

329 as a weight. If None, then each edge has weight 1. 

330 The degree is the sum of the edge weights adjacent to the node. 

331 

332 Returns 

333 ------- 

334 deg: 

335 Degree of the node, if a single node is passed as argument. 

336 nd_iter : an iterator 

337 The iterator returns two-tuples of (node, degree). 

338 

339 See Also 

340 -------- 

341 degree 

342 

343 Examples 

344 -------- 

345 >>> G = nx.path_graph(4) 

346 >>> G.degree(0) # node 0 with degree 1 

347 1 

348 >>> list(G.degree([0, 1])) 

349 [(0, 1), (1, 2)] 

350 

351 """ 

352 return self.AntiDegreeView(self) 

353 

354 def adjacency(self): 

355 """Returns an iterator of (node, adjacency set) tuples for all nodes 

356 in the dense graph. 

357 

358 This is the fastest way to look at every edge. 

359 For directed graphs, only outgoing adjacencies are included. 

360 

361 Returns 

362 ------- 

363 adj_iter : iterator 

364 An iterator of (node, adjacency set) for all nodes in 

365 the graph. 

366 

367 """ 

368 for n in self._adj: 

369 yield (n, set(self._adj) - set(self._adj[n]) - {n})