Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/quality.py: 28%

75 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1"""Functions for measuring the quality of a partition (into 

2communities). 

3 

4""" 

5 

6from itertools import combinations 

7 

8import networkx as nx 

9from networkx import NetworkXError 

10from networkx.algorithms.community.community_utils import is_partition 

11from networkx.utils.decorators import argmap 

12 

13__all__ = ["modularity", "partition_quality"] 

14 

15 

16class NotAPartition(NetworkXError): 

17 """Raised if a given collection is not a partition.""" 

18 

19 def __init__(self, G, collection): 

20 msg = f"{collection} is not a valid partition of the graph {G}" 

21 super().__init__(msg) 

22 

23 

24def _require_partition(G, partition): 

25 """Decorator to check that a valid partition is input to a function 

26 

27 Raises :exc:`networkx.NetworkXError` if the partition is not valid. 

28 

29 This decorator should be used on functions whose first two arguments 

30 are a graph and a partition of the nodes of that graph (in that 

31 order):: 

32 

33 >>> @require_partition 

34 ... def foo(G, partition): 

35 ... print("partition is valid!") 

36 ... 

37 >>> G = nx.complete_graph(5) 

38 >>> partition = [{0, 1}, {2, 3}, {4}] 

39 >>> foo(G, partition) 

40 partition is valid! 

41 >>> partition = [{0}, {2, 3}, {4}] 

42 >>> foo(G, partition) 

43 Traceback (most recent call last): 

44 ... 

45 networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G 

46 >>> partition = [{0, 1}, {1, 2, 3}, {4}] 

47 >>> foo(G, partition) 

48 Traceback (most recent call last): 

49 ... 

50 networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G 

51 

52 """ 

53 if is_partition(G, partition): 

54 return G, partition 

55 raise nx.NetworkXError("`partition` is not a valid partition of the nodes of G") 

56 

57 

58require_partition = argmap(_require_partition, (0, 1)) 

59 

60 

61@nx._dispatch 

62def intra_community_edges(G, partition): 

63 """Returns the number of intra-community edges for a partition of `G`. 

64 

65 Parameters 

66 ---------- 

67 G : NetworkX graph. 

68 

69 partition : iterable of sets of nodes 

70 This must be a partition of the nodes of `G`. 

71 

72 The "intra-community edges" are those edges joining a pair of nodes 

73 in the same block of the partition. 

74 

75 """ 

76 return sum(G.subgraph(block).size() for block in partition) 

77 

78 

79@nx._dispatch 

80def inter_community_edges(G, partition): 

81 """Returns the number of inter-community edges for a partition of `G`. 

82 according to the given 

83 partition of the nodes of `G`. 

84 

85 Parameters 

86 ---------- 

87 G : NetworkX graph. 

88 

89 partition : iterable of sets of nodes 

90 This must be a partition of the nodes of `G`. 

91 

92 The *inter-community edges* are those edges joining a pair of nodes 

93 in different blocks of the partition. 

94 

95 Implementation note: this function creates an intermediate graph 

96 that may require the same amount of memory as that of `G`. 

97 

98 """ 

99 # Alternate implementation that does not require constructing a new 

100 # graph object (but does require constructing an affiliation 

101 # dictionary): 

102 # 

103 # aff = dict(chain.from_iterable(((v, block) for v in block) 

104 # for block in partition)) 

105 # return sum(1 for u, v in G.edges() if aff[u] != aff[v]) 

106 # 

107 MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph 

108 return nx.quotient_graph(G, partition, create_using=MG).size() 

109 

110 

111@nx._dispatch 

112def inter_community_non_edges(G, partition): 

113 """Returns the number of inter-community non-edges according to the 

114 given partition of the nodes of `G`. 

115 

116 Parameters 

117 ---------- 

118 G : NetworkX graph. 

119 

120 partition : iterable of sets of nodes 

121 This must be a partition of the nodes of `G`. 

122 

123 A *non-edge* is a pair of nodes (undirected if `G` is undirected) 

124 that are not adjacent in `G`. The *inter-community non-edges* are 

125 those non-edges on a pair of nodes in different blocks of the 

126 partition. 

127 

128 Implementation note: this function creates two intermediate graphs, 

129 which may require up to twice the amount of memory as required to 

130 store `G`. 

131 

132 """ 

133 # Alternate implementation that does not require constructing two 

134 # new graph objects (but does require constructing an affiliation 

135 # dictionary): 

136 # 

137 # aff = dict(chain.from_iterable(((v, block) for v in block) 

138 # for block in partition)) 

139 # return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v]) 

140 # 

141 return inter_community_edges(nx.complement(G), partition) 

142 

143 

144@nx._dispatch(edge_attrs="weight") 

145def modularity(G, communities, weight="weight", resolution=1): 

146 r"""Returns the modularity of the given partition of the graph. 

147 

148 Modularity is defined in [1]_ as 

149 

150 .. math:: 

151 Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right) 

152 \delta(c_i,c_j) 

153 

154 where $m$ is the number of edges (or sum of all edge weights as in [5]_), 

155 $A$ is the adjacency matrix of `G`, $k_i$ is the (weighted) degree of $i$, 

156 $\gamma$ is the resolution parameter, and $\delta(c_i, c_j)$ is 1 if $i$ and 

157 $j$ are in the same community else 0. 

158 

159 According to [2]_ (and verified by some algebra) this can be reduced to 

160 

161 .. math:: 

162 Q = \sum_{c=1}^{n} 

163 \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right] 

164 

165 where the sum iterates over all communities $c$, $m$ is the number of edges, 

166 $L_c$ is the number of intra-community links for community $c$, 

167 $k_c$ is the sum of degrees of the nodes in community $c$, 

168 and $\gamma$ is the resolution parameter. 

169 

170 The resolution parameter sets an arbitrary tradeoff between intra-group 

171 edges and inter-group edges. More complex grouping patterns can be 

172 discovered by analyzing the same network with multiple values of gamma 

173 and then combining the results [3]_. That said, it is very common to 

174 simply use gamma=1. More on the choice of gamma is in [4]_. 

175 

176 The second formula is the one actually used in calculation of the modularity. 

177 For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$. 

178 

179 Parameters 

180 ---------- 

181 G : NetworkX Graph 

182 

183 communities : list or iterable of set of nodes 

184 These node sets must represent a partition of G's nodes. 

185 

186 weight : string or None, optional (default="weight") 

187 The edge attribute that holds the numerical value used 

188 as a weight. If None or an edge does not have that attribute, 

189 then that edge has weight 1. 

190 

191 resolution : float (default=1) 

192 If resolution is less than 1, modularity favors larger communities. 

193 Greater than 1 favors smaller communities. 

194 

195 Returns 

196 ------- 

197 Q : float 

198 The modularity of the partition. 

199 

200 Raises 

201 ------ 

202 NotAPartition 

203 If `communities` is not a partition of the nodes of `G`. 

204 

205 Examples 

206 -------- 

207 >>> G = nx.barbell_graph(3, 0) 

208 >>> nx.community.modularity(G, [{0, 1, 2}, {3, 4, 5}]) 

209 0.35714285714285715 

210 >>> nx.community.modularity(G, nx.community.label_propagation_communities(G)) 

211 0.35714285714285715 

212 

213 References 

214 ---------- 

215 .. [1] M. E. J. Newman "Networks: An Introduction", page 224. 

216 Oxford University Press, 2011. 

217 .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore. 

218 "Finding community structure in very large networks." 

219 Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187> 

220 .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection" 

221 Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110 

222 .. [4] M. E. J. Newman, "Equivalence between modularity optimization and 

223 maximum likelihood methods for community detection" 

224 Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315 

225 .. [5] Blondel, V.D. et al. "Fast unfolding of communities in large 

226 networks" J. Stat. Mech 10008, 1-12 (2008). 

227 https://doi.org/10.1088/1742-5468/2008/10/P10008 

228 """ 

229 if not isinstance(communities, list): 

230 communities = list(communities) 

231 if not is_partition(G, communities): 

232 raise NotAPartition(G, communities) 

233 

234 directed = G.is_directed() 

235 if directed: 

236 out_degree = dict(G.out_degree(weight=weight)) 

237 in_degree = dict(G.in_degree(weight=weight)) 

238 m = sum(out_degree.values()) 

239 norm = 1 / m**2 

240 else: 

241 out_degree = in_degree = dict(G.degree(weight=weight)) 

242 deg_sum = sum(out_degree.values()) 

243 m = deg_sum / 2 

244 norm = 1 / deg_sum**2 

245 

246 def community_contribution(community): 

247 comm = set(community) 

248 L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm) 

249 

250 out_degree_sum = sum(out_degree[u] for u in comm) 

251 in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum 

252 

253 return L_c / m - resolution * out_degree_sum * in_degree_sum * norm 

254 

255 return sum(map(community_contribution, communities)) 

256 

257 

258@require_partition 

259@nx._dispatch 

260def partition_quality(G, partition): 

261 """Returns the coverage and performance of a partition of G. 

262 

263 The *coverage* of a partition is the ratio of the number of 

264 intra-community edges to the total number of edges in the graph. 

265 

266 The *performance* of a partition is the number of 

267 intra-community edges plus inter-community non-edges divided by the total 

268 number of potential edges. 

269 

270 This algorithm has complexity $O(C^2 + L)$ where C is the number of communities and L is the number of links. 

271 

272 Parameters 

273 ---------- 

274 G : NetworkX graph 

275 

276 partition : sequence 

277 Partition of the nodes of `G`, represented as a sequence of 

278 sets of nodes (blocks). Each block of the partition represents a 

279 community. 

280 

281 Returns 

282 ------- 

283 (float, float) 

284 The (coverage, performance) tuple of the partition, as defined above. 

285 

286 Raises 

287 ------ 

288 NetworkXError 

289 If `partition` is not a valid partition of the nodes of `G`. 

290 

291 Notes 

292 ----- 

293 If `G` is a multigraph; 

294 - for coverage, the multiplicity of edges is counted 

295 - for performance, the result is -1 (total number of possible edges is not defined) 

296 

297 References 

298 ---------- 

299 .. [1] Santo Fortunato. 

300 "Community Detection in Graphs". 

301 *Physical Reports*, Volume 486, Issue 3--5 pp. 75--174 

302 <https://arxiv.org/abs/0906.0612> 

303 """ 

304 

305 node_community = {} 

306 for i, community in enumerate(partition): 

307 for node in community: 

308 node_community[node] = i 

309 

310 # `performance` is not defined for multigraphs 

311 if not G.is_multigraph(): 

312 # Iterate over the communities, quadratic, to calculate `possible_inter_community_edges` 

313 possible_inter_community_edges = sum( 

314 len(p1) * len(p2) for p1, p2 in combinations(partition, 2) 

315 ) 

316 

317 if G.is_directed(): 

318 possible_inter_community_edges *= 2 

319 else: 

320 possible_inter_community_edges = 0 

321 

322 # Compute the number of edges in the complete graph -- `n` nodes, 

323 # directed or undirected, depending on `G` 

324 n = len(G) 

325 total_pairs = n * (n - 1) 

326 if not G.is_directed(): 

327 total_pairs //= 2 

328 

329 intra_community_edges = 0 

330 inter_community_non_edges = possible_inter_community_edges 

331 

332 # Iterate over the links to count `intra_community_edges` and `inter_community_non_edges` 

333 for e in G.edges(): 

334 if node_community[e[0]] == node_community[e[1]]: 

335 intra_community_edges += 1 

336 else: 

337 inter_community_non_edges -= 1 

338 

339 coverage = intra_community_edges / len(G.edges) 

340 

341 if G.is_multigraph(): 

342 performance = -1.0 

343 else: 

344 performance = (intra_community_edges + inter_community_non_edges) / total_pairs 

345 

346 return coverage, performance