Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/networkx/algorithms/community/label_propagation.py: 18%

101 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-20 07:00 +0000

1""" 

2Label propagation community detection algorithms. 

3""" 

4from collections import Counter, defaultdict, deque 

5 

6import networkx as nx 

7from networkx.utils import groups, not_implemented_for, py_random_state 

8 

9__all__ = [ 

10 "label_propagation_communities", 

11 "asyn_lpa_communities", 

12 "fast_label_propagation_communities", 

13] 

14 

15 

16@py_random_state("seed") 

17@nx._dispatch(edge_attrs="weight") 

18def fast_label_propagation_communities(G, *, weight=None, seed=None): 

19 """Returns communities in `G` as detected by fast label propagation. 

20 

21 The fast label propagation algorithm is described in [1]_. The algorithm is 

22 probabilistic and the found communities may vary in different executions. 

23 

24 The algorithm operates as follows. First, the community label of each node is 

25 set to a unique label. The algorithm then repeatedly updates the labels of 

26 the nodes to the most frequent label in their neighborhood. In case of ties, 

27 a random label is chosen from the most frequent labels. 

28 

29 The algorithm maintains a queue of nodes that still need to be processed. 

30 Initially, all nodes are added to the queue in a random order. Then the nodes 

31 are removed from the queue one by one and processed. If a node updates its label, 

32 all its neighbors that have a different label are added to the queue (if not 

33 already in the queue). The algorithm stops when the queue is empty. 

34 

35 Parameters 

36 ---------- 

37 G : Graph, DiGraph, MultiGraph, or MultiDiGraph 

38 Any NetworkX graph. 

39 

40 weight : string, or None (default) 

41 The edge attribute representing a non-negative weight of an edge. If None, 

42 each edge is assumed to have weight one. The weight of an edge is used in 

43 determining the frequency with which a label appears among the neighbors of 

44 a node (edge with weight `w` is equivalent to `w` unweighted edges). 

45 

46 seed : integer, random_state, or None (default) 

47 Indicator of random number generation state. See :ref:`Randomness<randomness>`. 

48 

49 Returns 

50 ------- 

51 communities : iterable 

52 Iterable of communities given as sets of nodes. 

53 

54 Notes 

55 ----- 

56 Edge directions are ignored for directed graphs. 

57 Edge weights must be non-negative numbers. 

58 

59 References 

60 ---------- 

61 .. [1] Vincent A. Traag & Lovro Šubelj. "Large network community detection by 

62 fast label propagation." Scientific Reports 13 (2023): 2701. 

63 https://doi.org/10.1038/s41598-023-29610-z 

64 """ 

65 

66 # Queue of nodes to be processed. 

67 nodes_queue = deque(G) 

68 seed.shuffle(nodes_queue) 

69 

70 # Set of nodes in the queue. 

71 nodes_set = set(G) 

72 

73 # Assign unique label to each node. 

74 comms = {node: i for i, node in enumerate(G)} 

75 

76 while nodes_queue: 

77 # Remove next node from the queue to process. 

78 node = nodes_queue.popleft() 

79 nodes_set.remove(node) 

80 

81 # Isolated nodes retain their initial label. 

82 if G.degree(node) > 0: 

83 # Compute frequency of labels in node's neighborhood. 

84 label_freqs = _fast_label_count(G, comms, node, weight) 

85 max_freq = max(label_freqs.values()) 

86 

87 # Always sample new label from most frequent labels. 

88 comm = seed.choice( 

89 [comm for comm in label_freqs if label_freqs[comm] == max_freq] 

90 ) 

91 

92 if comms[node] != comm: 

93 comms[node] = comm 

94 

95 # Add neighbors that have different label to the queue. 

96 for nbr in nx.all_neighbors(G, node): 

97 if comms[nbr] != comm and nbr not in nodes_set: 

98 nodes_queue.append(nbr) 

99 nodes_set.add(nbr) 

100 

101 yield from groups(comms).values() 

102 

103 

104def _fast_label_count(G, comms, node, weight=None): 

105 """Computes the frequency of labels in the neighborhood of a node. 

106 

107 Returns a dictionary keyed by label to the frequency of that label. 

108 """ 

109 

110 if weight is None: 

111 # Unweighted (un)directed simple graph. 

112 if not G.is_multigraph(): 

113 label_freqs = Counter(map(comms.get, nx.all_neighbors(G, node))) 

114 

115 # Unweighted (un)directed multigraph. 

116 else: 

117 label_freqs = defaultdict(int) 

118 for nbr in G[node]: 

119 label_freqs[comms[nbr]] += len(G[node][nbr]) 

120 

121 if G.is_directed(): 

122 for nbr in G.pred[node]: 

123 label_freqs[comms[nbr]] += len(G.pred[node][nbr]) 

124 

125 else: 

126 # Weighted undirected simple/multigraph. 

127 label_freqs = defaultdict(float) 

128 for _, nbr, w in G.edges(node, data=weight, default=1): 

129 label_freqs[comms[nbr]] += w 

130 

131 # Weighted directed simple/multigraph. 

132 if G.is_directed(): 

133 for nbr, _, w in G.in_edges(node, data=weight, default=1): 

134 label_freqs[comms[nbr]] += w 

135 

136 return label_freqs 

137 

138 

139@py_random_state(2) 

140@nx._dispatch(edge_attrs="weight") 

141def asyn_lpa_communities(G, weight=None, seed=None): 

142 """Returns communities in `G` as detected by asynchronous label 

143 propagation. 

144 

145 The asynchronous label propagation algorithm is described in 

146 [1]_. The algorithm is probabilistic and the found communities may 

147 vary on different executions. 

148 

149 The algorithm proceeds as follows. After initializing each node with 

150 a unique label, the algorithm repeatedly sets the label of a node to 

151 be the label that appears most frequently among that nodes 

152 neighbors. The algorithm halts when each node has the label that 

153 appears most frequently among its neighbors. The algorithm is 

154 asynchronous because each node is updated without waiting for 

155 updates on the remaining nodes. 

156 

157 This generalized version of the algorithm in [1]_ accepts edge 

158 weights. 

159 

160 Parameters 

161 ---------- 

162 G : Graph 

163 

164 weight : string 

165 The edge attribute representing the weight of an edge. 

166 If None, each edge is assumed to have weight one. In this 

167 algorithm, the weight of an edge is used in determining the 

168 frequency with which a label appears among the neighbors of a 

169 node: a higher weight means the label appears more often. 

170 

171 seed : integer, random_state, or None (default) 

172 Indicator of random number generation state. 

173 See :ref:`Randomness<randomness>`. 

174 

175 Returns 

176 ------- 

177 communities : iterable 

178 Iterable of communities given as sets of nodes. 

179 

180 Notes 

181 ----- 

182 Edge weight attributes must be numerical. 

183 

184 References 

185 ---------- 

186 .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near 

187 linear time algorithm to detect community structures in large-scale 

188 networks." Physical Review E 76.3 (2007): 036106. 

189 """ 

190 

191 labels = {n: i for i, n in enumerate(G)} 

192 cont = True 

193 

194 while cont: 

195 cont = False 

196 nodes = list(G) 

197 seed.shuffle(nodes) 

198 

199 for node in nodes: 

200 if not G[node]: 

201 continue 

202 

203 # Get label frequencies among adjacent nodes. 

204 # Depending on the order they are processed in, 

205 # some nodes will be in iteration t and others in t-1, 

206 # making the algorithm asynchronous. 

207 if weight is None: 

208 # initialising a Counter from an iterator of labels is 

209 # faster for getting unweighted label frequencies 

210 label_freq = Counter(map(labels.get, G[node])) 

211 else: 

212 # updating a defaultdict is substantially faster 

213 # for getting weighted label frequencies 

214 label_freq = defaultdict(float) 

215 for _, v, wt in G.edges(node, data=weight, default=1): 

216 label_freq[labels[v]] += wt 

217 

218 # Get the labels that appear with maximum frequency. 

219 max_freq = max(label_freq.values()) 

220 best_labels = [ 

221 label for label, freq in label_freq.items() if freq == max_freq 

222 ] 

223 

224 # If the node does not have one of the maximum frequency labels, 

225 # randomly choose one of them and update the node's label. 

226 # Continue the iteration as long as at least one node 

227 # doesn't have a maximum frequency label. 

228 if labels[node] not in best_labels: 

229 labels[node] = seed.choice(best_labels) 

230 cont = True 

231 

232 yield from groups(labels).values() 

233 

234 

235@not_implemented_for("directed") 

236@nx._dispatch 

237def label_propagation_communities(G): 

238 """Generates community sets determined by label propagation 

239 

240 Finds communities in `G` using a semi-synchronous label propagation 

241 method [1]_. This method combines the advantages of both the synchronous 

242 and asynchronous models. Not implemented for directed graphs. 

243 

244 Parameters 

245 ---------- 

246 G : graph 

247 An undirected NetworkX graph. 

248 

249 Returns 

250 ------- 

251 communities : iterable 

252 A dict_values object that contains a set of nodes for each community. 

253 

254 Raises 

255 ------ 

256 NetworkXNotImplemented 

257 If the graph is directed 

258 

259 References 

260 ---------- 

261 .. [1] Cordasco, G., & Gargano, L. (2010, December). Community detection 

262 via semi-synchronous label propagation algorithms. In Business 

263 Applications of Social Network Analysis (BASNA), 2010 IEEE International 

264 Workshop on (pp. 1-8). IEEE. 

265 """ 

266 coloring = _color_network(G) 

267 # Create a unique label for each node in the graph 

268 labeling = {v: k for k, v in enumerate(G)} 

269 while not _labeling_complete(labeling, G): 

270 # Update the labels of every node with the same color. 

271 for color, nodes in coloring.items(): 

272 for n in nodes: 

273 _update_label(n, labeling, G) 

274 

275 clusters = defaultdict(set) 

276 for node, label in labeling.items(): 

277 clusters[label].add(node) 

278 return clusters.values() 

279 

280 

281def _color_network(G): 

282 """Colors the network so that neighboring nodes all have distinct colors. 

283 

284 Returns a dict keyed by color to a set of nodes with that color. 

285 """ 

286 coloring = {} # color => set(node) 

287 colors = nx.coloring.greedy_color(G) 

288 for node, color in colors.items(): 

289 if color in coloring: 

290 coloring[color].add(node) 

291 else: 

292 coloring[color] = {node} 

293 return coloring 

294 

295 

296def _labeling_complete(labeling, G): 

297 """Determines whether or not LPA is done. 

298 

299 Label propagation is complete when all nodes have a label that is 

300 in the set of highest frequency labels amongst its neighbors. 

301 

302 Nodes with no neighbors are considered complete. 

303 """ 

304 return all( 

305 labeling[v] in _most_frequent_labels(v, labeling, G) for v in G if len(G[v]) > 0 

306 ) 

307 

308 

309def _most_frequent_labels(node, labeling, G): 

310 """Returns a set of all labels with maximum frequency in `labeling`. 

311 

312 Input `labeling` should be a dict keyed by node to labels. 

313 """ 

314 if not G[node]: 

315 # Nodes with no neighbors are themselves a community and are labeled 

316 # accordingly, hence the immediate if statement. 

317 return {labeling[node]} 

318 

319 # Compute the frequencies of all neighbours of node 

320 freqs = Counter(labeling[q] for q in G[node]) 

321 max_freq = max(freqs.values()) 

322 return {label for label, freq in freqs.items() if freq == max_freq} 

323 

324 

325def _update_label(node, labeling, G): 

326 """Updates the label of a node using the Prec-Max tie breaking algorithm 

327 

328 The algorithm is explained in: 'Community Detection via Semi-Synchronous 

329 Label Propagation Algorithms' Cordasco and Gargano, 2011 

330 """ 

331 high_labels = _most_frequent_labels(node, labeling, G) 

332 if len(high_labels) == 1: 

333 labeling[node] = high_labels.pop() 

334 elif len(high_labels) > 1: 

335 # Prec-Max 

336 if labeling[node] not in high_labels: 

337 labeling[node] = max(high_labels)