Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/community/label_propagation.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

102 statements  

1""" 

2Label propagation community detection algorithms. 

3""" 

4 

5from collections import Counter, defaultdict, deque 

6 

7import networkx as nx 

8from networkx.utils import groups, not_implemented_for, py_random_state 

9 

10__all__ = [ 

11 "label_propagation_communities", 

12 "asyn_lpa_communities", 

13 "fast_label_propagation_communities", 

14] 

15 

16 

17@py_random_state("seed") 

18@nx._dispatchable(edge_attrs="weight") 

19def fast_label_propagation_communities(G, *, weight=None, seed=None): 

20 """Returns communities in `G` as detected by fast label propagation. 

21 

22 The fast label propagation algorithm is described in [1]_. The algorithm is 

23 probabilistic and the found communities may vary in different executions. 

24 

25 The algorithm operates as follows. First, the community label of each node is 

26 set to a unique label. The algorithm then repeatedly updates the labels of 

27 the nodes to the most frequent label in their neighborhood. In case of ties, 

28 a random label is chosen from the most frequent labels. 

29 

30 The algorithm maintains a queue of nodes that still need to be processed. 

31 Initially, all nodes are added to the queue in a random order. Then the nodes 

32 are removed from the queue one by one and processed. If a node updates its label, 

33 all its neighbors that have a different label are added to the queue (if not 

34 already in the queue). The algorithm stops when the queue is empty. 

35 

36 Parameters 

37 ---------- 

38 G : Graph, DiGraph, MultiGraph, or MultiDiGraph 

39 Any NetworkX graph. 

40 

41 weight : string, or None (default) 

42 The edge attribute representing a non-negative weight of an edge. If None, 

43 each edge is assumed to have weight one. The weight of an edge is used in 

44 determining the frequency with which a label appears among the neighbors of 

45 a node (edge with weight `w` is equivalent to `w` unweighted edges). 

46 

47 seed : integer, random_state, or None (default) 

48 Indicator of random number generation state. See :ref:`Randomness<randomness>`. 

49 

50 Returns 

51 ------- 

52 communities : iterable 

53 Iterable of communities given as sets of nodes. 

54 

55 Notes 

56 ----- 

57 Edge directions are ignored for directed graphs. 

58 Edge weights must be non-negative numbers. 

59 

60 References 

61 ---------- 

62 .. [1] Vincent A. Traag & Lovro Šubelj. "Large network community detection by 

63 fast label propagation." Scientific Reports 13 (2023): 2701. 

64 https://doi.org/10.1038/s41598-023-29610-z 

65 """ 

66 

67 # Queue of nodes to be processed. 

68 nodes_queue = deque(G) 

69 seed.shuffle(nodes_queue) 

70 

71 # Set of nodes in the queue. 

72 nodes_set = set(G) 

73 

74 # Assign unique label to each node. 

75 comms = {node: i for i, node in enumerate(G)} 

76 

77 while nodes_queue: 

78 # Remove next node from the queue to process. 

79 node = nodes_queue.popleft() 

80 nodes_set.remove(node) 

81 

82 # Isolated nodes retain their initial label. 

83 if G.degree(node) > 0: 

84 # Compute frequency of labels in node's neighborhood. 

85 label_freqs = _fast_label_count(G, comms, node, weight) 

86 max_freq = max(label_freqs.values()) 

87 

88 # Always sample new label from most frequent labels. 

89 comm = seed.choice( 

90 [comm for comm in label_freqs if label_freqs[comm] == max_freq] 

91 ) 

92 

93 if comms[node] != comm: 

94 comms[node] = comm 

95 

96 # Add neighbors that have different label to the queue. 

97 for nbr in nx.all_neighbors(G, node): 

98 if comms[nbr] != comm and nbr not in nodes_set: 

99 nodes_queue.append(nbr) 

100 nodes_set.add(nbr) 

101 

102 yield from groups(comms).values() 

103 

104 

105def _fast_label_count(G, comms, node, weight=None): 

106 """Computes the frequency of labels in the neighborhood of a node. 

107 

108 Returns a dictionary keyed by label to the frequency of that label. 

109 """ 

110 

111 if weight is None: 

112 # Unweighted (un)directed simple graph. 

113 if not G.is_multigraph(): 

114 label_freqs = Counter(map(comms.get, nx.all_neighbors(G, node))) 

115 

116 # Unweighted (un)directed multigraph. 

117 else: 

118 label_freqs = defaultdict(int) 

119 for nbr in G[node]: 

120 label_freqs[comms[nbr]] += len(G[node][nbr]) 

121 

122 if G.is_directed(): 

123 for nbr in G.pred[node]: 

124 label_freqs[comms[nbr]] += len(G.pred[node][nbr]) 

125 

126 else: 

127 # Weighted undirected simple/multigraph. 

128 label_freqs = defaultdict(float) 

129 for _, nbr, w in G.edges(node, data=weight, default=1): 

130 label_freqs[comms[nbr]] += w 

131 

132 # Weighted directed simple/multigraph. 

133 if G.is_directed(): 

134 for nbr, _, w in G.in_edges(node, data=weight, default=1): 

135 label_freqs[comms[nbr]] += w 

136 

137 return label_freqs 

138 

139 

140@py_random_state(2) 

141@nx._dispatchable(edge_attrs="weight") 

142def asyn_lpa_communities(G, weight=None, seed=None): 

143 """Returns communities in `G` as detected by asynchronous label 

144 propagation. 

145 

146 The asynchronous label propagation algorithm is described in 

147 [1]_. The algorithm is probabilistic and the found communities may 

148 vary on different executions. 

149 

150 The algorithm proceeds as follows. After initializing each node with 

151 a unique label, the algorithm repeatedly sets the label of a node to 

152 be the label that appears most frequently among that nodes 

153 neighbors. The algorithm halts when each node has the label that 

154 appears most frequently among its neighbors. The algorithm is 

155 asynchronous because each node is updated without waiting for 

156 updates on the remaining nodes. 

157 

158 This generalized version of the algorithm in [1]_ accepts edge 

159 weights. 

160 

161 Parameters 

162 ---------- 

163 G : Graph 

164 

165 weight : string 

166 The edge attribute representing the weight of an edge. 

167 If None, each edge is assumed to have weight one. In this 

168 algorithm, the weight of an edge is used in determining the 

169 frequency with which a label appears among the neighbors of a 

170 node: a higher weight means the label appears more often. 

171 

172 seed : integer, random_state, or None (default) 

173 Indicator of random number generation state. 

174 See :ref:`Randomness<randomness>`. 

175 

176 Returns 

177 ------- 

178 communities : iterable 

179 Iterable of communities given as sets of nodes. 

180 

181 Notes 

182 ----- 

183 Edge weight attributes must be numerical. 

184 

185 References 

186 ---------- 

187 .. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near 

188 linear time algorithm to detect community structures in large-scale 

189 networks." Physical Review E 76.3 (2007): 036106. 

190 """ 

191 

192 labels = {n: i for i, n in enumerate(G)} 

193 cont = True 

194 

195 while cont: 

196 cont = False 

197 nodes = list(G) 

198 seed.shuffle(nodes) 

199 

200 for node in nodes: 

201 if not G[node]: 

202 continue 

203 

204 # Get label frequencies among adjacent nodes. 

205 # Depending on the order they are processed in, 

206 # some nodes will be in iteration t and others in t-1, 

207 # making the algorithm asynchronous. 

208 if weight is None: 

209 # initialising a Counter from an iterator of labels is 

210 # faster for getting unweighted label frequencies 

211 label_freq = Counter(map(labels.get, G[node])) 

212 else: 

213 # updating a defaultdict is substantially faster 

214 # for getting weighted label frequencies 

215 label_freq = defaultdict(float) 

216 for _, v, wt in G.edges(node, data=weight, default=1): 

217 label_freq[labels[v]] += wt 

218 

219 # Get the labels that appear with maximum frequency. 

220 max_freq = max(label_freq.values()) 

221 best_labels = [ 

222 label for label, freq in label_freq.items() if freq == max_freq 

223 ] 

224 

225 # If the node does not have one of the maximum frequency labels, 

226 # randomly choose one of them and update the node's label. 

227 # Continue the iteration as long as at least one node 

228 # doesn't have a maximum frequency label. 

229 if labels[node] not in best_labels: 

230 labels[node] = seed.choice(best_labels) 

231 cont = True 

232 

233 yield from groups(labels).values() 

234 

235 

236@not_implemented_for("directed") 

237@nx._dispatchable 

238def label_propagation_communities(G): 

239 """Generates community sets determined by label propagation 

240 

241 Finds communities in `G` using a semi-synchronous label propagation 

242 method [1]_. This method combines the advantages of both the synchronous 

243 and asynchronous models. Not implemented for directed graphs. 

244 

245 Parameters 

246 ---------- 

247 G : graph 

248 An undirected NetworkX graph. 

249 

250 Returns 

251 ------- 

252 communities : iterable 

253 A dict_values object that contains a set of nodes for each community. 

254 

255 Raises 

256 ------ 

257 NetworkXNotImplemented 

258 If the graph is directed 

259 

260 References 

261 ---------- 

262 .. [1] Cordasco, G., & Gargano, L. (2010, December). Community detection 

263 via semi-synchronous label propagation algorithms. In Business 

264 Applications of Social Network Analysis (BASNA), 2010 IEEE International 

265 Workshop on (pp. 1-8). IEEE. 

266 """ 

267 coloring = _color_network(G) 

268 # Create a unique label for each node in the graph 

269 labeling = {v: k for k, v in enumerate(G)} 

270 while not _labeling_complete(labeling, G): 

271 # Update the labels of every node with the same color. 

272 for color, nodes in coloring.items(): 

273 for n in nodes: 

274 _update_label(n, labeling, G) 

275 

276 clusters = defaultdict(set) 

277 for node, label in labeling.items(): 

278 clusters[label].add(node) 

279 return clusters.values() 

280 

281 

282def _color_network(G): 

283 """Colors the network so that neighboring nodes all have distinct colors. 

284 

285 Returns a dict keyed by color to a set of nodes with that color. 

286 """ 

287 coloring = {} # color => set(node) 

288 colors = nx.coloring.greedy_color(G) 

289 for node, color in colors.items(): 

290 if color in coloring: 

291 coloring[color].add(node) 

292 else: 

293 coloring[color] = {node} 

294 return coloring 

295 

296 

297def _labeling_complete(labeling, G): 

298 """Determines whether or not LPA is done. 

299 

300 Label propagation is complete when all nodes have a label that is 

301 in the set of highest frequency labels amongst its neighbors. 

302 

303 Nodes with no neighbors are considered complete. 

304 """ 

305 return all( 

306 labeling[v] in _most_frequent_labels(v, labeling, G) for v in G if len(G[v]) > 0 

307 ) 

308 

309 

310def _most_frequent_labels(node, labeling, G): 

311 """Returns a set of all labels with maximum frequency in `labeling`. 

312 

313 Input `labeling` should be a dict keyed by node to labels. 

314 """ 

315 if not G[node]: 

316 # Nodes with no neighbors are themselves a community and are labeled 

317 # accordingly, hence the immediate if statement. 

318 return {labeling[node]} 

319 

320 # Compute the frequencies of all neighbors of node 

321 freqs = Counter(labeling[q] for q in G[node]) 

322 max_freq = max(freqs.values()) 

323 return {label for label, freq in freqs.items() if freq == max_freq} 

324 

325 

326def _update_label(node, labeling, G): 

327 """Updates the label of a node using the Prec-Max tie breaking algorithm 

328 

329 The algorithm is explained in: 'Community Detection via Semi-Synchronous 

330 Label Propagation Algorithms' Cordasco and Gargano, 2011 

331 """ 

332 high_labels = _most_frequent_labels(node, labeling, G) 

333 if len(high_labels) == 1: 

334 labeling[node] = high_labels.pop() 

335 elif len(high_labels) > 1: 

336 # Prec-Max 

337 if labeling[node] not in high_labels: 

338 labeling[node] = max(high_labels)