Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/networkx/algorithms/community/local.py: 9%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

66 statements  

1""" 

2Local Community Detection Algorithms 

3 

4Local Community Detection (LCD) aims to detected one or a few communities 

5starting from certain source nodes in the network. This differs from Global 

6Community Detection (GCD), which aims to partition an entire network into 

7communities. 

8 

9LCD is often useful when only a portion of the graph is known or the 

10graph is large enough that GCD is infeasable 

11 

12[1]_ Gives a good introduction and overview of LCD 

13 

14References 

15---------- 

16.. [1] Baltsou, Georgia, Konstantinos Christopoulos, and Konstantinos Tsichlas. 

17 Local community detection: A survey. IEEE Access 10 (2022): 110701-110726. 

18 https://doi.org/10.1109/ACCESS.2022.3213980 

19 

20 

21""" 

22 

23__all__ = ["greedy_source_expansion"] 

24 

25 

26def _clauset_greedy_source_expansion(G, *, source, cutoff=None): 

27 if cutoff is None: 

28 cutoff = float("inf") 

29 C = {source} 

30 B = {source} 

31 U = G[source].keys() - C 

32 T = {frozenset([node, nbr]) for node in B for nbr in G.neighbors(node)} 

33 I = {edge for edge in T if all(node in C for node in edge)} 

34 

35 R_value = 0 

36 while len(C) < cutoff: 

37 if not U: 

38 break 

39 

40 max_R = 0 

41 best_node = None 

42 best_node_B = best_node_T = best_node_I = set() 

43 

44 for v in U: 

45 R_tmp, B_tmp, T_tmp, I_tmp = _calculate_local_modularity_for_candidate( 

46 G, v, C, B, T, I 

47 ) 

48 if R_tmp > max_R: 

49 max_R = R_tmp 

50 best_node = v 

51 best_node_B = B_tmp 

52 best_node_T = T_tmp 

53 best_node_I = I_tmp 

54 

55 C = C | {best_node} 

56 U.update(G[best_node].keys() - C) 

57 U.remove(best_node) 

58 B = best_node_B 

59 T = best_node_T 

60 I = best_node_I 

61 if max_R < R_value: 

62 break 

63 R_value = max_R 

64 

65 return C 

66 

67 

68def _calculate_local_modularity_for_candidate(G, v, C, B, T, I): 

69 """ 

70 Compute the local modularity R and updated variables when adding node v to the community. 

71 

72 Parameters 

73 ---------- 

74 G : NetworkX graph 

75 The input graph. 

76 v : node 

77 The candidate node to add to the community. 

78 C : set 

79 The current set of community nodes. 

80 B : set 

81 The current set of boundary nodes. 

82 T : set of frozenset 

83 The current set of boundary edges. 

84 I : set of frozenset 

85 The current set of internal boundary edges. 

86 

87 Returns 

88 ------- 

89 R_tmp : float 

90 The local modularity after adding node v. 

91 B_tmp : set 

92 The updated set of boundary nodes. 

93 T_tmp : set of frozenset 

94 The updated set of boundary edges. 

95 I_tmp : set of frozenset 

96 The updated set of internal boundary edges. 

97 """ 

98 C_tmp = C | {v} 

99 B_tmp = B.copy() 

100 T_tmp = T.copy() 

101 I_tmp = I.copy() 

102 removed_B_nodes = set() 

103 

104 # Update boundary nodes and edges 

105 for nbr in G[v]: 

106 if nbr not in C_tmp: 

107 # v has nbrs not in the community, so it remains a boundary node 

108 B_tmp.add(v) 

109 # Add edge between v and nbr to boundary edges 

110 T_tmp.add(frozenset([v, nbr])) 

111 

112 if nbr in B: 

113 # Check if nbr should be removed from boundary nodes 

114 # Go through nbrs nbrs to see if it is still a boundary node 

115 nbr_still_in_B = any(nbr_nbr not in C_tmp for nbr_nbr in G[nbr]) 

116 if not nbr_still_in_B: 

117 B_tmp.remove(nbr) 

118 removed_B_nodes.add(nbr) 

119 

120 if nbr in C_tmp: 

121 # Add edge between v and nbr to internal edges 

122 I_tmp.add(frozenset([v, nbr])) 

123 

124 # Remove edges no longer in the boundary 

125 for removed_node in removed_B_nodes: 

126 for removed_node_nbr in G[removed_node]: 

127 if removed_node_nbr not in B_tmp: 

128 T_tmp.discard(frozenset([removed_node_nbr, removed_node])) 

129 I_tmp.discard(frozenset([removed_node_nbr, removed_node])) 

130 

131 R_tmp = len(I_tmp) / len(T_tmp) if len(T_tmp) > 0 else 1 

132 return R_tmp, B_tmp, T_tmp, I_tmp 

133 

134 

135ALGORITHMS = { 

136 "clauset": _clauset_greedy_source_expansion, 

137} 

138 

139 

140def greedy_source_expansion(G, *, source, cutoff=None, method="clauset"): 

141 r"""Find the local community around a source node. 

142 

143 Find the local community around a source node using Greedy Source 

144 Expansion. Greedy Source Expansion generally identifies a local community 

145 starting from the source node and expands it based on the criteria of the 

146 chosen algorithm. 

147 

148 The algorithm is specified with the `method` keyword argument. 

149 

150 * `"clauset"` [1]_ uses local modularity gain to determine local communities. 

151 The algorithm adds nbring nodes that maximize local modularity to the 

152 community iteratively, stopping when no additional nodes improve the modularity 

153 or when a predefined cutoff is reached. 

154 

155 Local modularity measures the density of edges within a community relative 

156 to the total graph. By focusing on local modularity, the algorithm efficiently 

157 uncovers communities around a specific node without requiring global 

158 optimization over the entire graph. 

159 

160 The algorithm assumes that the graph $G$ consists of a known community $C$ and 

161 an unknown set of nodes $U$, which are adjacent to $C$ . The boundary of the 

162 community $B$, consists of nodes in $C$ that have at least one nbr in $U$. 

163 

164 Mathematically, the local modularity is expressed as: 

165 

166 .. math:: 

167 R = \frac{I}{T} 

168 

169 where $T$ is the number of edges with one or more endpoints in $B$, and $I$ is the 

170 number of those edges with neither endpoint in $U$. 

171 

172 Parameters 

173 ---------- 

174 G : NetworkX graph 

175 The input graph. 

176 

177 source : node 

178 The source node from which the community expansion begins. 

179 

180 cutoff : int, optional (default=None) 

181 The maximum number of nodes to include in the community. If None, the algorithm 

182 expands until no further modularity gain can be made. 

183 

184 method : string, optional (default='clauset') 

185 The algorithm to use to carry out greedy source expansion. 

186 Supported options: 'clauset'. Other inputs produce a ValueError 

187 

188 Returns 

189 ------- 

190 set 

191 A set of nodes representing the local community around the source node. 

192 

193 Examples 

194 -------- 

195 >>> G = nx.karate_club_graph() 

196 >>> nx.community.greedy_source_expansion(G, source=16) 

197 {16, 0, 4, 5, 6, 10} 

198 

199 Notes 

200 ----- 

201 This algorithm is designed for detecting local communities around a specific node, 

202 which is useful for large networks where global community detection is computationally 

203 expensive. 

204 

205 The result of the algorithm may vary based on the structure of the graph, the choice of 

206 the source node, and the presence of ties between nodes during the greedy expansion process. 

207 

208 References 

209 ---------- 

210 .. [1] Clauset, Aaron. Finding local community structure in networks. 

211 Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 72, no. 2 (2005): 026132. 

212 https://arxiv.org/pdf/physics/0503036 

213 

214 """ 

215 try: 

216 algo = ALGORITHMS[method] 

217 except KeyError as e: 

218 raise ValueError(f"{method} is not a valid choice for an algorithm.") from e 

219 

220 return algo(G, source=source, cutoff=cutoff)