1"""
2Local Community Detection Algorithms
3
4Local Community Detection (LCD) aims to detected one or a few communities
5starting from certain source nodes in the network. This differs from Global
6Community Detection (GCD), which aims to partition an entire network into
7communities.
8
9LCD is often useful when only a portion of the graph is known or the
10graph is large enough that GCD is infeasable
11
12[1]_ Gives a good introduction and overview of LCD
13
14References
15----------
16.. [1] Baltsou, Georgia, Konstantinos Christopoulos, and Konstantinos Tsichlas.
17 Local community detection: A survey. IEEE Access 10 (2022): 110701-110726.
18 https://doi.org/10.1109/ACCESS.2022.3213980
19
20
21"""
22
23__all__ = ["greedy_source_expansion"]
24
25
26def _clauset_greedy_source_expansion(G, *, source, cutoff=None):
27 if cutoff is None:
28 cutoff = float("inf")
29 C = {source}
30 B = {source}
31 U = G[source].keys() - C
32 T = {frozenset([node, nbr]) for node in B for nbr in G.neighbors(node)}
33 I = {edge for edge in T if all(node in C for node in edge)}
34
35 R_value = 0
36 while len(C) < cutoff:
37 if not U:
38 break
39
40 max_R = 0
41 best_node = None
42 best_node_B = best_node_T = best_node_I = set()
43
44 for v in U:
45 R_tmp, B_tmp, T_tmp, I_tmp = _calculate_local_modularity_for_candidate(
46 G, v, C, B, T, I
47 )
48 if R_tmp > max_R:
49 max_R = R_tmp
50 best_node = v
51 best_node_B = B_tmp
52 best_node_T = T_tmp
53 best_node_I = I_tmp
54
55 C = C | {best_node}
56 U.update(G[best_node].keys() - C)
57 U.remove(best_node)
58 B = best_node_B
59 T = best_node_T
60 I = best_node_I
61 if max_R < R_value:
62 break
63 R_value = max_R
64
65 return C
66
67
68def _calculate_local_modularity_for_candidate(G, v, C, B, T, I):
69 """
70 Compute the local modularity R and updated variables when adding node v to the community.
71
72 Parameters
73 ----------
74 G : NetworkX graph
75 The input graph.
76 v : node
77 The candidate node to add to the community.
78 C : set
79 The current set of community nodes.
80 B : set
81 The current set of boundary nodes.
82 T : set of frozenset
83 The current set of boundary edges.
84 I : set of frozenset
85 The current set of internal boundary edges.
86
87 Returns
88 -------
89 R_tmp : float
90 The local modularity after adding node v.
91 B_tmp : set
92 The updated set of boundary nodes.
93 T_tmp : set of frozenset
94 The updated set of boundary edges.
95 I_tmp : set of frozenset
96 The updated set of internal boundary edges.
97 """
98 C_tmp = C | {v}
99 B_tmp = B.copy()
100 T_tmp = T.copy()
101 I_tmp = I.copy()
102 removed_B_nodes = set()
103
104 # Update boundary nodes and edges
105 for nbr in G[v]:
106 if nbr not in C_tmp:
107 # v has nbrs not in the community, so it remains a boundary node
108 B_tmp.add(v)
109 # Add edge between v and nbr to boundary edges
110 T_tmp.add(frozenset([v, nbr]))
111
112 if nbr in B:
113 # Check if nbr should be removed from boundary nodes
114 # Go through nbrs nbrs to see if it is still a boundary node
115 nbr_still_in_B = any(nbr_nbr not in C_tmp for nbr_nbr in G[nbr])
116 if not nbr_still_in_B:
117 B_tmp.remove(nbr)
118 removed_B_nodes.add(nbr)
119
120 if nbr in C_tmp:
121 # Add edge between v and nbr to internal edges
122 I_tmp.add(frozenset([v, nbr]))
123
124 # Remove edges no longer in the boundary
125 for removed_node in removed_B_nodes:
126 for removed_node_nbr in G[removed_node]:
127 if removed_node_nbr not in B_tmp:
128 T_tmp.discard(frozenset([removed_node_nbr, removed_node]))
129 I_tmp.discard(frozenset([removed_node_nbr, removed_node]))
130
131 R_tmp = len(I_tmp) / len(T_tmp) if len(T_tmp) > 0 else 1
132 return R_tmp, B_tmp, T_tmp, I_tmp
133
134
135ALGORITHMS = {
136 "clauset": _clauset_greedy_source_expansion,
137}
138
139
140def greedy_source_expansion(G, *, source, cutoff=None, method="clauset"):
141 r"""Find the local community around a source node.
142
143 Find the local community around a source node using Greedy Source
144 Expansion. Greedy Source Expansion generally identifies a local community
145 starting from the source node and expands it based on the criteria of the
146 chosen algorithm.
147
148 The algorithm is specified with the `method` keyword argument.
149
150 * `"clauset"` [1]_ uses local modularity gain to determine local communities.
151 The algorithm adds nbring nodes that maximize local modularity to the
152 community iteratively, stopping when no additional nodes improve the modularity
153 or when a predefined cutoff is reached.
154
155 Local modularity measures the density of edges within a community relative
156 to the total graph. By focusing on local modularity, the algorithm efficiently
157 uncovers communities around a specific node without requiring global
158 optimization over the entire graph.
159
160 The algorithm assumes that the graph $G$ consists of a known community $C$ and
161 an unknown set of nodes $U$, which are adjacent to $C$ . The boundary of the
162 community $B$, consists of nodes in $C$ that have at least one nbr in $U$.
163
164 Mathematically, the local modularity is expressed as:
165
166 .. math::
167 R = \frac{I}{T}
168
169 where $T$ is the number of edges with one or more endpoints in $B$, and $I$ is the
170 number of those edges with neither endpoint in $U$.
171
172 Parameters
173 ----------
174 G : NetworkX graph
175 The input graph.
176
177 source : node
178 The source node from which the community expansion begins.
179
180 cutoff : int, optional (default=None)
181 The maximum number of nodes to include in the community. If None, the algorithm
182 expands until no further modularity gain can be made.
183
184 method : string, optional (default='clauset')
185 The algorithm to use to carry out greedy source expansion.
186 Supported options: 'clauset'. Other inputs produce a ValueError
187
188 Returns
189 -------
190 set
191 A set of nodes representing the local community around the source node.
192
193 Examples
194 --------
195 >>> G = nx.karate_club_graph()
196 >>> nx.community.greedy_source_expansion(G, source=16)
197 {16, 0, 4, 5, 6, 10}
198
199 Notes
200 -----
201 This algorithm is designed for detecting local communities around a specific node,
202 which is useful for large networks where global community detection is computationally
203 expensive.
204
205 The result of the algorithm may vary based on the structure of the graph, the choice of
206 the source node, and the presence of ties between nodes during the greedy expansion process.
207
208 References
209 ----------
210 .. [1] Clauset, Aaron. Finding local community structure in networks.
211 Physical Review E—Statistical, Nonlinear, and Soft Matter Physics 72, no. 2 (2005): 026132.
212 https://arxiv.org/pdf/physics/0503036
213
214 """
215 try:
216 algo = ALGORITHMS[method]
217 except KeyError as e:
218 raise ValueError(f"{method} is not a valid choice for an algorithm.") from e
219
220 return algo(G, source=source, cutoff=cutoff)