/src/igraph/src/community/leiden.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | IGraph library. |
3 | | Copyright (C) 2020-2025 The igraph development team <igraph@igraph.org> |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; either version 2 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program. If not, see <https://www.gnu.org/licenses/>. |
17 | | */ |
18 | | |
19 | | #include "igraph_community.h" |
20 | | |
21 | | #include "igraph_adjlist.h" |
22 | | #include "igraph_bitset.h" |
23 | | #include "igraph_constructors.h" |
24 | | #include "igraph_dqueue.h" |
25 | | #include "igraph_interface.h" |
26 | | #include "igraph_memory.h" |
27 | | #include "igraph_random.h" |
28 | | #include "igraph_stack.h" |
29 | | #include "igraph_structural.h" |
30 | | #include "igraph_vector.h" |
31 | | #include "igraph_vector_list.h" |
32 | | |
33 | | #include "core/interruption.h" |
34 | | |
35 | | /* Move vertices in order to improve the quality of a partition. |
36 | | * |
37 | | * This function considers each vertex and greedily moves it to a neighboring |
38 | | * community that maximizes the improvement in the quality of a partition. |
39 | | * Only moves that strictly improve the quality are considered. |
40 | | * |
41 | | * The vertices are examined in a queue, and initially all vertices are put in the |
42 | | * queue in a random order. Vertices are popped from the queue when they are |
43 | | * examined, and only neighbors of vertices that are moved (which are not part of |
44 | | * the cluster the vertex was moved to) are pushed to the queue again. |
45 | | * |
46 | | * The \p membership vector is used as the starting point to move around vertices, |
47 | | * and is updated in-place. |
48 | | * |
49 | | */ |
50 | | static igraph_error_t leiden_fastmove_vertices( |
51 | | const igraph_t *graph, |
52 | | const igraph_inclist_t *edges_per_vertex, |
53 | | const igraph_vector_t *edge_weights, |
54 | | const igraph_vector_t *vertex_out_weights, |
55 | | const igraph_vector_t *vertex_in_weights, |
56 | | const igraph_real_t resolution, |
57 | | igraph_integer_t *nb_clusters, |
58 | | igraph_vector_int_t *membership, |
59 | 18.7k | igraph_bool_t *changed) { |
60 | | |
61 | 18.7k | const igraph_integer_t n = igraph_vcount(graph); |
62 | 18.7k | const igraph_bool_t directed = (vertex_in_weights != NULL); |
63 | 18.7k | igraph_dqueue_int_t unstable_vertices; |
64 | 18.7k | igraph_real_t max_diff, diff; |
65 | 18.7k | igraph_bitset_t neighbor_cluster_added, vertex_is_stable; |
66 | 18.7k | igraph_vector_t cluster_out_weights, cluster_in_weights; |
67 | 18.7k | igraph_vector_t edge_weights_per_cluster; |
68 | 18.7k | igraph_vector_int_t neighbor_clusters; |
69 | 18.7k | igraph_vector_int_t vertex_order; |
70 | 18.7k | igraph_vector_int_t nb_vertices_per_cluster; |
71 | 18.7k | igraph_stack_int_t empty_clusters; |
72 | 18.7k | igraph_integer_t c, nb_neigh_clusters; |
73 | 18.7k | int iter = 0; |
74 | | |
75 | | /* Initialize queue of unstable vertices and whether vertex is stable. Only |
76 | | * unstable vertices are in the queue. */ |
77 | 18.7k | IGRAPH_BITSET_INIT_FINALLY(&vertex_is_stable, n); |
78 | | |
79 | 18.7k | IGRAPH_DQUEUE_INT_INIT_FINALLY(&unstable_vertices, n); |
80 | | |
81 | | /* Shuffle vertices */ |
82 | 18.7k | IGRAPH_CHECK(igraph_vector_int_init_range(&vertex_order, 0, n)); |
83 | 18.7k | IGRAPH_FINALLY(igraph_vector_int_destroy, &vertex_order); |
84 | 18.7k | igraph_vector_int_shuffle(&vertex_order); |
85 | | |
86 | | /* Add to the queue */ |
87 | 840k | for (igraph_integer_t i = 0; i < n; i++) { |
88 | 821k | IGRAPH_CHECK(igraph_dqueue_int_push(&unstable_vertices, VECTOR(vertex_order)[i])); |
89 | 821k | } |
90 | | |
91 | | /* Initialize cluster weights and nb vertices */ |
92 | 18.7k | IGRAPH_VECTOR_INIT_FINALLY(&cluster_out_weights, n); |
93 | 18.7k | if (directed) { |
94 | 7.67k | IGRAPH_VECTOR_INIT_FINALLY(&cluster_in_weights, n); |
95 | 7.67k | } |
96 | 18.7k | IGRAPH_VECTOR_INT_INIT_FINALLY(&nb_vertices_per_cluster, n); |
97 | 840k | for (igraph_integer_t i = 0; i < n; i++) { |
98 | 821k | c = VECTOR(*membership)[i]; |
99 | 821k | VECTOR(cluster_out_weights)[c] += VECTOR(*vertex_out_weights)[i]; |
100 | 821k | if (directed) { |
101 | 334k | VECTOR(cluster_in_weights)[c] += VECTOR(*vertex_in_weights)[i]; |
102 | 334k | } |
103 | 821k | VECTOR(nb_vertices_per_cluster)[c] += 1; |
104 | 821k | } |
105 | | |
106 | | /* Initialize empty clusters */ |
107 | 18.7k | IGRAPH_STACK_INT_INIT_FINALLY(&empty_clusters, n); |
108 | 840k | for (c = 0; c < n; c++) { |
109 | 821k | if (VECTOR(nb_vertices_per_cluster)[c] == 0) { |
110 | 6.11k | IGRAPH_CHECK(igraph_stack_int_push(&empty_clusters, c)); |
111 | 6.11k | } |
112 | 821k | } |
113 | | |
114 | | /* Initialize vectors to be used in calculating differences */ |
115 | 18.7k | IGRAPH_VECTOR_INIT_FINALLY(&edge_weights_per_cluster, n); |
116 | | |
117 | | /* Initialize neighboring cluster */ |
118 | 18.7k | IGRAPH_BITSET_INIT_FINALLY(&neighbor_cluster_added, n); |
119 | 18.7k | IGRAPH_VECTOR_INT_INIT_FINALLY(&neighbor_clusters, n); |
120 | | |
121 | | /* Iterate while the queue is not empty */ |
122 | 846k | while (!igraph_dqueue_int_empty(&unstable_vertices)) { |
123 | 827k | igraph_integer_t v = igraph_dqueue_int_pop(&unstable_vertices); |
124 | 827k | igraph_integer_t best_cluster, current_cluster = VECTOR(*membership)[v]; |
125 | 827k | igraph_integer_t degree; |
126 | 827k | igraph_vector_int_t *edges; |
127 | | |
128 | | /* Remove vertex from current cluster */ |
129 | 827k | VECTOR(cluster_out_weights)[current_cluster] -= VECTOR(*vertex_out_weights)[v]; |
130 | 827k | if (directed) { |
131 | 335k | VECTOR(cluster_in_weights)[current_cluster] -= VECTOR(*vertex_in_weights)[v]; |
132 | 335k | } |
133 | 827k | VECTOR(nb_vertices_per_cluster)[current_cluster]--; |
134 | 827k | if (VECTOR(nb_vertices_per_cluster)[current_cluster] == 0) { |
135 | 810k | IGRAPH_CHECK(igraph_stack_int_push(&empty_clusters, current_cluster)); |
136 | 810k | } |
137 | | |
138 | | /* Find out neighboring clusters */ |
139 | 827k | c = igraph_stack_int_top(&empty_clusters); |
140 | 827k | VECTOR(neighbor_clusters)[0] = c; |
141 | 827k | IGRAPH_BIT_SET(neighbor_cluster_added, c); |
142 | 827k | nb_neigh_clusters = 1; |
143 | | |
144 | | /* Determine the edge weight to each neighboring cluster */ |
145 | 827k | edges = igraph_inclist_get(edges_per_vertex, v); |
146 | 827k | degree = igraph_vector_int_size(edges); |
147 | 1.67M | for (igraph_integer_t i = 0; i < degree; i++) { |
148 | 844k | igraph_integer_t e = VECTOR(*edges)[i]; |
149 | 844k | igraph_integer_t u = IGRAPH_OTHER(graph, e, v); |
150 | 844k | if (u != v) { |
151 | 747k | c = VECTOR(*membership)[u]; |
152 | 747k | if (!IGRAPH_BIT_TEST(neighbor_cluster_added, c)) { |
153 | 674k | IGRAPH_BIT_SET(neighbor_cluster_added, c); |
154 | 674k | VECTOR(neighbor_clusters)[nb_neigh_clusters++] = c; |
155 | 674k | } |
156 | 747k | VECTOR(edge_weights_per_cluster)[c] += VECTOR(*edge_weights)[e]; |
157 | 747k | } |
158 | 844k | } |
159 | | |
160 | | /* Calculate maximum diff */ |
161 | 827k | best_cluster = current_cluster; |
162 | 827k | max_diff = VECTOR(edge_weights_per_cluster)[current_cluster]; |
163 | 827k | if (directed) { |
164 | 335k | max_diff -= |
165 | 335k | (VECTOR(*vertex_in_weights)[v] * VECTOR(cluster_out_weights)[current_cluster] + |
166 | 335k | VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_in_weights)[current_cluster]) * resolution; |
167 | 491k | } else { |
168 | 491k | max_diff -= VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_out_weights)[current_cluster] * resolution; |
169 | 491k | } |
170 | 2.33M | for (igraph_integer_t i = 0; i < nb_neigh_clusters; i++) { |
171 | 1.50M | c = VECTOR(neighbor_clusters)[i]; |
172 | 1.50M | diff = VECTOR(edge_weights_per_cluster)[c]; |
173 | 1.50M | if (directed) { |
174 | 599k | diff -= (VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_in_weights)[c] + |
175 | 599k | VECTOR(*vertex_in_weights)[v] * VECTOR(cluster_out_weights)[c]) * resolution; |
176 | 903k | } else { |
177 | 903k | diff -= VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_out_weights)[c] * resolution; |
178 | 903k | } |
179 | | /* Only consider strictly improving moves. |
180 | | * Note that this is important in considering convergence. |
181 | | */ |
182 | 1.50M | if (diff > max_diff) { |
183 | 6.86k | best_cluster = c; |
184 | 6.86k | max_diff = diff; |
185 | 6.86k | } |
186 | 1.50M | VECTOR(edge_weights_per_cluster)[c] = 0.0; |
187 | 1.50M | IGRAPH_BIT_CLEAR(neighbor_cluster_added, c); |
188 | 1.50M | } |
189 | | |
190 | | /* Move vertex to best cluster */ |
191 | 827k | VECTOR(cluster_out_weights)[best_cluster] += VECTOR(*vertex_out_weights)[v]; |
192 | 827k | if (directed) { |
193 | 335k | VECTOR(cluster_in_weights)[best_cluster] += VECTOR(*vertex_in_weights)[v]; |
194 | 335k | } |
195 | 827k | VECTOR(nb_vertices_per_cluster)[best_cluster]++; |
196 | 827k | if (best_cluster == igraph_stack_int_top(&empty_clusters)) { |
197 | 805k | igraph_stack_int_pop(&empty_clusters); |
198 | 805k | } |
199 | | |
200 | | /* Mark vertex as stable */ |
201 | 827k | IGRAPH_BIT_SET(vertex_is_stable, v); |
202 | | |
203 | | /* Add stable neighbours that are not part of the new cluster to the queue */ |
204 | 827k | if (best_cluster != current_cluster) { |
205 | 6.25k | *changed = true; |
206 | 6.25k | VECTOR(*membership)[v] = best_cluster; |
207 | | |
208 | 46.7k | for (igraph_integer_t i = 0; i < degree; i++) { |
209 | 40.4k | igraph_integer_t e = VECTOR(*edges)[i]; |
210 | 40.4k | igraph_integer_t u = IGRAPH_OTHER(graph, e, v); |
211 | 40.4k | if (IGRAPH_BIT_TEST(vertex_is_stable, u) && VECTOR(*membership)[u] != best_cluster) { |
212 | 6.52k | IGRAPH_CHECK(igraph_dqueue_int_push(&unstable_vertices, u)); |
213 | 6.52k | IGRAPH_BIT_CLEAR(vertex_is_stable, u); |
214 | 6.52k | } |
215 | 40.4k | } |
216 | 6.25k | } |
217 | | |
218 | 827k | IGRAPH_ALLOW_INTERRUPTION_LIMITED(iter, 1 << 14); |
219 | 827k | } |
220 | | |
221 | 18.7k | IGRAPH_CHECK(igraph_reindex_membership(membership, NULL, nb_clusters)); |
222 | | |
223 | 18.7k | igraph_vector_int_destroy(&neighbor_clusters); |
224 | 18.7k | igraph_bitset_destroy(&neighbor_cluster_added); |
225 | 18.7k | igraph_vector_destroy(&edge_weights_per_cluster); |
226 | 18.7k | igraph_stack_int_destroy(&empty_clusters); |
227 | 18.7k | igraph_vector_int_destroy(&nb_vertices_per_cluster); |
228 | 18.7k | if (directed) igraph_vector_destroy(&cluster_in_weights); |
229 | 18.7k | igraph_vector_destroy(&cluster_out_weights); |
230 | 18.7k | igraph_vector_int_destroy(&vertex_order); |
231 | 18.7k | igraph_dqueue_int_destroy(&unstable_vertices); |
232 | 18.7k | igraph_bitset_destroy(&vertex_is_stable); |
233 | 18.7k | if (directed) { |
234 | 7.67k | IGRAPH_FINALLY_CLEAN(10); |
235 | 11.0k | } else { |
236 | 11.0k | IGRAPH_FINALLY_CLEAN(9); |
237 | 11.0k | } |
238 | | |
239 | 18.7k | return IGRAPH_SUCCESS; |
240 | 18.7k | } |
241 | | |
242 | | /* Clean a refined membership vector. |
243 | | * |
244 | | * This function examines all vertices in \p vertex_subset and updates |
245 | | * \p refined_membership to ensure that the clusters are numbered consecutively, |
246 | | * starting from \p nb_refined_clusters. The \p nb_refined_clusters is also |
247 | | * updated itself. If C is the initial \p nb_refined_clusters and C' the |
248 | | * resulting \p nb_refined_clusters, then vertices in \p vertex_subset are numbered |
249 | | * C, C + 1, ..., C' - 1. |
250 | | */ |
251 | | static igraph_error_t leiden_clean_refined_membership( |
252 | | const igraph_vector_int_t* vertex_subset, |
253 | | igraph_vector_int_t *refined_membership, |
254 | 233k | igraph_integer_t* nb_refined_clusters) { |
255 | | |
256 | 233k | const igraph_integer_t n = igraph_vector_int_size(vertex_subset); |
257 | 233k | igraph_vector_int_t new_cluster; |
258 | | |
259 | 233k | IGRAPH_VECTOR_INT_INIT_FINALLY(&new_cluster, n); |
260 | | |
261 | | /* Clean clusters. We will store the new cluster + 1 so that cluster == 0 |
262 | | * indicates that no membership was assigned yet. */ |
263 | 233k | *nb_refined_clusters += 1; |
264 | 477k | for (igraph_integer_t i = 0; i < n; i++) { |
265 | 244k | igraph_integer_t v = VECTOR(*vertex_subset)[i]; |
266 | 244k | igraph_integer_t c = VECTOR(*refined_membership)[v]; |
267 | 244k | if (VECTOR(new_cluster)[c] == 0) { |
268 | 233k | VECTOR(new_cluster)[c] = *nb_refined_clusters; |
269 | 233k | *nb_refined_clusters += 1; |
270 | 233k | } |
271 | 244k | } |
272 | | |
273 | | /* Assign new cluster */ |
274 | 477k | for (igraph_integer_t i = 0; i < n; i++) { |
275 | 244k | igraph_integer_t v = VECTOR(*vertex_subset)[i]; |
276 | 244k | igraph_integer_t c = VECTOR(*refined_membership)[v]; |
277 | 244k | VECTOR(*refined_membership)[v] = VECTOR(new_cluster)[c] - 1; |
278 | 244k | } |
279 | | /* We used the cluster + 1, so correct */ |
280 | 233k | *nb_refined_clusters -= 1; |
281 | | |
282 | 233k | igraph_vector_int_destroy(&new_cluster); |
283 | 233k | IGRAPH_FINALLY_CLEAN(1); |
284 | | |
285 | 233k | return IGRAPH_SUCCESS; |
286 | 233k | } |
287 | | |
288 | | /* Merge vertices for a subset of the vertices. This is used to refine a partition. |
289 | | * |
290 | | * The vertices included in \p vertex_subset are assumed to be the vertices i for which |
291 | | * membership[i] = cluster_subset. |
292 | | * |
293 | | * All vertices in \p vertex_subset are initialized to a singleton partition in \p |
294 | | * refined_membership. Only singleton clusters can be merged if they are |
295 | | * sufficiently well connected to the current subgraph induced by \p |
296 | | * vertex_subset. |
297 | | * |
298 | | * We only examine each vertex once. Instead of greedily choosing the maximum |
299 | | * possible cluster to merge with, the cluster is chosen randomly among all |
300 | | * possibilities that do not decrease the quality of the partition. The |
301 | | * probability of choosing a certain cluster is proportional to exp(diff/beta). |
302 | | * For beta to 0 this converges to selecting a cluster with the maximum |
303 | | * improvement. For beta to infinity this converges to a uniform distribution |
304 | | * among all eligible clusters. |
305 | | * |
306 | | * The \p refined_membership is updated for vertex in \p vertex_subset. The number |
307 | | * of refined clusters, \p nb_refined_clusters is used to set the actual refined |
308 | | * cluster membership and is updated after this routine. Within each cluster |
309 | | * (i.e. for a given \p vertex_subset), the refined membership is initially simply |
310 | | * set to 0, ..., n - 1 (for n vertices in \p vertex_subset). However, for each \p |
311 | | * vertex_subset the refined membership should of course be unique. Hence, after |
312 | | * merging, the refined membership starts with \p nb_refined_clusters, which is |
313 | | * also updated to ensure that the resulting \p nb_refined_clusters counts all |
314 | | * refined clusters that have already been processed. See |
315 | | * leiden_clean_refined_membership for more information about |
316 | | * this aspect. |
317 | | */ |
318 | | static igraph_error_t leiden_merge_vertices( |
319 | | const igraph_t *graph, |
320 | | const igraph_inclist_t *edges_per_vertex, |
321 | | const igraph_vector_t *edge_weights, |
322 | | const igraph_vector_t *vertex_out_weights, |
323 | | const igraph_vector_t *vertex_in_weights, |
324 | | const igraph_vector_int_t *vertex_subset, |
325 | | const igraph_vector_int_t *membership, |
326 | | const igraph_integer_t cluster_subset, |
327 | | const igraph_real_t resolution, |
328 | | const igraph_real_t beta, |
329 | | igraph_integer_t *nb_refined_clusters, |
330 | 233k | igraph_vector_int_t *refined_membership) { |
331 | | |
332 | 233k | const igraph_bool_t directed = (vertex_in_weights != NULL); |
333 | 233k | igraph_vector_int_t vertex_order; |
334 | 233k | igraph_bitset_t non_singleton_cluster, neighbor_cluster_added; |
335 | 233k | igraph_real_t max_diff, total_cum_trans_diff, diff; |
336 | 233k | igraph_real_t total_vertex_out_weight = 0.0, total_vertex_in_weight = 0.0; |
337 | 233k | const igraph_integer_t n = igraph_vector_int_size(vertex_subset); |
338 | 233k | igraph_vector_t cluster_out_weights, cluster_in_weights; |
339 | 233k | igraph_vector_t cum_trans_diff, edge_weights_per_cluster, external_edge_weight_per_cluster_in_subset; |
340 | 233k | igraph_vector_int_t neighbor_clusters; |
341 | 233k | igraph_vector_int_t *edges, nb_vertices_per_cluster; |
342 | 233k | igraph_integer_t degree, nb_neigh_clusters; |
343 | | |
344 | | /* Initialize cluster weights */ |
345 | 233k | IGRAPH_VECTOR_INIT_FINALLY(&cluster_out_weights, n); |
346 | 233k | if (directed) { |
347 | 40.8k | IGRAPH_VECTOR_INIT_FINALLY(&cluster_in_weights, n); |
348 | 40.8k | } |
349 | | |
350 | | /* Initialize number of vertices per cluster */ |
351 | 233k | IGRAPH_VECTOR_INT_INIT_FINALLY(&nb_vertices_per_cluster, n); |
352 | | |
353 | | /* Initialize external edge weight per cluster in subset */ |
354 | 233k | IGRAPH_VECTOR_INIT_FINALLY(&external_edge_weight_per_cluster_in_subset, n); |
355 | | |
356 | | /* Initialize administration for a singleton partition */ |
357 | 477k | for (igraph_integer_t i = 0; i < n; i++) { |
358 | 244k | igraph_integer_t v = VECTOR(*vertex_subset)[i]; |
359 | 244k | VECTOR(*refined_membership)[v] = i; |
360 | 244k | VECTOR(cluster_out_weights)[i] += VECTOR(*vertex_out_weights)[v]; |
361 | 244k | total_vertex_out_weight += VECTOR(*vertex_out_weights)[v]; |
362 | 244k | if (directed) { |
363 | 42.8k | VECTOR(cluster_in_weights)[i] += VECTOR(*vertex_in_weights)[v]; |
364 | 42.8k | total_vertex_in_weight += VECTOR(*vertex_in_weights)[v]; |
365 | 42.8k | } |
366 | 244k | VECTOR(nb_vertices_per_cluster)[i] += 1; |
367 | | |
368 | | /* Find out neighboring clusters */ |
369 | 244k | edges = igraph_inclist_get(edges_per_vertex, v); |
370 | 244k | degree = igraph_vector_int_size(edges); |
371 | 546k | for (igraph_integer_t j = 0; j < degree; j++) { |
372 | 302k | igraph_integer_t e = VECTOR(*edges)[j]; |
373 | 302k | igraph_integer_t u = IGRAPH_OTHER(graph, e, v); |
374 | 302k | if (u != v && VECTOR(*membership)[u] == cluster_subset) { |
375 | 53.2k | VECTOR(external_edge_weight_per_cluster_in_subset)[i] += VECTOR(*edge_weights)[e]; |
376 | 53.2k | } |
377 | 302k | } |
378 | 244k | } |
379 | | |
380 | | /* Shuffle vertices */ |
381 | 233k | IGRAPH_CHECK(igraph_vector_int_init_copy(&vertex_order, vertex_subset)); |
382 | 233k | IGRAPH_FINALLY(igraph_vector_int_destroy, &vertex_order); |
383 | 233k | igraph_vector_int_shuffle(&vertex_order); |
384 | | |
385 | | /* Initialize non singleton clusters */ |
386 | 233k | IGRAPH_BITSET_INIT_FINALLY(&non_singleton_cluster, n); |
387 | | |
388 | | /* Initialize vectors to be used in calculating differences */ |
389 | 233k | IGRAPH_VECTOR_INIT_FINALLY(&edge_weights_per_cluster, n); |
390 | | |
391 | | /* Initialize neighboring cluster */ |
392 | 233k | IGRAPH_BITSET_INIT_FINALLY(&neighbor_cluster_added, n); |
393 | 233k | IGRAPH_VECTOR_INT_INIT_FINALLY(&neighbor_clusters, n); |
394 | | |
395 | | /* Initialize cumulative transformed difference */ |
396 | 233k | IGRAPH_VECTOR_INIT_FINALLY(&cum_trans_diff, n); |
397 | | |
398 | 477k | for (igraph_integer_t i = 0; i < n; i++) { |
399 | 244k | igraph_integer_t v = VECTOR(vertex_order)[i]; |
400 | 244k | igraph_integer_t chosen_cluster, best_cluster, current_cluster = VECTOR(*refined_membership)[v]; |
401 | 244k | igraph_real_t vertex_weight_prod; |
402 | | |
403 | 244k | if (directed) { |
404 | 42.8k | vertex_weight_prod = |
405 | 42.8k | VECTOR(cluster_out_weights)[current_cluster] * (total_vertex_in_weight - VECTOR(cluster_in_weights)[current_cluster]) + |
406 | 42.8k | VECTOR(cluster_in_weights)[current_cluster] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[current_cluster]); |
407 | 201k | } else { |
408 | 201k | vertex_weight_prod = VECTOR(cluster_out_weights)[current_cluster] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[current_cluster]); |
409 | 201k | } |
410 | | |
411 | 244k | if (!IGRAPH_BIT_TEST(non_singleton_cluster, current_cluster) && |
412 | 244k | (VECTOR(external_edge_weight_per_cluster_in_subset)[current_cluster] >= |
413 | 234k | vertex_weight_prod * resolution)) { |
414 | | /* Remove vertex from current cluster, which is then a singleton by |
415 | | * definition. */ |
416 | 234k | VECTOR(cluster_out_weights)[current_cluster] = 0.0; |
417 | 234k | if (directed) { |
418 | 41.2k | VECTOR(cluster_in_weights)[current_cluster] = 0.0; |
419 | 41.2k | } |
420 | 234k | VECTOR(nb_vertices_per_cluster)[current_cluster] = 0; |
421 | | |
422 | | /* Find out neighboring clusters */ |
423 | 234k | edges = igraph_inclist_get(edges_per_vertex, v); |
424 | 234k | degree = igraph_vector_int_size(edges); |
425 | | |
426 | | /* Also add current cluster to ensure it can be chosen. */ |
427 | 234k | VECTOR(neighbor_clusters)[0] = current_cluster; |
428 | 234k | IGRAPH_BIT_SET(neighbor_cluster_added, current_cluster); |
429 | 234k | nb_neigh_clusters = 1; |
430 | 474k | for (igraph_integer_t j = 0; j < degree; j++) { |
431 | 239k | igraph_integer_t e = VECTOR(*edges)[j]; |
432 | 239k | igraph_integer_t u = IGRAPH_OTHER(graph, e, v); |
433 | 239k | if (u != v && VECTOR(*membership)[u] == cluster_subset) { |
434 | 29.1k | igraph_integer_t c = VECTOR(*refined_membership)[u]; |
435 | 29.1k | if (!IGRAPH_BIT_TEST(neighbor_cluster_added, c)) { |
436 | 13.4k | IGRAPH_BIT_SET(neighbor_cluster_added, c); |
437 | 13.4k | VECTOR(neighbor_clusters)[nb_neigh_clusters++] = c; |
438 | 13.4k | } |
439 | 29.1k | VECTOR(edge_weights_per_cluster)[c] += VECTOR(*edge_weights)[e]; |
440 | 29.1k | } |
441 | 239k | } |
442 | | |
443 | | /* Calculate diffs */ |
444 | 234k | best_cluster = current_cluster; |
445 | 234k | max_diff = 0.0; |
446 | 234k | total_cum_trans_diff = 0.0; |
447 | 483k | for (igraph_integer_t j = 0; j < nb_neigh_clusters; j++) { |
448 | 248k | igraph_integer_t c = VECTOR(neighbor_clusters)[j]; |
449 | | |
450 | 248k | if (directed) { |
451 | 43.7k | vertex_weight_prod = |
452 | 43.7k | VECTOR(cluster_out_weights)[c] * (total_vertex_in_weight - VECTOR(cluster_in_weights)[c]) + |
453 | 43.7k | VECTOR(cluster_in_weights)[c] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[c]); |
454 | 204k | } else { |
455 | 204k | vertex_weight_prod = VECTOR(cluster_out_weights)[c] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[c]); |
456 | 204k | } |
457 | | |
458 | 248k | if (VECTOR(external_edge_weight_per_cluster_in_subset)[c] >= vertex_weight_prod * resolution) { |
459 | 248k | diff = VECTOR(edge_weights_per_cluster)[c]; |
460 | 248k | if (directed) { |
461 | 43.7k | diff -= (VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_in_weights)[c] + |
462 | 43.7k | VECTOR(*vertex_in_weights)[v] * VECTOR(cluster_out_weights)[c]) * resolution; |
463 | 204k | } else { |
464 | 204k | diff -= VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_out_weights)[c] * resolution; |
465 | 204k | } |
466 | | |
467 | | |
468 | 248k | if (diff > max_diff) { |
469 | 12.1k | best_cluster = c; |
470 | 12.1k | max_diff = diff; |
471 | 12.1k | } |
472 | | |
473 | | /* Calculate the transformed difference for sampling */ |
474 | 248k | if (diff >= 0) { |
475 | 247k | total_cum_trans_diff += exp(diff / beta); |
476 | 247k | } |
477 | | |
478 | 248k | } |
479 | | |
480 | 248k | VECTOR(cum_trans_diff)[j] = total_cum_trans_diff; |
481 | 248k | VECTOR(edge_weights_per_cluster)[c] = 0.0; |
482 | 248k | IGRAPH_BIT_CLEAR(neighbor_cluster_added, c); |
483 | 248k | } |
484 | | |
485 | | /* Determine the neighboring cluster to which the currently selected vertex |
486 | | * will be moved. |
487 | | */ |
488 | 234k | if (total_cum_trans_diff < IGRAPH_INFINITY) { |
489 | 234k | igraph_real_t r = RNG_UNIF(0, total_cum_trans_diff); |
490 | 234k | igraph_integer_t chosen_idx; |
491 | 234k | igraph_vector_binsearch_slice(&cum_trans_diff, r, &chosen_idx, 0, nb_neigh_clusters); |
492 | 234k | chosen_cluster = VECTOR(neighbor_clusters)[chosen_idx]; |
493 | 234k | } else { |
494 | 822 | chosen_cluster = best_cluster; |
495 | 822 | } |
496 | | |
497 | | /* Move vertex to randomly chosen cluster */ |
498 | 234k | VECTOR(cluster_out_weights)[chosen_cluster] += VECTOR(*vertex_out_weights)[v]; |
499 | 234k | if (directed) { |
500 | 41.2k | VECTOR(cluster_in_weights)[chosen_cluster] += VECTOR(*vertex_in_weights)[v]; |
501 | 41.2k | } |
502 | 234k | VECTOR(nb_vertices_per_cluster)[chosen_cluster]++; |
503 | | |
504 | 474k | for (igraph_integer_t j = 0; j < degree; j++) { |
505 | 239k | igraph_integer_t e = VECTOR(*edges)[j]; |
506 | 239k | igraph_integer_t u = IGRAPH_OTHER(graph, e, v); |
507 | 239k | if (VECTOR(*membership)[u] == cluster_subset) { |
508 | 36.7k | if (VECTOR(*refined_membership)[u] == chosen_cluster) { |
509 | 26.0k | VECTOR(external_edge_weight_per_cluster_in_subset)[chosen_cluster] -= VECTOR(*edge_weights)[e]; |
510 | 26.0k | } else { |
511 | 10.6k | VECTOR(external_edge_weight_per_cluster_in_subset)[chosen_cluster] += VECTOR(*edge_weights)[e]; |
512 | 10.6k | } |
513 | 36.7k | } |
514 | 239k | } |
515 | | |
516 | | /* Set cluster */ |
517 | 234k | if (chosen_cluster != current_cluster) { |
518 | 10.5k | VECTOR(*refined_membership)[v] = chosen_cluster; |
519 | | |
520 | 10.5k | IGRAPH_BIT_SET(non_singleton_cluster, chosen_cluster); |
521 | 10.5k | } |
522 | 234k | } /* end if singleton and may be merged */ |
523 | 244k | } |
524 | | |
525 | 233k | IGRAPH_CHECK(leiden_clean_refined_membership(vertex_subset, refined_membership, nb_refined_clusters)); |
526 | | |
527 | 233k | igraph_vector_destroy(&cum_trans_diff); |
528 | 233k | igraph_vector_int_destroy(&neighbor_clusters); |
529 | 233k | igraph_bitset_destroy(&neighbor_cluster_added); |
530 | 233k | igraph_vector_destroy(&edge_weights_per_cluster); |
531 | 233k | igraph_bitset_destroy(&non_singleton_cluster); |
532 | 233k | igraph_vector_int_destroy(&vertex_order); |
533 | 233k | igraph_vector_destroy(&external_edge_weight_per_cluster_in_subset); |
534 | 233k | igraph_vector_int_destroy(&nb_vertices_per_cluster); |
535 | 233k | if (directed) igraph_vector_destroy(&cluster_in_weights); |
536 | 233k | igraph_vector_destroy(&cluster_out_weights); |
537 | 233k | if (directed) { |
538 | 40.8k | IGRAPH_FINALLY_CLEAN(10); |
539 | 192k | } else { |
540 | 192k | IGRAPH_FINALLY_CLEAN(9); |
541 | 192k | } |
542 | | |
543 | 233k | return IGRAPH_SUCCESS; |
544 | 233k | } |
545 | | |
546 | | /* Create clusters out of a membership vector. |
547 | | * |
548 | | * It is assumed that the incoming list of integer vectors is already sized |
549 | | * appropriately (i.e. it has at least as many items as the number of clusters |
550 | | * in the membership vector), and that each item in the list of integer vectors |
551 | | * is empty. |
552 | | */ |
553 | | static igraph_error_t leiden_get_clusters( |
554 | | const igraph_vector_int_t *membership, |
555 | 11.0k | igraph_vector_int_list_t *clusters) { |
556 | | |
557 | 11.0k | const igraph_integer_t n = igraph_vector_int_size(membership); |
558 | | |
559 | 499k | for (igraph_integer_t i = 0; i < n; i++) { |
560 | | /* Get cluster for vertex i */ |
561 | 488k | igraph_vector_int_t *cluster = igraph_vector_int_list_get_ptr(clusters, VECTOR(*membership)[i]); |
562 | | |
563 | | /* Add vertex i to cluster vector */ |
564 | 488k | IGRAPH_CHECK(igraph_vector_int_push_back(cluster, i)); |
565 | 488k | } |
566 | | |
567 | 11.0k | return IGRAPH_SUCCESS; |
568 | 11.0k | } |
569 | | |
570 | | /* Aggregate the graph based on the \p refined membership while setting the |
571 | | * membership of each aggregated vertex according to the \p membership. |
572 | | * |
573 | | * Technically speaking we have that |
574 | | * aggregated_membership[refined_membership[v]] = membership[v] for each vertex v. |
575 | | * |
576 | | * The new aggregated graph is returned in \p aggregated_graph. This graph |
577 | | * object should not yet be initialized, igraph_create() is called on it, and |
578 | | * responsibility for destroying the object lies with the calling method |
579 | | * |
580 | | * The remaining results, aggregated_edge_weights, aggregate_vertex_weights and |
581 | | * aggregated_membership are all expected to be initialized. |
582 | | * |
583 | | */ |
584 | | static igraph_error_t leiden_aggregate( |
585 | | const igraph_t *graph, |
586 | | const igraph_inclist_t *edges_per_vertex, |
587 | | const igraph_vector_t *edge_weights, |
588 | | const igraph_vector_t *vertex_out_weights, |
589 | | const igraph_vector_t *vertex_in_weights, |
590 | | const igraph_vector_int_t *membership, |
591 | | const igraph_vector_int_t *refined_membership, |
592 | | const igraph_integer_t nb_refined_clusters, |
593 | | igraph_t *aggregated_graph, |
594 | | igraph_vector_t *aggregated_edge_weights, |
595 | | igraph_vector_t *aggregated_vertex_out_weights, |
596 | | igraph_vector_t *aggregated_vertex_in_weights, |
597 | 5.50k | igraph_vector_int_t *aggregated_membership) { |
598 | | |
599 | 5.50k | const igraph_bool_t directed = (vertex_in_weights != NULL); |
600 | 5.50k | igraph_vector_int_t aggregated_edges; |
601 | 5.50k | igraph_vector_t edge_weight_to_cluster; |
602 | 5.50k | igraph_vector_int_list_t refined_clusters; |
603 | 5.50k | igraph_vector_int_t *incident_edges; |
604 | 5.50k | igraph_vector_int_t neighbor_clusters; |
605 | 5.50k | igraph_bitset_t neighbor_cluster_added; |
606 | 5.50k | igraph_integer_t c, degree, nb_neigh_clusters; |
607 | | |
608 | | /* Get refined clusters */ |
609 | 5.50k | IGRAPH_VECTOR_INT_LIST_INIT_FINALLY(&refined_clusters, nb_refined_clusters); |
610 | 5.50k | IGRAPH_CHECK(leiden_get_clusters(refined_membership, &refined_clusters)); |
611 | | |
612 | | /* Initialize new edges */ |
613 | 5.50k | IGRAPH_VECTOR_INT_INIT_FINALLY(&aggregated_edges, 0); |
614 | | |
615 | | /* We clear the aggregated edge weights, we will push each new edge weight */ |
616 | 5.50k | igraph_vector_clear(aggregated_edge_weights); |
617 | | /* Simply resize the aggregated vertex weights and membership, they can be set directly */ |
618 | 5.50k | IGRAPH_CHECK(igraph_vector_resize(aggregated_vertex_out_weights, nb_refined_clusters)); |
619 | 5.50k | if (directed) { |
620 | 1.03k | IGRAPH_CHECK(igraph_vector_resize(aggregated_vertex_in_weights, nb_refined_clusters)); |
621 | 1.03k | } |
622 | 5.50k | IGRAPH_CHECK(igraph_vector_int_resize(aggregated_membership, nb_refined_clusters)); |
623 | | |
624 | 5.50k | IGRAPH_VECTOR_INIT_FINALLY(&edge_weight_to_cluster, nb_refined_clusters); |
625 | | |
626 | | /* Initialize neighboring cluster */ |
627 | 5.50k | IGRAPH_BITSET_INIT_FINALLY(&neighbor_cluster_added, nb_refined_clusters); |
628 | 5.50k | IGRAPH_VECTOR_INT_INIT_FINALLY(&neighbor_clusters, nb_refined_clusters); |
629 | | |
630 | | /* Check per cluster */ |
631 | 239k | for (c = 0; c < nb_refined_clusters; c++) { |
632 | 233k | igraph_vector_int_t* refined_cluster = igraph_vector_int_list_get_ptr(&refined_clusters, c); |
633 | 233k | igraph_integer_t n_c = igraph_vector_int_size(refined_cluster); |
634 | 233k | igraph_integer_t v = -1; |
635 | | |
636 | | /* Calculate the total edge weight to other clusters */ |
637 | 233k | VECTOR(*aggregated_vertex_out_weights)[c] = 0.0; |
638 | 233k | if (directed) { |
639 | 40.9k | VECTOR(*aggregated_vertex_in_weights)[c] = 0.0; |
640 | 40.9k | } |
641 | 233k | nb_neigh_clusters = 0; |
642 | 478k | for (igraph_integer_t i = 0; i < n_c; i++) { |
643 | 244k | v = VECTOR(*refined_cluster)[i]; |
644 | 244k | incident_edges = igraph_inclist_get(edges_per_vertex, v); |
645 | 244k | degree = igraph_vector_int_size(incident_edges); |
646 | | |
647 | 546k | for (igraph_integer_t j = 0; j < degree; j++) { |
648 | 302k | igraph_integer_t e = VECTOR(*incident_edges)[j]; |
649 | 302k | igraph_integer_t u = IGRAPH_OTHER(graph, e, v); |
650 | 302k | igraph_integer_t c2 = VECTOR(*refined_membership)[u]; |
651 | | |
652 | 302k | if (c2 > c) { |
653 | 120k | if (!IGRAPH_BIT_TEST(neighbor_cluster_added, c2)) { |
654 | 105k | IGRAPH_BIT_SET(neighbor_cluster_added, c2); |
655 | 105k | VECTOR(neighbor_clusters)[nb_neigh_clusters++] = c2; |
656 | 105k | } |
657 | 120k | VECTOR(edge_weight_to_cluster)[c2] += VECTOR(*edge_weights)[e]; |
658 | 120k | } |
659 | 302k | } |
660 | | |
661 | 244k | VECTOR(*aggregated_vertex_out_weights)[c] += VECTOR(*vertex_out_weights)[v]; |
662 | 244k | if (directed) { |
663 | 42.8k | VECTOR(*aggregated_vertex_in_weights)[c] += VECTOR(*vertex_in_weights)[v]; |
664 | 42.8k | } |
665 | 244k | } |
666 | | |
667 | | /* Add actual edges from this cluster to the other clusters */ |
668 | 339k | for (igraph_integer_t i = 0; i < nb_neigh_clusters; i++) { |
669 | 105k | igraph_integer_t c2 = VECTOR(neighbor_clusters)[i]; |
670 | | |
671 | | /* Add edge */ |
672 | 105k | IGRAPH_CHECK(igraph_vector_int_push_back(&aggregated_edges, c)); |
673 | 105k | IGRAPH_CHECK(igraph_vector_int_push_back(&aggregated_edges, c2)); |
674 | | |
675 | | /* Add edge weight */ |
676 | 105k | IGRAPH_CHECK(igraph_vector_push_back(aggregated_edge_weights, VECTOR(edge_weight_to_cluster)[c2])); |
677 | | |
678 | 105k | VECTOR(edge_weight_to_cluster)[c2] = 0.0; |
679 | 105k | IGRAPH_BIT_CLEAR(neighbor_cluster_added, c2); |
680 | 105k | } |
681 | | |
682 | 233k | VECTOR(*aggregated_membership)[c] = VECTOR(*membership)[v]; |
683 | | |
684 | 233k | } |
685 | | |
686 | 5.50k | igraph_vector_int_destroy(&neighbor_clusters); |
687 | 5.50k | igraph_bitset_destroy(&neighbor_cluster_added); |
688 | 5.50k | igraph_vector_destroy(&edge_weight_to_cluster); |
689 | 5.50k | igraph_vector_int_list_destroy(&refined_clusters); |
690 | 5.50k | IGRAPH_FINALLY_CLEAN(4); |
691 | | |
692 | 5.50k | igraph_destroy(aggregated_graph); |
693 | 5.50k | IGRAPH_CHECK(igraph_create(aggregated_graph, &aggregated_edges, nb_refined_clusters, |
694 | 5.50k | directed)); |
695 | | |
696 | 5.50k | igraph_vector_int_destroy(&aggregated_edges); |
697 | 5.50k | IGRAPH_FINALLY_CLEAN(1); |
698 | | |
699 | 5.50k | return IGRAPH_SUCCESS; |
700 | 5.50k | } |
701 | | |
702 | | /* Calculate the quality of the partition. |
703 | | * |
704 | | * The quality is defined as |
705 | | * |
706 | | * 1 / 2m sum_ij (A_ij - gamma n_i n_j) d(s_i, s_j) |
707 | | * |
708 | | * for undirected graphs and as |
709 | | * |
710 | | * 1 / m sum_ij (A_ij - gamma n^out_i n^in_j) d(s_i, s_j) |
711 | | * |
712 | | * where m is the total edge weight, A_ij is the weight of edge (i, j), gamma is |
713 | | * the so-called resolution parameter, n_i is the vertex weight of vertex i, s_i is |
714 | | * the cluster of vertex i and d(x, y) = 1 if and only if x = y and 0 otherwise. |
715 | | * |
716 | | * Note that by setting n_i = k_i the degree of vertex i and dividing gamma by 2m, |
717 | | * we effectively optimize modularity. By setting n_i = 1 we optimize the |
718 | | * Constant Potts Model. |
719 | | * |
720 | | * This can be represented as a sum over clusters as |
721 | | * |
722 | | * 1 / 2m sum_c (e_c - gamma N_c^2) |
723 | | * |
724 | | * where e_c = sum_ij A_ij d(s_i, c)d(s_j, c) is the internal edge weight |
725 | | * in cluster c (or twice this value if undirected) and |
726 | | * N_c = sum_i n_i d(s_i, c) is the sum of the vertex weights inside cluster c. |
727 | | * This is how the quality is calculated in practice. |
728 | | */ |
729 | | static igraph_error_t leiden_quality( |
730 | | const igraph_t *graph, |
731 | | const igraph_vector_t *edge_weights, |
732 | | const igraph_vector_t *vertex_out_weights, |
733 | | const igraph_vector_t *vertex_in_weights, |
734 | | const igraph_vector_int_t *membership, |
735 | | const igraph_integer_t nb_clusters, |
736 | | const igraph_real_t resolution, |
737 | 13.2k | igraph_real_t *quality) { |
738 | | |
739 | 13.2k | const igraph_integer_t vcount = igraph_vcount(graph); |
740 | 13.2k | const igraph_integer_t ecount = igraph_ecount(graph); |
741 | 13.2k | const igraph_bool_t directed = (vertex_in_weights != NULL); |
742 | 13.2k | const igraph_real_t directed_multiplier = directed ? 1.0 : 2.0; |
743 | 13.2k | igraph_vector_t cluster_out_weights, cluster_in_weights; |
744 | 13.2k | igraph_real_t total_edge_weight = 0.0; |
745 | | |
746 | 13.2k | *quality = 0.0; |
747 | | |
748 | 315k | for (igraph_integer_t e=0; e < ecount; e++) { |
749 | 302k | igraph_integer_t from = IGRAPH_FROM(graph, e); |
750 | 302k | igraph_integer_t to = IGRAPH_TO(graph, e); |
751 | 302k | total_edge_weight += VECTOR(*edge_weights)[e]; |
752 | | |
753 | | /* We add the internal edge weights. */ |
754 | 302k | if (VECTOR(*membership)[from] == VECTOR(*membership)[to]) { |
755 | 74.1k | *quality += directed_multiplier * VECTOR(*edge_weights)[e]; |
756 | 74.1k | } |
757 | 302k | } |
758 | | |
759 | | /* Initialize and compute cluster weights. */ |
760 | | |
761 | 13.2k | IGRAPH_VECTOR_INIT_FINALLY(&cluster_out_weights, vcount); |
762 | 13.2k | if (directed) { |
763 | 6.63k | IGRAPH_VECTOR_INIT_FINALLY(&cluster_in_weights, vcount); |
764 | 6.63k | } |
765 | | |
766 | 600k | for (igraph_integer_t i = 0; i < vcount; i++) { |
767 | 587k | igraph_integer_t c = VECTOR(*membership)[i]; |
768 | 587k | VECTOR(cluster_out_weights)[c] += VECTOR(*vertex_out_weights)[i]; |
769 | 587k | if (directed) { |
770 | 293k | VECTOR(cluster_in_weights)[c] += VECTOR(*vertex_in_weights)[i]; |
771 | 293k | } |
772 | 587k | } |
773 | | |
774 | | /* We subtract gamma * N^out_c * N^in_c */ |
775 | | |
776 | 590k | for (igraph_integer_t c = 0; c < nb_clusters; c++) { |
777 | 577k | if (directed) { |
778 | 291k | *quality -= resolution * VECTOR(cluster_out_weights)[c] * VECTOR(cluster_in_weights)[c]; |
779 | 291k | } else { |
780 | 285k | *quality -= resolution * VECTOR(cluster_out_weights)[c] * VECTOR(cluster_out_weights)[c]; |
781 | 285k | } |
782 | 577k | } |
783 | | |
784 | 13.2k | if (directed) { |
785 | 6.63k | igraph_vector_destroy(&cluster_in_weights); |
786 | 6.63k | IGRAPH_FINALLY_CLEAN(1); |
787 | 6.63k | } |
788 | 13.2k | igraph_vector_destroy(&cluster_out_weights); |
789 | 13.2k | IGRAPH_FINALLY_CLEAN(1); |
790 | | |
791 | | /* We normalise by m or 2m depending on directedness */ |
792 | 13.2k | *quality /= (directed_multiplier * total_edge_weight); |
793 | | |
794 | 13.2k | return IGRAPH_SUCCESS; |
795 | 13.2k | } |
796 | | |
797 | | /* This is the core of the Leiden algorithm and relies on subroutines to |
798 | | * perform the three different phases: (1) local moving of vertices, (2) |
799 | | * refinement of the partition and (3) aggregation of the network based on the |
800 | | * refined partition, using the non-refined partition to create an initial |
801 | | * partition for the aggregate network. |
802 | | */ |
803 | | static igraph_error_t community_leiden( |
804 | | const igraph_t *graph, |
805 | | igraph_vector_t *edge_weights, |
806 | | igraph_vector_t *vertex_out_weights, |
807 | | igraph_vector_t *vertex_in_weights, |
808 | | igraph_real_t resolution, |
809 | | igraph_real_t beta, |
810 | | igraph_vector_int_t *membership, |
811 | | igraph_integer_t *nb_clusters, |
812 | | igraph_real_t *quality, |
813 | 13.2k | igraph_bool_t *changed) { |
814 | | |
815 | 13.2k | const igraph_integer_t n = igraph_vcount(graph); |
816 | 13.2k | const igraph_bool_t directed = (vertex_in_weights != NULL); |
817 | 13.2k | igraph_integer_t nb_refined_clusters; |
818 | 13.2k | igraph_integer_t i, c; |
819 | 13.2k | igraph_t aggregated_graph, *i_graph; |
820 | 13.2k | igraph_vector_t aggregated_edge_weights; |
821 | 13.2k | igraph_vector_t aggregated_vertex_out_weights, aggregated_vertex_in_weights; |
822 | 13.2k | igraph_vector_int_t aggregated_membership; |
823 | 13.2k | igraph_vector_t *i_edge_weights; |
824 | 13.2k | igraph_vector_t *i_vertex_out_weights, *i_vertex_in_weights; |
825 | 13.2k | igraph_vector_int_t *i_membership; |
826 | 13.2k | igraph_vector_t tmp_edge_weights, tmp_vertex_out_weights, tmp_vertex_in_weights; |
827 | 13.2k | igraph_vector_int_t tmp_membership; |
828 | 13.2k | igraph_vector_int_t refined_membership; |
829 | 13.2k | igraph_vector_int_t aggregate_vertex; |
830 | 13.2k | igraph_vector_int_list_t clusters; |
831 | 13.2k | igraph_inclist_t edges_per_vertex; |
832 | 13.2k | igraph_bool_t continue_clustering; |
833 | 13.2k | igraph_integer_t level = 0; |
834 | | |
835 | | /* Initialize temporary weights and membership to be used in aggregation */ |
836 | 13.2k | IGRAPH_VECTOR_INIT_FINALLY(&tmp_edge_weights, 0); |
837 | 13.2k | IGRAPH_VECTOR_INIT_FINALLY(&tmp_vertex_out_weights, 0); |
838 | 13.2k | if (directed) { |
839 | 6.63k | IGRAPH_VECTOR_INIT_FINALLY(&tmp_vertex_in_weights, 0); |
840 | 6.63k | } |
841 | 13.2k | IGRAPH_VECTOR_INT_INIT_FINALLY(&tmp_membership, 0); |
842 | | |
843 | | /* Initialize clusters */ |
844 | 13.2k | IGRAPH_VECTOR_INT_LIST_INIT_FINALLY(&clusters, n); |
845 | | |
846 | | /* Initialize aggregate vertices, which initially is identical to simply the |
847 | | * vertices in the graph. */ |
848 | 13.2k | IGRAPH_CHECK(igraph_vector_int_init_range(&aggregate_vertex, 0, n)); |
849 | 13.2k | IGRAPH_FINALLY(igraph_vector_int_destroy, &aggregate_vertex); |
850 | | |
851 | | /* Initialize refined membership */ |
852 | 13.2k | IGRAPH_VECTOR_INT_INIT_FINALLY(&refined_membership, 0); |
853 | | |
854 | | /* Initialize aggregated graph */ |
855 | 13.2k | IGRAPH_CHECK(igraph_empty(&aggregated_graph, 0, directed)); |
856 | 13.2k | IGRAPH_FINALLY(igraph_destroy, &aggregated_graph); |
857 | | |
858 | | /* Initialize aggregated edge weights */ |
859 | 13.2k | IGRAPH_VECTOR_INIT_FINALLY(&aggregated_edge_weights, 0); |
860 | | |
861 | | /* Initialize aggregated vertex weights */ |
862 | 13.2k | IGRAPH_VECTOR_INIT_FINALLY(&aggregated_vertex_out_weights, 0); |
863 | 13.2k | if (directed) { |
864 | 6.63k | IGRAPH_VECTOR_INIT_FINALLY(&aggregated_vertex_in_weights, 0); |
865 | 6.63k | } |
866 | | |
867 | | /* Initialize aggregated membership */ |
868 | 13.2k | IGRAPH_VECTOR_INT_INIT_FINALLY(&aggregated_membership, 0); |
869 | | |
870 | | /* Set actual graph, weights and membership to be used. */ |
871 | 13.2k | i_graph = (igraph_t*)graph; |
872 | 13.2k | i_edge_weights = edge_weights; |
873 | 13.2k | i_vertex_out_weights = vertex_out_weights; |
874 | 13.2k | i_vertex_in_weights = directed ? vertex_in_weights : NULL; |
875 | 13.2k | i_membership = membership; |
876 | | |
877 | | /* Clean membership: ensure that cluster indices are 0 <= c < n. */ |
878 | 13.2k | IGRAPH_CHECK(igraph_reindex_membership(i_membership, NULL, nb_clusters)); |
879 | | |
880 | | /* We start out with no changes, whenever a vertex is moved, this will be set to true. */ |
881 | 13.2k | *changed = false; |
882 | 18.7k | do { |
883 | | |
884 | | /* Get incidence list for fast iteration */ |
885 | 18.7k | IGRAPH_CHECK(igraph_inclist_init( i_graph, &edges_per_vertex, IGRAPH_ALL, IGRAPH_LOOPS_TWICE)); |
886 | 18.7k | IGRAPH_FINALLY(igraph_inclist_destroy, &edges_per_vertex); |
887 | | |
888 | | /* Move around the vertices in order to increase the quality */ |
889 | 18.7k | IGRAPH_CHECK(leiden_fastmove_vertices(i_graph, |
890 | 18.7k | &edges_per_vertex, |
891 | 18.7k | i_edge_weights, |
892 | 18.7k | i_vertex_out_weights, i_vertex_in_weights, |
893 | 18.7k | resolution, |
894 | 18.7k | nb_clusters, |
895 | 18.7k | i_membership, |
896 | 18.7k | changed)); |
897 | | |
898 | | /* We only continue clustering if not all clusters are represented by a |
899 | | * single vertex yet |
900 | | */ |
901 | 18.7k | continue_clustering = (*nb_clusters < igraph_vcount(i_graph)); |
902 | | |
903 | 18.7k | if (continue_clustering) { |
904 | | /* Set original membership */ |
905 | 5.50k | if (level > 0) { |
906 | 29.4k | for (i = 0; i < n; i++) { |
907 | 28.8k | igraph_integer_t v_aggregate = VECTOR(aggregate_vertex)[i]; |
908 | 28.8k | VECTOR(*membership)[i] = VECTOR(*i_membership)[v_aggregate]; |
909 | 28.8k | } |
910 | 561 | } |
911 | | |
912 | | /* Get vertex sets for each cluster. */ |
913 | 5.50k | IGRAPH_CHECK(leiden_get_clusters(i_membership, &clusters)); |
914 | | |
915 | | /* Ensure refined membership is correct size */ |
916 | 5.50k | IGRAPH_CHECK(igraph_vector_int_resize(&refined_membership, igraph_vcount(i_graph))); |
917 | | |
918 | | /* Refine each cluster */ |
919 | 5.50k | nb_refined_clusters = 0; |
920 | 238k | for (c = 0; c < *nb_clusters; c++) { |
921 | 233k | igraph_vector_int_t* cluster = igraph_vector_int_list_get_ptr(&clusters, c); |
922 | 233k | IGRAPH_CHECK(leiden_merge_vertices(i_graph, |
923 | 233k | &edges_per_vertex, |
924 | 233k | i_edge_weights, |
925 | 233k | i_vertex_out_weights, i_vertex_in_weights, |
926 | 233k | cluster, i_membership, c, |
927 | 233k | resolution, beta, |
928 | 233k | &nb_refined_clusters, &refined_membership)); |
929 | | /* Empty cluster */ |
930 | 233k | igraph_vector_int_clear(cluster); |
931 | 233k | } |
932 | | |
933 | | /* If refinement didn't aggregate anything, we aggregate on the basis of |
934 | | * the actual clustering */ |
935 | 5.50k | if (nb_refined_clusters >= igraph_vcount(i_graph)) { |
936 | 78 | IGRAPH_CHECK(igraph_vector_int_update(&refined_membership, i_membership)); |
937 | 78 | nb_refined_clusters = *nb_clusters; |
938 | 78 | } |
939 | | |
940 | | /* Keep track of aggregate vertex. */ |
941 | 252k | for (i = 0; i < n; i++) { |
942 | | /* Current aggregate vertex */ |
943 | 246k | igraph_integer_t v_aggregate = VECTOR(aggregate_vertex)[i]; |
944 | | /* New aggregate vertex */ |
945 | 246k | VECTOR(aggregate_vertex)[i] = VECTOR(refined_membership)[v_aggregate]; |
946 | 246k | } |
947 | | |
948 | 5.50k | IGRAPH_CHECK(leiden_aggregate( |
949 | 5.50k | i_graph, |
950 | 5.50k | &edges_per_vertex, |
951 | 5.50k | i_edge_weights, |
952 | 5.50k | i_vertex_out_weights, i_vertex_in_weights, |
953 | 5.50k | i_membership, &refined_membership, nb_refined_clusters, |
954 | 5.50k | &aggregated_graph, |
955 | 5.50k | &tmp_edge_weights, |
956 | 5.50k | &tmp_vertex_out_weights, directed ? &tmp_vertex_in_weights : NULL, |
957 | 5.50k | &tmp_membership)); |
958 | | |
959 | | /* On the lowest level, the actual graph and vertex and edge weights and |
960 | | * membership are used. On higher levels, we will use the aggregated graph |
961 | | * and associated vectors. |
962 | | */ |
963 | 5.50k | if (level == 0) { |
964 | | /* Set actual graph, weights and membership to be used. */ |
965 | 4.94k | i_graph = &aggregated_graph; |
966 | 4.94k | i_edge_weights = &aggregated_edge_weights; |
967 | 4.94k | i_vertex_out_weights = &aggregated_vertex_out_weights; |
968 | 4.94k | if (directed) { |
969 | 894 | i_vertex_in_weights = &aggregated_vertex_in_weights; |
970 | 894 | } |
971 | 4.94k | i_membership = &aggregated_membership; |
972 | 4.94k | } |
973 | | |
974 | | /* Update the aggregated administration. */ |
975 | 5.50k | IGRAPH_CHECK(igraph_vector_update(i_edge_weights, &tmp_edge_weights)); |
976 | 5.50k | IGRAPH_CHECK(igraph_vector_update(i_vertex_out_weights, &tmp_vertex_out_weights)); |
977 | 5.50k | if (directed) { |
978 | 1.03k | IGRAPH_CHECK(igraph_vector_update(i_vertex_in_weights, &tmp_vertex_in_weights)); |
979 | 1.03k | } |
980 | 5.50k | IGRAPH_CHECK(igraph_vector_int_update(i_membership, &tmp_membership)); |
981 | | |
982 | 5.50k | level += 1; |
983 | 5.50k | } |
984 | | |
985 | | /* We are done iterating, so we destroy the incidence list */ |
986 | 18.7k | igraph_inclist_destroy(&edges_per_vertex); |
987 | 18.7k | IGRAPH_FINALLY_CLEAN(1); |
988 | 18.7k | } while (continue_clustering); |
989 | | |
990 | | /* Free aggregated graph and associated vectors */ |
991 | 13.2k | igraph_vector_int_destroy(&aggregated_membership); |
992 | 13.2k | if (directed) igraph_vector_destroy(&aggregated_vertex_in_weights); |
993 | 13.2k | igraph_vector_destroy(&aggregated_vertex_out_weights); |
994 | 13.2k | igraph_vector_destroy(&aggregated_edge_weights); |
995 | 13.2k | igraph_destroy(&aggregated_graph); |
996 | | |
997 | | /* Free remaining memory */ |
998 | 13.2k | igraph_vector_int_destroy(&refined_membership); |
999 | 13.2k | igraph_vector_int_destroy(&aggregate_vertex); |
1000 | 13.2k | igraph_vector_int_list_destroy(&clusters); |
1001 | 13.2k | igraph_vector_int_destroy(&tmp_membership); |
1002 | 13.2k | igraph_vector_destroy(&tmp_vertex_out_weights); |
1003 | 13.2k | if (directed) igraph_vector_destroy(&tmp_vertex_in_weights); |
1004 | 13.2k | igraph_vector_destroy(&tmp_edge_weights); |
1005 | | |
1006 | 13.2k | if (directed) { |
1007 | 6.63k | IGRAPH_FINALLY_CLEAN(12); |
1008 | 6.63k | } else { |
1009 | 6.63k | IGRAPH_FINALLY_CLEAN(10); |
1010 | 6.63k | } |
1011 | | |
1012 | | /* Calculate quality */ |
1013 | 13.2k | if (quality) { |
1014 | 13.2k | IGRAPH_CHECK(leiden_quality(graph, |
1015 | 13.2k | edge_weights, vertex_out_weights, vertex_in_weights, |
1016 | 13.2k | membership, |
1017 | 13.2k | *nb_clusters, resolution, |
1018 | 13.2k | quality)); |
1019 | 13.2k | } |
1020 | | |
1021 | 13.2k | return IGRAPH_SUCCESS; |
1022 | 13.2k | } |
1023 | | |
1024 | | /** |
1025 | | * \ingroup communities |
1026 | | * \function igraph_community_leiden |
1027 | | * \brief Finding community structure using the Leiden algorithm. |
1028 | | * |
1029 | | * This function implements the Leiden algorithm for finding community |
1030 | | * structure. |
1031 | | * |
1032 | | * </para><para> |
1033 | | * It is similar to the multilevel algorithm, often called the Louvain |
1034 | | * algorithm, but it is faster and yields higher quality solutions. It can |
1035 | | * optimize both modularity and the Constant Potts Model, which does not suffer |
1036 | | * from the resolution-limit (see Traag, Van Dooren & Nesterov). |
1037 | | * |
1038 | | * </para><para> |
1039 | | * The Leiden algorithm consists of three phases: (1) local moving of vertices, (2) |
1040 | | * refinement of the partition and (3) aggregation of the network based on the |
1041 | | * refined partition, using the non-refined partition to create an initial |
1042 | | * partition for the aggregate network. In the local move procedure in the |
1043 | | * Leiden algorithm, only vertices whose neighborhood has changed are visited. Only |
1044 | | * moves that strictly improve the quality function are made. The refinement is |
1045 | | * done by restarting from a singleton partition within each cluster and |
1046 | | * gradually merging the subclusters. When aggregating, a single cluster may |
1047 | | * then be represented by several vertices (which are the subclusters identified in |
1048 | | * the refinement). |
1049 | | * |
1050 | | * </para><para> |
1051 | | * The Leiden algorithm provides several guarantees. The Leiden algorithm is |
1052 | | * typically iterated: the output of one iteration is used as the input for the |
1053 | | * next iteration. At each iteration all clusters are guaranteed to be (weakly) |
1054 | | * connected and well-separated. After an iteration in which nothing has |
1055 | | * changed, all vertices and some parts are guaranteed to be locally optimally |
1056 | | * assigned. Note that even if a single iteration did not result in any change, |
1057 | | * it is still possible that a subsequent iteration might find some |
1058 | | * improvement. Each iteration explores different subsets of vertices to consider |
1059 | | * for moving from one cluster to another. Finally, asymptotically, all subsets |
1060 | | * of all clusters are guaranteed to be locally optimally assigned. For more |
1061 | | * details, please see Traag, Waltman & van Eck (2019). |
1062 | | * |
1063 | | * </para><para> |
1064 | | * The objective function being optimized is |
1065 | | * |
1066 | | * </para><para> |
1067 | | * <code>1 / 2m sum_ij (A_ij - γ n_i n_j) δ(s_i, s_j)</code> |
1068 | | * |
1069 | | * </para><para> |
1070 | | * in the undirected case and |
1071 | | * |
1072 | | * </para><para> |
1073 | | * <code>1 / m sum_ij (A_ij - γ n^out_i n^in_j) δ(s_i, s_j)</code> |
1074 | | * |
1075 | | * </para><para> |
1076 | | * in the directed case. |
1077 | | * Here \c m is the total edge weight, <code>A_ij</code> is the weight of edge |
1078 | | * (i, j), \c γ is the so-called resolution parameter, <code>n_i</code> |
1079 | | * is the vertex weight of vertex \c i (separate out- and in-weights are used |
1080 | | * with directed graphs), <code>s_i</code> is the cluster of vertex |
1081 | | * \c i and <code>δ(x, y) = 1</code> if and only if <code>x = y</code> and 0 |
1082 | | * otherwise. |
1083 | | * |
1084 | | * </para><para> |
1085 | | * By setting <code>n_i = k_i</code>, the degree of vertex \c i, and |
1086 | | * dividing \c γ by <code>2m</code> (by \c m in the directed case), we effectively |
1087 | | * obtain an expression for modularity. Hence, the standard modularity will be |
1088 | | * optimized when you supply the degrees (out- and in-degrees with directed graphs) |
1089 | | * as the vertex weights and by supplying as a resolution parameter |
1090 | | * <code>1/(2m)</code> (<code>1/m</code> with directed graphs). |
1091 | | * Use the \ref igraph_community_leiden_simple() convenience function to |
1092 | | * compute vertex weights automatically for modularity maximization. |
1093 | | * |
1094 | | * </para><para> |
1095 | | * References: |
1096 | | * |
1097 | | * </para><para> |
1098 | | * V. A. Traag, L. Waltman, N. J. van Eck: |
1099 | | * From Louvain to Leiden: guaranteeing well-connected communities. |
1100 | | * Scientific Reports, 9(1), 5233 (2019). |
1101 | | * http://dx.doi.org/10.1038/s41598-019-41695-z |
1102 | | * |
1103 | | * </para><para> |
1104 | | * V. A. Traag, P. Van Dooren, and Y. Nesterov: |
1105 | | * Narrow scope for resolution-limit-free community detection. |
1106 | | * Phys. Rev. E 84, 016114 (2011). |
1107 | | * https://doi.org/10.1103/PhysRevE.84.016114 |
1108 | | * |
1109 | | * \param graph The input graph. |
1110 | | * \param edge_weights Numeric vector containing edge weights. If \c NULL, |
1111 | | * every edge has equal weight of 1. The weights need not be non-negative. |
1112 | | * \param vertex_out_weights Numeric vector containing vertex weights, or vertex |
1113 | | * out-weights for directed graphs. If \c NULL, every vertex has equal |
1114 | | * weight of 1. |
1115 | | * \param vertex_in_weights Numeric vector containing vertex in-weights for |
1116 | | * directed graphs. If set to \c NULL, in-weights are assumed to be the same |
1117 | | * as out-weights, which effectively ignores edge directions. |
1118 | | * Must be \c NULL for undirected graphs. |
1119 | | * \param n_iterations Iterate the core Leiden algorithm the indicated number |
1120 | | * of times. If this is a negative number, it will continue iterating until |
1121 | | * an iteration did not change the clustering. Two iterations are often |
1122 | | * sufficient, thus 2 is a reasonable default. |
1123 | | * \param beta The randomness used in the refinement step when merging. A small |
1124 | | * amount of randomness (\c beta = 0.01) typically works well. |
1125 | | * \param start Start from membership vector. If this is true, the optimization |
1126 | | * will start from the provided membership vector. If this is false, the |
1127 | | * optimization will start from a singleton partition. |
1128 | | * \param n_iterations Iterate the core Leiden algorithm for the indicated number |
1129 | | * of times. If this is a negative number, it will continue iterating until |
1130 | | * an iteration did not change the clustering. |
1131 | | * \param membership The membership vector. This is both used as the initial |
1132 | | * membership from which optimisation starts and is updated in place. It |
1133 | | * must hence be properly initialized. When finding clusters from scratch it |
1134 | | * is typically started using a singleton clustering. This can be achieved |
1135 | | * using \ref igraph_vector_int_init_range(). |
1136 | | * \param nb_clusters The number of clusters contained in the final \p membership. |
1137 | | * If \c NULL, the number of clusters will not be returned. |
1138 | | * \param quality The quality of the partition, in terms of the objective |
1139 | | * function as included in the documentation. If \c NULL the quality will |
1140 | | * not be calculated. |
1141 | | * \return Error code. |
1142 | | * |
1143 | | * Time complexity: near linear on sparse graphs. |
1144 | | * |
1145 | | * \sa \ref igraph_community_leiden_simple() for a simplified interface |
1146 | | * that allows specifying an objective function directly and does not require |
1147 | | * vertex weights. |
1148 | | * |
1149 | | * \example examples/simple/igraph_community_leiden.c |
1150 | | */ |
1151 | | igraph_error_t igraph_community_leiden( |
1152 | | const igraph_t *graph, |
1153 | | const igraph_vector_t *edge_weights, |
1154 | | const igraph_vector_t *vertex_out_weights, |
1155 | | const igraph_vector_t *vertex_in_weights, |
1156 | | igraph_real_t resolution, |
1157 | | igraph_real_t beta, |
1158 | | igraph_bool_t start, |
1159 | | igraph_integer_t n_iterations, |
1160 | | igraph_vector_int_t *membership, |
1161 | | igraph_integer_t *nb_clusters, |
1162 | 6.63k | igraph_real_t *quality) { |
1163 | | |
1164 | 6.63k | const igraph_integer_t vcount = igraph_vcount(graph); |
1165 | 6.63k | const igraph_integer_t ecount = igraph_ecount(graph); |
1166 | 6.63k | const igraph_bool_t directed = igraph_is_directed(graph); |
1167 | 6.63k | igraph_vector_t *i_edge_weights, *i_vertex_out_weights, *i_vertex_in_weights; |
1168 | 6.63k | igraph_integer_t i_nb_clusters; |
1169 | | |
1170 | 6.63k | if (!nb_clusters) { |
1171 | 0 | nb_clusters = &i_nb_clusters; |
1172 | 0 | } |
1173 | | |
1174 | 6.63k | if (start) { |
1175 | 0 | if (!membership) { |
1176 | 0 | IGRAPH_ERROR("Cannot start optimization if membership is missing.", IGRAPH_EINVAL); |
1177 | 0 | } |
1178 | | |
1179 | 0 | if (igraph_vector_int_size(membership) != vcount) { |
1180 | 0 | IGRAPH_ERROR("Membership vector length does not equal the number of vertices.", IGRAPH_EINVAL); |
1181 | 0 | } |
1182 | 6.63k | } else { |
1183 | 6.63k | if (!membership) |
1184 | 0 | IGRAPH_ERROR("Membership vector should be supplied and initialized, " |
1185 | 6.63k | "even when not starting optimization from it.", IGRAPH_EINVAL); |
1186 | | |
1187 | 6.63k | IGRAPH_CHECK(igraph_vector_int_range(membership, 0, vcount)); |
1188 | 6.63k | } |
1189 | | |
1190 | | /* Check edge weights to possibly use default. */ |
1191 | 6.63k | if (!edge_weights) { |
1192 | 0 | i_edge_weights = IGRAPH_CALLOC(1, igraph_vector_t); |
1193 | 0 | IGRAPH_CHECK_OOM(i_edge_weights, "Leiden algorithm failed, could not allocate memory for edge weights."); |
1194 | 0 | IGRAPH_FINALLY(igraph_free, i_edge_weights); |
1195 | 0 | IGRAPH_CHECK(igraph_vector_init(i_edge_weights, igraph_ecount(graph))); |
1196 | 0 | IGRAPH_FINALLY(igraph_vector_destroy, i_edge_weights); |
1197 | 0 | igraph_vector_fill(i_edge_weights, 1); |
1198 | 6.63k | } else { |
1199 | 6.63k | if (igraph_vector_size(edge_weights) != ecount) { |
1200 | 0 | IGRAPH_ERRORF("Edge weight vector length (%" IGRAPH_PRId ") does not match number of edges (%" IGRAPH_PRId ").", |
1201 | 0 | IGRAPH_EINVAL, igraph_vector_size(edge_weights), ecount); |
1202 | 0 | } |
1203 | 6.63k | i_edge_weights = (igraph_vector_t*)edge_weights; |
1204 | 6.63k | } |
1205 | | |
1206 | | /* Check vertex out-weights to possibly use default. */ |
1207 | 6.63k | if (!vertex_out_weights) { |
1208 | 6.63k | i_vertex_out_weights = IGRAPH_CALLOC(1, igraph_vector_t); |
1209 | 6.63k | IGRAPH_CHECK_OOM(i_vertex_out_weights, "Leiden algorithm failed, could not allocate memory for vertex weights."); |
1210 | 6.63k | IGRAPH_FINALLY(igraph_free, i_vertex_out_weights); |
1211 | 6.63k | IGRAPH_VECTOR_INIT_FINALLY(i_vertex_out_weights, vcount); |
1212 | 6.63k | igraph_vector_fill(i_vertex_out_weights, 1); |
1213 | 6.63k | } else { |
1214 | 0 | if (igraph_vector_size(vertex_out_weights) != vcount) { |
1215 | 0 | IGRAPH_ERRORF("Vertex %sweight vector length (%" IGRAPH_PRId ") does not match number of vertices (%" IGRAPH_PRId ").", |
1216 | 0 | IGRAPH_EINVAL, |
1217 | 0 | directed ? "out-" : "", |
1218 | 0 | igraph_vector_size(vertex_out_weights), vcount); |
1219 | 0 | } |
1220 | 0 | i_vertex_out_weights = (igraph_vector_t*)vertex_out_weights; |
1221 | 0 | } |
1222 | | |
1223 | 6.63k | if (directed) { |
1224 | | /* When in-weights are not given for a directed graph, |
1225 | | * assume that they are the same as the out-weights. |
1226 | | * This effectively ignores edge directions. */ |
1227 | 3.31k | if (vertex_in_weights) { |
1228 | 0 | if (igraph_vector_size(vertex_in_weights) != vcount) { |
1229 | 0 | IGRAPH_ERRORF("Vertex in-weight vector length (%" IGRAPH_PRId ") does not match number of vertices (%" IGRAPH_PRId ").", |
1230 | 0 | IGRAPH_EINVAL, |
1231 | 0 | igraph_vector_size(vertex_in_weights), vcount); |
1232 | 0 | } |
1233 | 0 | i_vertex_in_weights = (igraph_vector_t*)vertex_in_weights; |
1234 | 3.31k | } else { |
1235 | 3.31k | i_vertex_in_weights = i_vertex_out_weights; |
1236 | 3.31k | } |
1237 | 3.31k | } else { |
1238 | | /* In-weights must be NULL in the undirected case. */ |
1239 | 3.31k | if (vertex_in_weights) { |
1240 | 0 | IGRAPH_ERROR("Vertex in-weights must not be given for undirected graphs.", IGRAPH_EINVAL); |
1241 | 3.31k | } else { |
1242 | 3.31k | i_vertex_in_weights = NULL; |
1243 | 3.31k | } |
1244 | 3.31k | } |
1245 | | |
1246 | | /* Perform actual Leiden algorithm iteratively. We either |
1247 | | * perform a fixed number of iterations, or we perform |
1248 | | * iterations until the quality remains unchanged. Even if |
1249 | | * a single iteration did not change anything, a subsequent |
1250 | | * iteration may still find some improvement. This is because |
1251 | | * each iteration explores different subsets of vertices. |
1252 | | */ |
1253 | 6.63k | igraph_bool_t changed = true; |
1254 | 6.63k | for (igraph_integer_t itr = 0; |
1255 | 19.9k | n_iterations < 0 ? changed : itr < n_iterations; |
1256 | 13.2k | itr++) { |
1257 | 13.2k | IGRAPH_CHECK(community_leiden(graph, |
1258 | 13.2k | i_edge_weights, i_vertex_out_weights, i_vertex_in_weights, |
1259 | 13.2k | resolution, beta, |
1260 | 13.2k | membership, nb_clusters, quality, &changed)); |
1261 | 13.2k | } |
1262 | | |
1263 | 6.63k | if (!edge_weights) { |
1264 | 0 | igraph_vector_destroy(i_edge_weights); |
1265 | 0 | IGRAPH_FREE(i_edge_weights); |
1266 | 0 | IGRAPH_FINALLY_CLEAN(2); |
1267 | 0 | } |
1268 | | |
1269 | 6.63k | if (!vertex_out_weights) { |
1270 | 6.63k | igraph_vector_destroy(i_vertex_out_weights); |
1271 | 6.63k | IGRAPH_FREE(i_vertex_out_weights); |
1272 | 6.63k | IGRAPH_FINALLY_CLEAN(2); |
1273 | 6.63k | } |
1274 | | |
1275 | 6.63k | return IGRAPH_SUCCESS; |
1276 | 6.63k | } |
1277 | | |
1278 | | /** |
1279 | | * \function igraph_community_leiden_simple |
1280 | | * \brief Finding community structure using the Leiden algorithm, simple interface. |
1281 | | * |
1282 | | * This is a simplified interface to \ref igraph_community_leiden() for |
1283 | | * convenience purposes. Instead of requiring vertex weights, it allows |
1284 | | * choosing from a set of objective functions to maximize. It implements |
1285 | | * these objective functions by passing suitable vertex weights to |
1286 | | * \ref igraph_community_leiden(), as explained in the documentation of |
1287 | | * that function. |
1288 | | * |
1289 | | * \param graph The input graph. May be directed or undirected. |
1290 | | * \param weights The edge weights. If \c NULL, all weights are assumed to be 1. |
1291 | | * \param objective The objective function to maximize. |
1292 | | * \clist |
1293 | | * \cli IGRAPH_LEIDEN_OBJECTIVE_MODULARITY |
1294 | | * Use the generalized modularity, defined as |
1295 | | * <code>Q = 1/(2m) sum_ij (A_ij - γ k_i k_j / (2m)) δ(c_i, c_j)</code> |
1296 | | * for undirected graphs and as |
1297 | | * <code>Q = 1/m sum_ij (A_ij - γ k^out_i k^in_j / m) δ(c_i, c_j)</code> |
1298 | | * for directed graphs. This effectively uses a multigraph configuration |
1299 | | * model as the null model. Edge weights must not be negative. |
1300 | | * \cli IGRAPH_LEIDEN_OBJECTIVE_CPM |
1301 | | * Use the constant Potts model, whose objective function is defined as |
1302 | | * <code>Q = 1/(2m) sum_ij (A_ij - γ) δ(c_i, c_j)</code> |
1303 | | * for undirected graphs and as |
1304 | | * <code>Q = 1/m sum_ij (A_ij - γ) δ(c_i, c_j)</code> |
1305 | | * for directed graphs. Edge weights are allowed to be negative. |
1306 | | * Edge directions have no impact on the result. |
1307 | | * \cli IGRAPH_LEIDEN_OBJECTIVE_ER |
1308 | | * Use an objective function based on the multigraph Erdős-Rényi G(n,p) |
1309 | | * null model, defined as |
1310 | | * <code>Q = 1/(2m) sum_ij (A_ij - γ p) δ(c_i, c_j)</code> |
1311 | | * for undirected graphs and as |
1312 | | * <code>Q = 1/m sum_ij (A_ij - γ p) δ(c_i, c_j)</code> |
1313 | | * for directed graphs. \c p is the weighted density, i.e. the average |
1314 | | * link strength between all vertex pairs (whether adjacent or not). |
1315 | | * Edge weights must not be negative. Edge directions have no impact on |
1316 | | * the result. |
1317 | | * \endclist |
1318 | | * In the above formulas, \c A is the adjacency matrix, \c m is the total |
1319 | | * edge weight, \c k are the (out- and in-) degrees, \c γ is the resolution |
1320 | | * parameter, and <code>δ(c_i, c_j)</code> is 1 if vertices \c i and \c j |
1321 | | * are in the same community and 0 otherwise. Edge directions are only |
1322 | | * relevant with \c IGRAPH_LEIDEN_OBJECTIVE_MODULARITY. The other two |
1323 | | * objective functions are equivalent between directed and undirected graphs: |
1324 | | * the formal difference is due to each edge being included twice in |
1325 | | * undirected (symmetric) adjacency matrices. |
1326 | | * \param resolution The resolution parameter, which is represented by γ in |
1327 | | * the objective functions detailed above. |
1328 | | * \param beta The randomness used in the refinement step when merging. A small |
1329 | | * amount of randomness (\c beta = 0.01) typically works well. |
1330 | | * \param start Start from membership vector. If this is true, the optimization |
1331 | | * will start from the provided membership vector. If this is false, the |
1332 | | * optimization will start from a singleton partition. |
1333 | | * \param n_iterations Iterate the core Leiden algorithm the indicated number |
1334 | | * of times. If this is a negative number, it will continue iterating until |
1335 | | * an iteration did not change the clustering. Two iterations are often |
1336 | | * sufficient, thus 2 is a reasonable default. |
1337 | | * \param membership The membership vector. If \p start is set to \c false, |
1338 | | * it will be resized appropriately. If \p start is \c true, it must be |
1339 | | * a valid membership vector for the given \p graph. |
1340 | | * \param nb_clusters The number of clusters contained in the final \p membership. |
1341 | | * If \c NULL, the number of clusters will not be returned. |
1342 | | * \param quality The quality of the partition, in terms of the objective |
1343 | | * function selected by \p objective. If \c NULL the quality will |
1344 | | * not be calculated. |
1345 | | * \return Error code. |
1346 | | * |
1347 | | * Time complexity: near linear on sparse graphs. |
1348 | | * |
1349 | | * \sa \ref igraph_community_leiden() for a more flexible interface that |
1350 | | * allows specifying raw vertex weights. |
1351 | | */ |
1352 | | igraph_error_t igraph_community_leiden_simple( |
1353 | | const igraph_t *graph, |
1354 | | const igraph_vector_t *weights, |
1355 | | igraph_leiden_objective_t objective, |
1356 | | igraph_real_t resolution, |
1357 | | igraph_real_t beta, |
1358 | | igraph_bool_t start, |
1359 | | igraph_integer_t n_iterations, |
1360 | | igraph_vector_int_t *membership, |
1361 | | igraph_integer_t *nb_clusters, |
1362 | 0 | igraph_real_t *quality) { |
1363 | |
|
1364 | 0 | const igraph_integer_t vcount = igraph_vcount(graph); |
1365 | 0 | const igraph_integer_t ecount = igraph_ecount(graph); |
1366 | 0 | const igraph_bool_t directed = igraph_is_directed(graph); |
1367 | 0 | igraph_vector_t vertex_out_weights, vertex_in_weights; |
1368 | 0 | igraph_vector_int_t i_membership, *p_membership; |
1369 | 0 | igraph_real_t min_weight = IGRAPH_INFINITY; |
1370 | | |
1371 | | /* Basic weight vector validation, calculate properties used for validation steps |
1372 | | * specific to different objective functions. */ |
1373 | 0 | if (weights) { |
1374 | 0 | if (igraph_vector_size(weights) != ecount) { |
1375 | 0 | IGRAPH_ERROR("Edge weight vector length does not match number of edges.", IGRAPH_EINVAL); |
1376 | 0 | } |
1377 | 0 | for (igraph_integer_t i=0; i < ecount; i++) { |
1378 | 0 | igraph_real_t w = VECTOR(*weights)[i]; |
1379 | 0 | if (w < min_weight) { |
1380 | 0 | min_weight = w; |
1381 | 0 | } |
1382 | 0 | if (! isfinite(w)) { |
1383 | 0 | IGRAPH_ERRORF("Edge weights must not be infinite or NaN, got %g.", |
1384 | 0 | IGRAPH_EINVAL, w); |
1385 | 0 | } |
1386 | 0 | } |
1387 | 0 | } |
1388 | | |
1389 | 0 | IGRAPH_VECTOR_INIT_FINALLY(&vertex_out_weights, vcount); |
1390 | 0 | if (directed) { |
1391 | 0 | IGRAPH_VECTOR_INIT_FINALLY(&vertex_in_weights, vcount); |
1392 | 0 | } |
1393 | | |
1394 | | /* igraph_community_leiden() always requires an initialized membership vector |
1395 | | * of the correct size to be given. We relax this requirement to the case |
1396 | | * when start = true. */ |
1397 | 0 | if (start) { |
1398 | 0 | if (!membership) { |
1399 | 0 | IGRAPH_ERROR("Requesting to start the computation from a specific " |
1400 | 0 | "community assignment, but no membership vector given.", |
1401 | 0 | IGRAPH_EINVAL); |
1402 | 0 | } |
1403 | 0 | if (igraph_vector_int_size(membership) != vcount) { |
1404 | 0 | IGRAPH_ERRORF("Requesting to start the computation from a specific " |
1405 | 0 | "community assignment, but the given membership vector " |
1406 | 0 | "has a different size (%" IGRAPH_PRId " than the vertex " |
1407 | 0 | "count (%" IGRAPH_PRId ").", |
1408 | 0 | IGRAPH_EINVAL, |
1409 | 0 | igraph_vector_int_size(membership), vcount); |
1410 | 0 | } |
1411 | 0 | p_membership = membership; |
1412 | 0 | } else { |
1413 | 0 | if (!membership) { |
1414 | 0 | IGRAPH_VECTOR_INT_INIT_FINALLY(&i_membership, vcount); |
1415 | 0 | p_membership = &i_membership; |
1416 | 0 | } else { |
1417 | 0 | IGRAPH_CHECK(igraph_vector_int_resize(membership, vcount)); |
1418 | 0 | p_membership = membership; |
1419 | 0 | } |
1420 | 0 | } |
1421 | | |
1422 | 0 | switch (objective) { |
1423 | 0 | case IGRAPH_LEIDEN_OBJECTIVE_MODULARITY: |
1424 | 0 | if (min_weight < 0) { |
1425 | 0 | IGRAPH_ERRORF("Edge weights must not be negative for Leiden community " |
1426 | 0 | "detection with modularity objective function, got %g.", |
1427 | 0 | IGRAPH_EINVAL, |
1428 | 0 | min_weight); |
1429 | 0 | } |
1430 | | |
1431 | 0 | IGRAPH_CHECK(igraph_strength( |
1432 | 0 | graph, &vertex_out_weights, |
1433 | 0 | igraph_vss_all(), IGRAPH_OUT, IGRAPH_LOOPS, weights)); |
1434 | 0 | if (directed) { |
1435 | 0 | IGRAPH_CHECK(igraph_strength( |
1436 | 0 | graph, &vertex_in_weights, |
1437 | 0 | igraph_vss_all(), IGRAPH_IN, IGRAPH_LOOPS, weights)); |
1438 | 0 | } |
1439 | | |
1440 | | /* If directed, the sum of vertex_out_weights is the total edge weight. |
1441 | | * If undirected, it is twice the total edge weight. */ |
1442 | 0 | resolution /= igraph_vector_sum(&vertex_out_weights); |
1443 | |
|
1444 | 0 | break; |
1445 | | |
1446 | 0 | case IGRAPH_LEIDEN_OBJECTIVE_CPM: |
1447 | | /* TODO: Potential minor optimization is to use the same vector for both. */ |
1448 | 0 | igraph_vector_fill(&vertex_out_weights, 1); |
1449 | 0 | if (directed) { |
1450 | 0 | igraph_vector_fill(&vertex_in_weights, 1); |
1451 | 0 | } |
1452 | |
|
1453 | 0 | break; |
1454 | | |
1455 | 0 | case IGRAPH_LEIDEN_OBJECTIVE_ER: |
1456 | 0 | if (min_weight < 0) { |
1457 | 0 | IGRAPH_ERRORF("Edge weights must not be negative for Leiden community " |
1458 | 0 | "detection with ER objective function, got %g.", |
1459 | 0 | IGRAPH_EINVAL, |
1460 | 0 | min_weight); |
1461 | 0 | } |
1462 | | |
1463 | | /* TODO: Potential minor optimization is to use the same vector for both. */ |
1464 | 0 | igraph_vector_fill(&vertex_out_weights, 1); |
1465 | 0 | if (directed) { |
1466 | 0 | igraph_vector_fill(&vertex_in_weights, 1); |
1467 | 0 | } |
1468 | |
|
1469 | 0 | { |
1470 | 0 | igraph_real_t p; |
1471 | | /* Note: Loops must be allowed, as the aggregation step of the |
1472 | | * algorithm effectively creates them. */ |
1473 | 0 | IGRAPH_CHECK(igraph_density(graph, weights, &p, /* loops */ true)); |
1474 | 0 | resolution *= p; |
1475 | 0 | } |
1476 | | |
1477 | 0 | break; |
1478 | | |
1479 | | |
1480 | 0 | default: |
1481 | 0 | IGRAPH_ERROR("Invalid objective function for Leiden community detection.", |
1482 | 0 | IGRAPH_EINVAL); |
1483 | 0 | } |
1484 | | |
1485 | 0 | IGRAPH_CHECK(igraph_community_leiden( |
1486 | 0 | graph, weights, |
1487 | 0 | &vertex_out_weights, directed ? &vertex_in_weights : NULL, |
1488 | 0 | resolution, beta, start, n_iterations, p_membership, nb_clusters, quality)); |
1489 | | |
1490 | 0 | if (!membership) { |
1491 | 0 | igraph_vector_int_destroy(&i_membership); |
1492 | 0 | IGRAPH_FINALLY_CLEAN(1); |
1493 | 0 | } |
1494 | |
|
1495 | 0 | if (directed) { |
1496 | 0 | igraph_vector_destroy(&vertex_in_weights); |
1497 | 0 | IGRAPH_FINALLY_CLEAN(1); |
1498 | 0 | } |
1499 | 0 | igraph_vector_destroy(&vertex_out_weights); |
1500 | 0 | IGRAPH_FINALLY_CLEAN(1); |
1501 | |
|
1502 | 0 | return IGRAPH_SUCCESS; |
1503 | 0 | } |