Coverage Report

Created: 2025-08-25 07:03

/src/igraph/src/community/leiden.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   IGraph library.
3
   Copyright (C) 2020-2025  The igraph development team <igraph@igraph.org>
4
5
   This program is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 2 of the License, or
8
   (at your option) any later version.
9
10
   This program is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU General Public License for more details.
14
15
   You should have received a copy of the GNU General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
17
*/
18
19
#include "igraph_community.h"
20
21
#include "igraph_adjlist.h"
22
#include "igraph_bitset.h"
23
#include "igraph_constructors.h"
24
#include "igraph_dqueue.h"
25
#include "igraph_interface.h"
26
#include "igraph_memory.h"
27
#include "igraph_random.h"
28
#include "igraph_stack.h"
29
#include "igraph_structural.h"
30
#include "igraph_vector.h"
31
#include "igraph_vector_list.h"
32
33
#include "core/interruption.h"
34
35
/* Move vertices in order to improve the quality of a partition.
36
 *
37
 * This function considers each vertex and greedily moves it to a neighboring
38
 * community that maximizes the improvement in the quality of a partition.
39
 * Only moves that strictly improve the quality are considered.
40
 *
41
 * The vertices are examined in a queue, and initially all vertices are put in the
42
 * queue in a random order. Vertices are popped from the queue when they are
43
 * examined, and only neighbors of vertices that are moved (which are not part of
44
 * the cluster the vertex was moved to) are pushed to the queue again.
45
 *
46
 * The \p membership vector is used as the starting point to move around vertices,
47
 * and is updated in-place.
48
 *
49
 */
50
static igraph_error_t leiden_fastmove_vertices(
51
        const igraph_t *graph,
52
        const igraph_inclist_t *edges_per_vertex,
53
        const igraph_vector_t *edge_weights,
54
        const igraph_vector_t *vertex_out_weights,
55
        const igraph_vector_t *vertex_in_weights,
56
        const igraph_real_t resolution,
57
        igraph_integer_t *nb_clusters,
58
        igraph_vector_int_t *membership,
59
18.7k
        igraph_bool_t *changed) {
60
61
18.7k
    const igraph_integer_t n = igraph_vcount(graph);
62
18.7k
    const igraph_bool_t directed = (vertex_in_weights != NULL);
63
18.7k
    igraph_dqueue_int_t unstable_vertices;
64
18.7k
    igraph_real_t max_diff, diff;
65
18.7k
    igraph_bitset_t neighbor_cluster_added, vertex_is_stable;
66
18.7k
    igraph_vector_t cluster_out_weights, cluster_in_weights;
67
18.7k
    igraph_vector_t edge_weights_per_cluster;
68
18.7k
    igraph_vector_int_t neighbor_clusters;
69
18.7k
    igraph_vector_int_t vertex_order;
70
18.7k
    igraph_vector_int_t nb_vertices_per_cluster;
71
18.7k
    igraph_stack_int_t empty_clusters;
72
18.7k
    igraph_integer_t c, nb_neigh_clusters;
73
18.7k
    int iter = 0;
74
75
    /* Initialize queue of unstable vertices and whether vertex is stable. Only
76
     * unstable vertices are in the queue. */
77
18.7k
    IGRAPH_BITSET_INIT_FINALLY(&vertex_is_stable, n);
78
79
18.7k
    IGRAPH_DQUEUE_INT_INIT_FINALLY(&unstable_vertices, n);
80
81
    /* Shuffle vertices */
82
18.7k
    IGRAPH_CHECK(igraph_vector_int_init_range(&vertex_order, 0, n));
83
18.7k
    IGRAPH_FINALLY(igraph_vector_int_destroy, &vertex_order);
84
18.7k
    igraph_vector_int_shuffle(&vertex_order);
85
86
    /* Add to the queue */
87
840k
    for (igraph_integer_t i = 0; i < n; i++) {
88
821k
        IGRAPH_CHECK(igraph_dqueue_int_push(&unstable_vertices, VECTOR(vertex_order)[i]));
89
821k
    }
90
91
    /* Initialize cluster weights and nb vertices */
92
18.7k
    IGRAPH_VECTOR_INIT_FINALLY(&cluster_out_weights, n);
93
18.7k
    if (directed) {
94
7.67k
        IGRAPH_VECTOR_INIT_FINALLY(&cluster_in_weights, n);
95
7.67k
    }
96
18.7k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&nb_vertices_per_cluster, n);
97
840k
    for (igraph_integer_t i = 0; i < n; i++) {
98
821k
        c = VECTOR(*membership)[i];
99
821k
        VECTOR(cluster_out_weights)[c] += VECTOR(*vertex_out_weights)[i];
100
821k
        if (directed) {
101
334k
            VECTOR(cluster_in_weights)[c] += VECTOR(*vertex_in_weights)[i];
102
334k
        }
103
821k
        VECTOR(nb_vertices_per_cluster)[c] += 1;
104
821k
    }
105
106
    /* Initialize empty clusters */
107
18.7k
    IGRAPH_STACK_INT_INIT_FINALLY(&empty_clusters, n);
108
840k
    for (c = 0; c < n; c++) {
109
821k
        if (VECTOR(nb_vertices_per_cluster)[c] == 0) {
110
6.11k
            IGRAPH_CHECK(igraph_stack_int_push(&empty_clusters, c));
111
6.11k
        }
112
821k
    }
113
114
    /* Initialize vectors to be used in calculating differences */
115
18.7k
    IGRAPH_VECTOR_INIT_FINALLY(&edge_weights_per_cluster, n);
116
117
    /* Initialize neighboring cluster */
118
18.7k
    IGRAPH_BITSET_INIT_FINALLY(&neighbor_cluster_added, n);
119
18.7k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&neighbor_clusters, n);
120
121
    /* Iterate while the queue is not empty */
122
846k
    while (!igraph_dqueue_int_empty(&unstable_vertices)) {
123
827k
        igraph_integer_t v = igraph_dqueue_int_pop(&unstable_vertices);
124
827k
        igraph_integer_t best_cluster, current_cluster = VECTOR(*membership)[v];
125
827k
        igraph_integer_t degree;
126
827k
        igraph_vector_int_t *edges;
127
128
        /* Remove vertex from current cluster */
129
827k
        VECTOR(cluster_out_weights)[current_cluster] -= VECTOR(*vertex_out_weights)[v];
130
827k
        if (directed) {
131
335k
            VECTOR(cluster_in_weights)[current_cluster] -= VECTOR(*vertex_in_weights)[v];
132
335k
        }
133
827k
        VECTOR(nb_vertices_per_cluster)[current_cluster]--;
134
827k
        if (VECTOR(nb_vertices_per_cluster)[current_cluster] == 0) {
135
810k
            IGRAPH_CHECK(igraph_stack_int_push(&empty_clusters, current_cluster));
136
810k
        }
137
138
        /* Find out neighboring clusters */
139
827k
        c = igraph_stack_int_top(&empty_clusters);
140
827k
        VECTOR(neighbor_clusters)[0] = c;
141
827k
        IGRAPH_BIT_SET(neighbor_cluster_added, c);
142
827k
        nb_neigh_clusters = 1;
143
144
        /* Determine the edge weight to each neighboring cluster */
145
827k
        edges = igraph_inclist_get(edges_per_vertex, v);
146
827k
        degree = igraph_vector_int_size(edges);
147
1.67M
        for (igraph_integer_t i = 0; i < degree; i++) {
148
844k
            igraph_integer_t e = VECTOR(*edges)[i];
149
844k
            igraph_integer_t u = IGRAPH_OTHER(graph, e, v);
150
844k
            if (u != v) {
151
747k
                c = VECTOR(*membership)[u];
152
747k
                if (!IGRAPH_BIT_TEST(neighbor_cluster_added, c)) {
153
674k
                    IGRAPH_BIT_SET(neighbor_cluster_added, c);
154
674k
                    VECTOR(neighbor_clusters)[nb_neigh_clusters++] = c;
155
674k
                }
156
747k
                VECTOR(edge_weights_per_cluster)[c] += VECTOR(*edge_weights)[e];
157
747k
            }
158
844k
        }
159
160
        /* Calculate maximum diff */
161
827k
        best_cluster = current_cluster;
162
827k
        max_diff = VECTOR(edge_weights_per_cluster)[current_cluster];
163
827k
        if (directed) {
164
335k
            max_diff -=
165
335k
                (VECTOR(*vertex_in_weights)[v]  * VECTOR(cluster_out_weights)[current_cluster] +
166
335k
                 VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_in_weights)[current_cluster]) * resolution;
167
491k
        } else {
168
491k
            max_diff -= VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_out_weights)[current_cluster] * resolution;
169
491k
        }
170
2.33M
        for (igraph_integer_t i = 0; i < nb_neigh_clusters; i++) {
171
1.50M
            c = VECTOR(neighbor_clusters)[i];
172
1.50M
            diff = VECTOR(edge_weights_per_cluster)[c];
173
1.50M
            if (directed) {
174
599k
                diff -= (VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_in_weights)[c] +
175
599k
                         VECTOR(*vertex_in_weights)[v]  * VECTOR(cluster_out_weights)[c]) * resolution;
176
903k
            } else {
177
903k
                diff -= VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_out_weights)[c] * resolution;
178
903k
            }
179
            /* Only consider strictly improving moves.
180
             * Note that this is important in considering convergence.
181
             */
182
1.50M
            if (diff > max_diff) {
183
6.86k
                best_cluster = c;
184
6.86k
                max_diff = diff;
185
6.86k
            }
186
1.50M
            VECTOR(edge_weights_per_cluster)[c] = 0.0;
187
1.50M
            IGRAPH_BIT_CLEAR(neighbor_cluster_added, c);
188
1.50M
        }
189
190
        /* Move vertex to best cluster */
191
827k
        VECTOR(cluster_out_weights)[best_cluster] += VECTOR(*vertex_out_weights)[v];
192
827k
        if (directed) {
193
335k
            VECTOR(cluster_in_weights)[best_cluster] += VECTOR(*vertex_in_weights)[v];
194
335k
        }
195
827k
        VECTOR(nb_vertices_per_cluster)[best_cluster]++;
196
827k
        if (best_cluster == igraph_stack_int_top(&empty_clusters)) {
197
805k
            igraph_stack_int_pop(&empty_clusters);
198
805k
        }
199
200
        /* Mark vertex as stable */
201
827k
        IGRAPH_BIT_SET(vertex_is_stable, v);
202
203
        /* Add stable neighbours that are not part of the new cluster to the queue */
204
827k
        if (best_cluster != current_cluster) {
205
6.25k
            *changed = true;
206
6.25k
            VECTOR(*membership)[v] = best_cluster;
207
208
46.7k
            for (igraph_integer_t i = 0; i < degree; i++) {
209
40.4k
                igraph_integer_t e = VECTOR(*edges)[i];
210
40.4k
                igraph_integer_t u = IGRAPH_OTHER(graph, e, v);
211
40.4k
                if (IGRAPH_BIT_TEST(vertex_is_stable, u) && VECTOR(*membership)[u] != best_cluster) {
212
6.52k
                    IGRAPH_CHECK(igraph_dqueue_int_push(&unstable_vertices, u));
213
6.52k
                    IGRAPH_BIT_CLEAR(vertex_is_stable, u);
214
6.52k
                }
215
40.4k
            }
216
6.25k
        }
217
218
827k
        IGRAPH_ALLOW_INTERRUPTION_LIMITED(iter, 1 << 14);
219
827k
    }
220
221
18.7k
    IGRAPH_CHECK(igraph_reindex_membership(membership, NULL, nb_clusters));
222
223
18.7k
    igraph_vector_int_destroy(&neighbor_clusters);
224
18.7k
    igraph_bitset_destroy(&neighbor_cluster_added);
225
18.7k
    igraph_vector_destroy(&edge_weights_per_cluster);
226
18.7k
    igraph_stack_int_destroy(&empty_clusters);
227
18.7k
    igraph_vector_int_destroy(&nb_vertices_per_cluster);
228
18.7k
    if (directed) igraph_vector_destroy(&cluster_in_weights);
229
18.7k
    igraph_vector_destroy(&cluster_out_weights);
230
18.7k
    igraph_vector_int_destroy(&vertex_order);
231
18.7k
    igraph_dqueue_int_destroy(&unstable_vertices);
232
18.7k
    igraph_bitset_destroy(&vertex_is_stable);
233
18.7k
    if (directed) {
234
7.67k
        IGRAPH_FINALLY_CLEAN(10);
235
11.0k
    } else {
236
11.0k
        IGRAPH_FINALLY_CLEAN(9);
237
11.0k
    }
238
239
18.7k
    return IGRAPH_SUCCESS;
240
18.7k
}
241
242
/* Clean a refined membership vector.
243
 *
244
 * This function examines all vertices in \p vertex_subset and updates
245
 * \p refined_membership to ensure that the clusters are numbered consecutively,
246
 * starting from \p nb_refined_clusters. The \p nb_refined_clusters is also
247
 * updated itself. If C is the initial \p nb_refined_clusters and C' the
248
 * resulting \p nb_refined_clusters, then vertices in \p vertex_subset are numbered
249
 * C, C + 1, ..., C' - 1.
250
 */
251
static igraph_error_t leiden_clean_refined_membership(
252
        const igraph_vector_int_t* vertex_subset,
253
        igraph_vector_int_t *refined_membership,
254
233k
        igraph_integer_t* nb_refined_clusters) {
255
256
233k
    const igraph_integer_t n = igraph_vector_int_size(vertex_subset);
257
233k
    igraph_vector_int_t new_cluster;
258
259
233k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&new_cluster, n);
260
261
    /* Clean clusters. We will store the new cluster + 1 so that cluster == 0
262
     * indicates that no membership was assigned yet. */
263
233k
    *nb_refined_clusters += 1;
264
477k
    for (igraph_integer_t i = 0; i < n; i++) {
265
244k
        igraph_integer_t v = VECTOR(*vertex_subset)[i];
266
244k
        igraph_integer_t c = VECTOR(*refined_membership)[v];
267
244k
        if (VECTOR(new_cluster)[c] == 0) {
268
233k
            VECTOR(new_cluster)[c] = *nb_refined_clusters;
269
233k
            *nb_refined_clusters += 1;
270
233k
        }
271
244k
    }
272
273
    /* Assign new cluster */
274
477k
    for (igraph_integer_t i = 0; i < n; i++) {
275
244k
        igraph_integer_t v = VECTOR(*vertex_subset)[i];
276
244k
        igraph_integer_t c = VECTOR(*refined_membership)[v];
277
244k
        VECTOR(*refined_membership)[v] = VECTOR(new_cluster)[c] - 1;
278
244k
    }
279
    /* We used the cluster + 1, so correct */
280
233k
    *nb_refined_clusters -= 1;
281
282
233k
    igraph_vector_int_destroy(&new_cluster);
283
233k
    IGRAPH_FINALLY_CLEAN(1);
284
285
233k
    return IGRAPH_SUCCESS;
286
233k
}
287
288
/* Merge vertices for a subset of the vertices. This is used to refine a partition.
289
 *
290
 * The vertices included in \p vertex_subset are assumed to be the vertices i for which
291
 * membership[i] = cluster_subset.
292
 *
293
 * All vertices in \p vertex_subset are initialized to a singleton partition in \p
294
 * refined_membership. Only singleton clusters can be merged if they are
295
 * sufficiently well connected to the current subgraph induced by \p
296
 * vertex_subset.
297
 *
298
 * We only examine each vertex once. Instead of greedily choosing the maximum
299
 * possible cluster to merge with, the cluster is chosen randomly among all
300
 * possibilities that do not decrease the quality of the partition. The
301
 * probability of choosing a certain cluster is proportional to exp(diff/beta).
302
 * For beta to 0 this converges to selecting a cluster with the maximum
303
 * improvement. For beta to infinity this converges to a uniform distribution
304
 * among all eligible clusters.
305
 *
306
 * The \p refined_membership is updated for vertex in \p vertex_subset. The number
307
 * of refined clusters, \p nb_refined_clusters is used to set the actual refined
308
 * cluster membership and is updated after this routine. Within each cluster
309
 * (i.e. for a given \p vertex_subset), the refined membership is initially simply
310
 * set to 0, ..., n - 1 (for n vertices in \p vertex_subset). However, for each \p
311
 * vertex_subset the refined membership should of course be unique. Hence, after
312
 * merging, the refined membership starts with \p nb_refined_clusters, which is
313
 * also updated to ensure that the resulting \p nb_refined_clusters counts all
314
 * refined clusters that have already been processed. See
315
 * leiden_clean_refined_membership for more information about
316
 * this aspect.
317
 */
318
static igraph_error_t leiden_merge_vertices(
319
        const igraph_t *graph,
320
        const igraph_inclist_t *edges_per_vertex,
321
        const igraph_vector_t *edge_weights,
322
        const igraph_vector_t *vertex_out_weights,
323
        const igraph_vector_t *vertex_in_weights,
324
        const igraph_vector_int_t *vertex_subset,
325
        const igraph_vector_int_t *membership,
326
        const igraph_integer_t cluster_subset,
327
        const igraph_real_t resolution,
328
        const igraph_real_t beta,
329
        igraph_integer_t *nb_refined_clusters,
330
233k
        igraph_vector_int_t *refined_membership) {
331
332
233k
    const igraph_bool_t directed = (vertex_in_weights != NULL);
333
233k
    igraph_vector_int_t vertex_order;
334
233k
    igraph_bitset_t non_singleton_cluster, neighbor_cluster_added;
335
233k
    igraph_real_t max_diff, total_cum_trans_diff, diff;
336
233k
    igraph_real_t total_vertex_out_weight = 0.0, total_vertex_in_weight = 0.0;
337
233k
    const igraph_integer_t n = igraph_vector_int_size(vertex_subset);
338
233k
    igraph_vector_t cluster_out_weights, cluster_in_weights;
339
233k
    igraph_vector_t cum_trans_diff, edge_weights_per_cluster, external_edge_weight_per_cluster_in_subset;
340
233k
    igraph_vector_int_t neighbor_clusters;
341
233k
    igraph_vector_int_t *edges, nb_vertices_per_cluster;
342
233k
    igraph_integer_t degree, nb_neigh_clusters;
343
344
    /* Initialize cluster weights */
345
233k
    IGRAPH_VECTOR_INIT_FINALLY(&cluster_out_weights, n);
346
233k
    if (directed) {
347
40.8k
        IGRAPH_VECTOR_INIT_FINALLY(&cluster_in_weights, n);
348
40.8k
    }
349
350
    /* Initialize number of vertices per cluster */
351
233k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&nb_vertices_per_cluster, n);
352
353
    /* Initialize external edge weight per cluster in subset */
354
233k
    IGRAPH_VECTOR_INIT_FINALLY(&external_edge_weight_per_cluster_in_subset, n);
355
356
    /* Initialize administration for a singleton partition */
357
477k
    for (igraph_integer_t i = 0; i < n; i++) {
358
244k
        igraph_integer_t v = VECTOR(*vertex_subset)[i];
359
244k
        VECTOR(*refined_membership)[v] = i;
360
244k
        VECTOR(cluster_out_weights)[i] += VECTOR(*vertex_out_weights)[v];
361
244k
        total_vertex_out_weight += VECTOR(*vertex_out_weights)[v];
362
244k
        if (directed) {
363
42.8k
            VECTOR(cluster_in_weights)[i] += VECTOR(*vertex_in_weights)[v];
364
42.8k
            total_vertex_in_weight += VECTOR(*vertex_in_weights)[v];
365
42.8k
        }
366
244k
        VECTOR(nb_vertices_per_cluster)[i] += 1;
367
368
        /* Find out neighboring clusters */
369
244k
        edges = igraph_inclist_get(edges_per_vertex, v);
370
244k
        degree = igraph_vector_int_size(edges);
371
546k
        for (igraph_integer_t j = 0; j < degree; j++) {
372
302k
            igraph_integer_t e = VECTOR(*edges)[j];
373
302k
            igraph_integer_t u = IGRAPH_OTHER(graph, e, v);
374
302k
            if (u != v && VECTOR(*membership)[u] == cluster_subset) {
375
53.2k
                VECTOR(external_edge_weight_per_cluster_in_subset)[i] += VECTOR(*edge_weights)[e];
376
53.2k
            }
377
302k
        }
378
244k
    }
379
380
    /* Shuffle vertices */
381
233k
    IGRAPH_CHECK(igraph_vector_int_init_copy(&vertex_order, vertex_subset));
382
233k
    IGRAPH_FINALLY(igraph_vector_int_destroy, &vertex_order);
383
233k
    igraph_vector_int_shuffle(&vertex_order);
384
385
    /* Initialize non singleton clusters */
386
233k
    IGRAPH_BITSET_INIT_FINALLY(&non_singleton_cluster, n);
387
388
    /* Initialize vectors to be used in calculating differences */
389
233k
    IGRAPH_VECTOR_INIT_FINALLY(&edge_weights_per_cluster, n);
390
391
    /* Initialize neighboring cluster */
392
233k
    IGRAPH_BITSET_INIT_FINALLY(&neighbor_cluster_added, n);
393
233k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&neighbor_clusters, n);
394
395
    /* Initialize cumulative transformed difference */
396
233k
    IGRAPH_VECTOR_INIT_FINALLY(&cum_trans_diff, n);
397
398
477k
    for (igraph_integer_t i = 0; i < n; i++) {
399
244k
        igraph_integer_t v = VECTOR(vertex_order)[i];
400
244k
        igraph_integer_t chosen_cluster, best_cluster, current_cluster = VECTOR(*refined_membership)[v];
401
244k
        igraph_real_t vertex_weight_prod;
402
403
244k
        if (directed) {
404
42.8k
            vertex_weight_prod =
405
42.8k
                VECTOR(cluster_out_weights)[current_cluster] * (total_vertex_in_weight - VECTOR(cluster_in_weights)[current_cluster]) +
406
42.8k
                VECTOR(cluster_in_weights)[current_cluster] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[current_cluster]);
407
201k
        } else {
408
201k
            vertex_weight_prod = VECTOR(cluster_out_weights)[current_cluster] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[current_cluster]);
409
201k
        }
410
411
244k
        if (!IGRAPH_BIT_TEST(non_singleton_cluster, current_cluster) &&
412
244k
            (VECTOR(external_edge_weight_per_cluster_in_subset)[current_cluster] >=
413
234k
             vertex_weight_prod * resolution)) {
414
            /* Remove vertex from current cluster, which is then a singleton by
415
             * definition. */
416
234k
            VECTOR(cluster_out_weights)[current_cluster] = 0.0;
417
234k
            if (directed) {
418
41.2k
                VECTOR(cluster_in_weights)[current_cluster] = 0.0;
419
41.2k
            }
420
234k
            VECTOR(nb_vertices_per_cluster)[current_cluster] = 0;
421
422
            /* Find out neighboring clusters */
423
234k
            edges = igraph_inclist_get(edges_per_vertex, v);
424
234k
            degree = igraph_vector_int_size(edges);
425
426
            /* Also add current cluster to ensure it can be chosen. */
427
234k
            VECTOR(neighbor_clusters)[0] = current_cluster;
428
234k
            IGRAPH_BIT_SET(neighbor_cluster_added, current_cluster);
429
234k
            nb_neigh_clusters = 1;
430
474k
            for (igraph_integer_t j = 0; j < degree; j++) {
431
239k
                igraph_integer_t e = VECTOR(*edges)[j];
432
239k
                igraph_integer_t u = IGRAPH_OTHER(graph, e, v);
433
239k
                if (u != v && VECTOR(*membership)[u] == cluster_subset) {
434
29.1k
                    igraph_integer_t c = VECTOR(*refined_membership)[u];
435
29.1k
                    if (!IGRAPH_BIT_TEST(neighbor_cluster_added, c)) {
436
13.4k
                        IGRAPH_BIT_SET(neighbor_cluster_added, c);
437
13.4k
                        VECTOR(neighbor_clusters)[nb_neigh_clusters++] = c;
438
13.4k
                    }
439
29.1k
                    VECTOR(edge_weights_per_cluster)[c] += VECTOR(*edge_weights)[e];
440
29.1k
                }
441
239k
            }
442
443
            /* Calculate diffs */
444
234k
            best_cluster = current_cluster;
445
234k
            max_diff = 0.0;
446
234k
            total_cum_trans_diff = 0.0;
447
483k
            for (igraph_integer_t j = 0; j < nb_neigh_clusters; j++) {
448
248k
                igraph_integer_t c = VECTOR(neighbor_clusters)[j];
449
450
248k
                if (directed) {
451
43.7k
                    vertex_weight_prod =
452
43.7k
                        VECTOR(cluster_out_weights)[c] * (total_vertex_in_weight - VECTOR(cluster_in_weights)[c]) +
453
43.7k
                        VECTOR(cluster_in_weights)[c] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[c]);
454
204k
                } else {
455
204k
                    vertex_weight_prod = VECTOR(cluster_out_weights)[c] * (total_vertex_out_weight - VECTOR(cluster_out_weights)[c]);
456
204k
                }
457
458
248k
                if (VECTOR(external_edge_weight_per_cluster_in_subset)[c] >= vertex_weight_prod * resolution) {
459
248k
                    diff = VECTOR(edge_weights_per_cluster)[c];
460
248k
                    if (directed) {
461
43.7k
                        diff -= (VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_in_weights)[c] +
462
43.7k
                                 VECTOR(*vertex_in_weights)[v] * VECTOR(cluster_out_weights)[c]) * resolution;
463
204k
                    } else {
464
204k
                        diff -= VECTOR(*vertex_out_weights)[v] * VECTOR(cluster_out_weights)[c] * resolution;
465
204k
                    }
466
467
468
248k
                    if (diff > max_diff) {
469
12.1k
                        best_cluster = c;
470
12.1k
                        max_diff = diff;
471
12.1k
                    }
472
473
                    /* Calculate the transformed difference for sampling */
474
248k
                    if (diff >= 0) {
475
247k
                        total_cum_trans_diff += exp(diff / beta);
476
247k
                    }
477
478
248k
                }
479
480
248k
                VECTOR(cum_trans_diff)[j] = total_cum_trans_diff;
481
248k
                VECTOR(edge_weights_per_cluster)[c] = 0.0;
482
248k
                IGRAPH_BIT_CLEAR(neighbor_cluster_added, c);
483
248k
            }
484
485
            /* Determine the neighboring cluster to which the currently selected vertex
486
             * will be moved.
487
             */
488
234k
            if (total_cum_trans_diff < IGRAPH_INFINITY) {
489
234k
                igraph_real_t r = RNG_UNIF(0, total_cum_trans_diff);
490
234k
                igraph_integer_t chosen_idx;
491
234k
                igraph_vector_binsearch_slice(&cum_trans_diff, r, &chosen_idx, 0, nb_neigh_clusters);
492
234k
                chosen_cluster = VECTOR(neighbor_clusters)[chosen_idx];
493
234k
            } else {
494
822
                chosen_cluster = best_cluster;
495
822
            }
496
497
            /* Move vertex to randomly chosen cluster */
498
234k
            VECTOR(cluster_out_weights)[chosen_cluster] += VECTOR(*vertex_out_weights)[v];
499
234k
            if (directed) {
500
41.2k
                VECTOR(cluster_in_weights)[chosen_cluster] += VECTOR(*vertex_in_weights)[v];
501
41.2k
            }
502
234k
            VECTOR(nb_vertices_per_cluster)[chosen_cluster]++;
503
504
474k
            for (igraph_integer_t j = 0; j < degree; j++) {
505
239k
                igraph_integer_t e = VECTOR(*edges)[j];
506
239k
                igraph_integer_t u = IGRAPH_OTHER(graph, e, v);
507
239k
                if (VECTOR(*membership)[u] == cluster_subset) {
508
36.7k
                    if (VECTOR(*refined_membership)[u] == chosen_cluster) {
509
26.0k
                        VECTOR(external_edge_weight_per_cluster_in_subset)[chosen_cluster] -= VECTOR(*edge_weights)[e];
510
26.0k
                    } else {
511
10.6k
                        VECTOR(external_edge_weight_per_cluster_in_subset)[chosen_cluster] += VECTOR(*edge_weights)[e];
512
10.6k
                    }
513
36.7k
                }
514
239k
            }
515
516
            /* Set cluster  */
517
234k
            if (chosen_cluster != current_cluster) {
518
10.5k
                VECTOR(*refined_membership)[v] = chosen_cluster;
519
520
10.5k
                IGRAPH_BIT_SET(non_singleton_cluster, chosen_cluster);
521
10.5k
            }
522
234k
        } /* end if singleton and may be merged */
523
244k
    }
524
525
233k
    IGRAPH_CHECK(leiden_clean_refined_membership(vertex_subset, refined_membership, nb_refined_clusters));
526
527
233k
    igraph_vector_destroy(&cum_trans_diff);
528
233k
    igraph_vector_int_destroy(&neighbor_clusters);
529
233k
    igraph_bitset_destroy(&neighbor_cluster_added);
530
233k
    igraph_vector_destroy(&edge_weights_per_cluster);
531
233k
    igraph_bitset_destroy(&non_singleton_cluster);
532
233k
    igraph_vector_int_destroy(&vertex_order);
533
233k
    igraph_vector_destroy(&external_edge_weight_per_cluster_in_subset);
534
233k
    igraph_vector_int_destroy(&nb_vertices_per_cluster);
535
233k
    if (directed) igraph_vector_destroy(&cluster_in_weights);
536
233k
    igraph_vector_destroy(&cluster_out_weights);
537
233k
    if (directed) {
538
40.8k
        IGRAPH_FINALLY_CLEAN(10);
539
192k
    } else {
540
192k
        IGRAPH_FINALLY_CLEAN(9);
541
192k
    }
542
543
233k
    return IGRAPH_SUCCESS;
544
233k
}
545
546
/* Create clusters out of a membership vector.
547
 *
548
 * It is assumed that the incoming list of integer vectors is already sized
549
 * appropriately (i.e. it has at least as many items as the number of clusters
550
 * in the membership vector), and that each item in the list of integer vectors
551
 * is empty.
552
 */
553
static igraph_error_t leiden_get_clusters(
554
        const igraph_vector_int_t *membership,
555
11.0k
        igraph_vector_int_list_t *clusters) {
556
557
11.0k
    const igraph_integer_t n = igraph_vector_int_size(membership);
558
559
499k
    for (igraph_integer_t i = 0; i < n; i++) {
560
        /* Get cluster for vertex i */
561
488k
        igraph_vector_int_t *cluster = igraph_vector_int_list_get_ptr(clusters, VECTOR(*membership)[i]);
562
563
        /* Add vertex i to cluster vector */
564
488k
        IGRAPH_CHECK(igraph_vector_int_push_back(cluster, i));
565
488k
    }
566
567
11.0k
    return IGRAPH_SUCCESS;
568
11.0k
}
569
570
/* Aggregate the graph based on the \p refined membership while setting the
571
 * membership of each aggregated vertex according to the \p membership.
572
 *
573
 * Technically speaking we have that
574
 * aggregated_membership[refined_membership[v]] = membership[v] for each vertex v.
575
 *
576
 * The new aggregated graph is returned in \p aggregated_graph. This graph
577
 * object should not yet be initialized, igraph_create() is called on it, and
578
 * responsibility for destroying the object lies with the calling method
579
 *
580
 * The remaining results, aggregated_edge_weights, aggregate_vertex_weights and
581
 * aggregated_membership are all expected to be initialized.
582
 *
583
 */
584
static igraph_error_t leiden_aggregate(
585
        const igraph_t *graph,
586
        const igraph_inclist_t *edges_per_vertex,
587
        const igraph_vector_t *edge_weights,
588
        const igraph_vector_t *vertex_out_weights,
589
        const igraph_vector_t *vertex_in_weights,
590
        const igraph_vector_int_t *membership,
591
        const igraph_vector_int_t *refined_membership,
592
        const igraph_integer_t nb_refined_clusters,
593
        igraph_t *aggregated_graph,
594
        igraph_vector_t *aggregated_edge_weights,
595
        igraph_vector_t *aggregated_vertex_out_weights,
596
        igraph_vector_t *aggregated_vertex_in_weights,
597
5.50k
        igraph_vector_int_t *aggregated_membership) {
598
599
5.50k
    const igraph_bool_t directed = (vertex_in_weights != NULL);
600
5.50k
    igraph_vector_int_t aggregated_edges;
601
5.50k
    igraph_vector_t edge_weight_to_cluster;
602
5.50k
    igraph_vector_int_list_t refined_clusters;
603
5.50k
    igraph_vector_int_t *incident_edges;
604
5.50k
    igraph_vector_int_t neighbor_clusters;
605
5.50k
    igraph_bitset_t neighbor_cluster_added;
606
5.50k
    igraph_integer_t c, degree, nb_neigh_clusters;
607
608
    /* Get refined clusters */
609
5.50k
    IGRAPH_VECTOR_INT_LIST_INIT_FINALLY(&refined_clusters, nb_refined_clusters);
610
5.50k
    IGRAPH_CHECK(leiden_get_clusters(refined_membership, &refined_clusters));
611
612
    /* Initialize new edges */
613
5.50k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&aggregated_edges, 0);
614
615
    /* We clear the aggregated edge weights, we will push each new edge weight */
616
5.50k
    igraph_vector_clear(aggregated_edge_weights);
617
    /* Simply resize the aggregated vertex weights and membership, they can be set directly */
618
5.50k
    IGRAPH_CHECK(igraph_vector_resize(aggregated_vertex_out_weights, nb_refined_clusters));
619
5.50k
    if (directed) {
620
1.03k
        IGRAPH_CHECK(igraph_vector_resize(aggregated_vertex_in_weights, nb_refined_clusters));
621
1.03k
    }
622
5.50k
    IGRAPH_CHECK(igraph_vector_int_resize(aggregated_membership, nb_refined_clusters));
623
624
5.50k
    IGRAPH_VECTOR_INIT_FINALLY(&edge_weight_to_cluster, nb_refined_clusters);
625
626
    /* Initialize neighboring cluster */
627
5.50k
    IGRAPH_BITSET_INIT_FINALLY(&neighbor_cluster_added, nb_refined_clusters);
628
5.50k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&neighbor_clusters, nb_refined_clusters);
629
630
    /* Check per cluster */
631
239k
    for (c = 0; c < nb_refined_clusters; c++) {
632
233k
        igraph_vector_int_t* refined_cluster = igraph_vector_int_list_get_ptr(&refined_clusters, c);
633
233k
        igraph_integer_t n_c = igraph_vector_int_size(refined_cluster);
634
233k
        igraph_integer_t v = -1;
635
636
        /* Calculate the total edge weight to other clusters */
637
233k
        VECTOR(*aggregated_vertex_out_weights)[c] = 0.0;
638
233k
        if (directed) {
639
40.9k
            VECTOR(*aggregated_vertex_in_weights)[c] = 0.0;
640
40.9k
        }
641
233k
        nb_neigh_clusters = 0;
642
478k
        for (igraph_integer_t i = 0; i < n_c; i++) {
643
244k
            v = VECTOR(*refined_cluster)[i];
644
244k
            incident_edges = igraph_inclist_get(edges_per_vertex, v);
645
244k
            degree = igraph_vector_int_size(incident_edges);
646
647
546k
            for (igraph_integer_t j = 0; j < degree; j++) {
648
302k
                igraph_integer_t e = VECTOR(*incident_edges)[j];
649
302k
                igraph_integer_t u = IGRAPH_OTHER(graph, e, v);
650
302k
                igraph_integer_t c2 = VECTOR(*refined_membership)[u];
651
652
302k
                if (c2 > c) {
653
120k
                    if (!IGRAPH_BIT_TEST(neighbor_cluster_added, c2)) {
654
105k
                        IGRAPH_BIT_SET(neighbor_cluster_added, c2);
655
105k
                        VECTOR(neighbor_clusters)[nb_neigh_clusters++] = c2;
656
105k
                    }
657
120k
                    VECTOR(edge_weight_to_cluster)[c2] += VECTOR(*edge_weights)[e];
658
120k
                }
659
302k
            }
660
661
244k
            VECTOR(*aggregated_vertex_out_weights)[c] += VECTOR(*vertex_out_weights)[v];
662
244k
            if (directed) {
663
42.8k
                VECTOR(*aggregated_vertex_in_weights)[c] += VECTOR(*vertex_in_weights)[v];
664
42.8k
            }
665
244k
        }
666
667
        /* Add actual edges from this cluster to the other clusters */
668
339k
        for (igraph_integer_t i = 0; i < nb_neigh_clusters; i++) {
669
105k
            igraph_integer_t c2 = VECTOR(neighbor_clusters)[i];
670
671
            /* Add edge */
672
105k
            IGRAPH_CHECK(igraph_vector_int_push_back(&aggregated_edges, c));
673
105k
            IGRAPH_CHECK(igraph_vector_int_push_back(&aggregated_edges, c2));
674
675
            /* Add edge weight */
676
105k
            IGRAPH_CHECK(igraph_vector_push_back(aggregated_edge_weights, VECTOR(edge_weight_to_cluster)[c2]));
677
678
105k
            VECTOR(edge_weight_to_cluster)[c2] = 0.0;
679
105k
            IGRAPH_BIT_CLEAR(neighbor_cluster_added, c2);
680
105k
        }
681
682
233k
        VECTOR(*aggregated_membership)[c] = VECTOR(*membership)[v];
683
684
233k
    }
685
686
5.50k
    igraph_vector_int_destroy(&neighbor_clusters);
687
5.50k
    igraph_bitset_destroy(&neighbor_cluster_added);
688
5.50k
    igraph_vector_destroy(&edge_weight_to_cluster);
689
5.50k
    igraph_vector_int_list_destroy(&refined_clusters);
690
5.50k
    IGRAPH_FINALLY_CLEAN(4);
691
692
5.50k
    igraph_destroy(aggregated_graph);
693
5.50k
    IGRAPH_CHECK(igraph_create(aggregated_graph, &aggregated_edges, nb_refined_clusters,
694
5.50k
                               directed));
695
696
5.50k
    igraph_vector_int_destroy(&aggregated_edges);
697
5.50k
    IGRAPH_FINALLY_CLEAN(1);
698
699
5.50k
    return IGRAPH_SUCCESS;
700
5.50k
}
701
702
/* Calculate the quality of the partition.
703
 *
704
 * The quality is defined as
705
 *
706
 * 1 / 2m sum_ij (A_ij - gamma n_i n_j) d(s_i, s_j)
707
 *
708
 * for undirected graphs and as
709
 *
710
 * 1 / m sum_ij (A_ij - gamma n^out_i n^in_j) d(s_i, s_j)
711
 *
712
 * where m is the total edge weight, A_ij is the weight of edge (i, j), gamma is
713
 * the so-called resolution parameter, n_i is the vertex weight of vertex i, s_i is
714
 * the cluster of vertex i and d(x, y) = 1 if and only if x = y and 0 otherwise.
715
 *
716
 * Note that by setting n_i = k_i the degree of vertex i and dividing gamma by 2m,
717
 * we effectively optimize modularity. By setting n_i = 1 we optimize the
718
 * Constant Potts Model.
719
 *
720
 * This can be represented as a sum over clusters as
721
 *
722
 * 1 / 2m sum_c (e_c - gamma N_c^2)
723
 *
724
 * where e_c = sum_ij A_ij d(s_i, c)d(s_j, c) is the internal edge weight
725
 * in cluster c (or twice this value if undirected) and
726
 * N_c = sum_i n_i d(s_i, c) is the sum of the vertex weights inside cluster c.
727
 * This is how the quality is calculated in practice.
728
 */
729
static igraph_error_t leiden_quality(
730
        const igraph_t *graph,
731
        const igraph_vector_t *edge_weights,
732
        const igraph_vector_t *vertex_out_weights,
733
        const igraph_vector_t *vertex_in_weights,
734
        const igraph_vector_int_t *membership,
735
        const igraph_integer_t nb_clusters,
736
        const igraph_real_t resolution,
737
13.2k
        igraph_real_t *quality) {
738
739
13.2k
    const igraph_integer_t vcount = igraph_vcount(graph);
740
13.2k
    const igraph_integer_t ecount = igraph_ecount(graph);
741
13.2k
    const igraph_bool_t directed = (vertex_in_weights != NULL);
742
13.2k
    const igraph_real_t directed_multiplier = directed ? 1.0 : 2.0;
743
13.2k
    igraph_vector_t cluster_out_weights, cluster_in_weights;
744
13.2k
    igraph_real_t total_edge_weight = 0.0;
745
746
13.2k
    *quality = 0.0;
747
748
315k
    for (igraph_integer_t e=0; e < ecount; e++) {
749
302k
        igraph_integer_t from = IGRAPH_FROM(graph, e);
750
302k
        igraph_integer_t to = IGRAPH_TO(graph, e);
751
302k
        total_edge_weight += VECTOR(*edge_weights)[e];
752
753
        /* We add the internal edge weights. */
754
302k
        if (VECTOR(*membership)[from] == VECTOR(*membership)[to]) {
755
74.1k
            *quality += directed_multiplier * VECTOR(*edge_weights)[e];
756
74.1k
        }
757
302k
    }
758
759
    /* Initialize and compute cluster weights. */
760
761
13.2k
    IGRAPH_VECTOR_INIT_FINALLY(&cluster_out_weights, vcount);
762
13.2k
    if (directed) {
763
6.63k
        IGRAPH_VECTOR_INIT_FINALLY(&cluster_in_weights, vcount);
764
6.63k
    }
765
766
600k
    for (igraph_integer_t i = 0; i < vcount; i++) {
767
587k
        igraph_integer_t c = VECTOR(*membership)[i];
768
587k
        VECTOR(cluster_out_weights)[c] += VECTOR(*vertex_out_weights)[i];
769
587k
        if (directed) {
770
293k
            VECTOR(cluster_in_weights)[c] += VECTOR(*vertex_in_weights)[i];
771
293k
        }
772
587k
    }
773
774
    /* We subtract gamma * N^out_c * N^in_c */
775
776
590k
    for (igraph_integer_t c = 0; c < nb_clusters; c++) {
777
577k
        if (directed) {
778
291k
            *quality -= resolution * VECTOR(cluster_out_weights)[c] * VECTOR(cluster_in_weights)[c];
779
291k
        } else {
780
285k
            *quality -= resolution * VECTOR(cluster_out_weights)[c] * VECTOR(cluster_out_weights)[c];
781
285k
        }
782
577k
    }
783
784
13.2k
    if (directed) {
785
6.63k
        igraph_vector_destroy(&cluster_in_weights);
786
6.63k
        IGRAPH_FINALLY_CLEAN(1);
787
6.63k
    }
788
13.2k
    igraph_vector_destroy(&cluster_out_weights);
789
13.2k
    IGRAPH_FINALLY_CLEAN(1);
790
791
    /* We normalise by m or 2m depending on directedness */
792
13.2k
    *quality /= (directed_multiplier * total_edge_weight);
793
794
13.2k
    return IGRAPH_SUCCESS;
795
13.2k
}
796
797
/* This is the core of the Leiden algorithm and relies on subroutines to
798
 * perform the three different phases: (1) local moving of vertices, (2)
799
 * refinement of the partition and (3) aggregation of the network based on the
800
 * refined partition, using the non-refined partition to create an initial
801
 * partition for the aggregate network.
802
 */
803
static igraph_error_t community_leiden(
804
        const igraph_t *graph,
805
        igraph_vector_t *edge_weights,
806
        igraph_vector_t *vertex_out_weights,
807
        igraph_vector_t *vertex_in_weights,
808
        igraph_real_t resolution,
809
        igraph_real_t beta,
810
        igraph_vector_int_t *membership,
811
        igraph_integer_t *nb_clusters,
812
        igraph_real_t *quality,
813
13.2k
        igraph_bool_t *changed) {
814
815
13.2k
    const igraph_integer_t n = igraph_vcount(graph);
816
13.2k
    const igraph_bool_t directed = (vertex_in_weights != NULL);
817
13.2k
    igraph_integer_t nb_refined_clusters;
818
13.2k
    igraph_integer_t i, c;
819
13.2k
    igraph_t aggregated_graph, *i_graph;
820
13.2k
    igraph_vector_t aggregated_edge_weights;
821
13.2k
    igraph_vector_t aggregated_vertex_out_weights, aggregated_vertex_in_weights;
822
13.2k
    igraph_vector_int_t aggregated_membership;
823
13.2k
    igraph_vector_t *i_edge_weights;
824
13.2k
    igraph_vector_t *i_vertex_out_weights, *i_vertex_in_weights;
825
13.2k
    igraph_vector_int_t *i_membership;
826
13.2k
    igraph_vector_t tmp_edge_weights, tmp_vertex_out_weights, tmp_vertex_in_weights;
827
13.2k
    igraph_vector_int_t tmp_membership;
828
13.2k
    igraph_vector_int_t refined_membership;
829
13.2k
    igraph_vector_int_t aggregate_vertex;
830
13.2k
    igraph_vector_int_list_t clusters;
831
13.2k
    igraph_inclist_t edges_per_vertex;
832
13.2k
    igraph_bool_t continue_clustering;
833
13.2k
    igraph_integer_t level = 0;
834
835
    /* Initialize temporary weights and membership to be used in aggregation */
836
13.2k
    IGRAPH_VECTOR_INIT_FINALLY(&tmp_edge_weights, 0);
837
13.2k
    IGRAPH_VECTOR_INIT_FINALLY(&tmp_vertex_out_weights, 0);
838
13.2k
    if (directed) {
839
6.63k
        IGRAPH_VECTOR_INIT_FINALLY(&tmp_vertex_in_weights, 0);
840
6.63k
    }
841
13.2k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&tmp_membership, 0);
842
843
    /* Initialize clusters */
844
13.2k
    IGRAPH_VECTOR_INT_LIST_INIT_FINALLY(&clusters, n);
845
846
    /* Initialize aggregate vertices, which initially is identical to simply the
847
     * vertices in the graph. */
848
13.2k
    IGRAPH_CHECK(igraph_vector_int_init_range(&aggregate_vertex, 0, n));
849
13.2k
    IGRAPH_FINALLY(igraph_vector_int_destroy, &aggregate_vertex);
850
851
    /* Initialize refined membership */
852
13.2k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&refined_membership, 0);
853
854
    /* Initialize aggregated graph */
855
13.2k
    IGRAPH_CHECK(igraph_empty(&aggregated_graph, 0, directed));
856
13.2k
    IGRAPH_FINALLY(igraph_destroy, &aggregated_graph);
857
858
    /* Initialize aggregated edge weights */
859
13.2k
    IGRAPH_VECTOR_INIT_FINALLY(&aggregated_edge_weights, 0);
860
861
    /* Initialize aggregated vertex weights */
862
13.2k
    IGRAPH_VECTOR_INIT_FINALLY(&aggregated_vertex_out_weights, 0);
863
13.2k
    if (directed) {
864
6.63k
        IGRAPH_VECTOR_INIT_FINALLY(&aggregated_vertex_in_weights, 0);
865
6.63k
    }
866
867
    /* Initialize aggregated membership */
868
13.2k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&aggregated_membership, 0);
869
870
    /* Set actual graph, weights and membership to be used. */
871
13.2k
    i_graph = (igraph_t*)graph;
872
13.2k
    i_edge_weights = edge_weights;
873
13.2k
    i_vertex_out_weights = vertex_out_weights;
874
13.2k
    i_vertex_in_weights = directed ? vertex_in_weights : NULL;
875
13.2k
    i_membership = membership;
876
877
    /* Clean membership: ensure that cluster indices are 0 <= c < n. */
878
13.2k
    IGRAPH_CHECK(igraph_reindex_membership(i_membership, NULL, nb_clusters));
879
880
    /* We start out with no changes, whenever a vertex is moved, this will be set to true. */
881
13.2k
    *changed = false;
882
18.7k
    do {
883
884
        /* Get incidence list for fast iteration */
885
18.7k
        IGRAPH_CHECK(igraph_inclist_init( i_graph, &edges_per_vertex, IGRAPH_ALL, IGRAPH_LOOPS_TWICE));
886
18.7k
        IGRAPH_FINALLY(igraph_inclist_destroy, &edges_per_vertex);
887
888
        /* Move around the vertices in order to increase the quality */
889
18.7k
        IGRAPH_CHECK(leiden_fastmove_vertices(i_graph,
890
18.7k
                                              &edges_per_vertex,
891
18.7k
                                              i_edge_weights,
892
18.7k
                                              i_vertex_out_weights, i_vertex_in_weights,
893
18.7k
                                              resolution,
894
18.7k
                                              nb_clusters,
895
18.7k
                                              i_membership,
896
18.7k
                                              changed));
897
898
        /* We only continue clustering if not all clusters are represented by a
899
         * single vertex yet
900
         */
901
18.7k
        continue_clustering = (*nb_clusters < igraph_vcount(i_graph));
902
903
18.7k
        if (continue_clustering) {
904
            /* Set original membership */
905
5.50k
            if (level > 0) {
906
29.4k
                for (i = 0; i < n; i++) {
907
28.8k
                    igraph_integer_t v_aggregate = VECTOR(aggregate_vertex)[i];
908
28.8k
                    VECTOR(*membership)[i] = VECTOR(*i_membership)[v_aggregate];
909
28.8k
                }
910
561
            }
911
912
            /* Get vertex sets for each cluster. */
913
5.50k
            IGRAPH_CHECK(leiden_get_clusters(i_membership, &clusters));
914
915
            /* Ensure refined membership is correct size */
916
5.50k
            IGRAPH_CHECK(igraph_vector_int_resize(&refined_membership, igraph_vcount(i_graph)));
917
918
            /* Refine each cluster */
919
5.50k
            nb_refined_clusters = 0;
920
238k
            for (c = 0; c < *nb_clusters; c++) {
921
233k
                igraph_vector_int_t* cluster = igraph_vector_int_list_get_ptr(&clusters, c);
922
233k
                IGRAPH_CHECK(leiden_merge_vertices(i_graph,
923
233k
                                                   &edges_per_vertex,
924
233k
                                                   i_edge_weights,
925
233k
                                                   i_vertex_out_weights, i_vertex_in_weights,
926
233k
                                                   cluster, i_membership, c,
927
233k
                                                   resolution, beta,
928
233k
                                                   &nb_refined_clusters, &refined_membership));
929
                /* Empty cluster */
930
233k
                igraph_vector_int_clear(cluster);
931
233k
            }
932
933
            /* If refinement didn't aggregate anything, we aggregate on the basis of
934
             * the actual clustering */
935
5.50k
            if (nb_refined_clusters >= igraph_vcount(i_graph)) {
936
78
                IGRAPH_CHECK(igraph_vector_int_update(&refined_membership, i_membership));
937
78
                nb_refined_clusters = *nb_clusters;
938
78
            }
939
940
            /* Keep track of aggregate vertex. */
941
252k
            for (i = 0; i < n; i++) {
942
                /* Current aggregate vertex */
943
246k
                igraph_integer_t v_aggregate = VECTOR(aggregate_vertex)[i];
944
                /* New aggregate vertex */
945
246k
                VECTOR(aggregate_vertex)[i] = VECTOR(refined_membership)[v_aggregate];
946
246k
            }
947
948
5.50k
            IGRAPH_CHECK(leiden_aggregate(
949
5.50k
                i_graph,
950
5.50k
                &edges_per_vertex,
951
5.50k
                i_edge_weights,
952
5.50k
                i_vertex_out_weights, i_vertex_in_weights,
953
5.50k
                i_membership, &refined_membership, nb_refined_clusters,
954
5.50k
                &aggregated_graph,
955
5.50k
                &tmp_edge_weights,
956
5.50k
                &tmp_vertex_out_weights, directed ? &tmp_vertex_in_weights : NULL,
957
5.50k
                &tmp_membership));
958
959
            /* On the lowest level, the actual graph and vertex and edge weights and
960
             * membership are used. On higher levels, we will use the aggregated graph
961
             * and associated vectors.
962
             */
963
5.50k
            if (level == 0) {
964
                /* Set actual graph, weights and membership to be used. */
965
4.94k
                i_graph = &aggregated_graph;
966
4.94k
                i_edge_weights = &aggregated_edge_weights;
967
4.94k
                i_vertex_out_weights = &aggregated_vertex_out_weights;
968
4.94k
                if (directed) {
969
894
                    i_vertex_in_weights = &aggregated_vertex_in_weights;
970
894
                }
971
4.94k
                i_membership = &aggregated_membership;
972
4.94k
            }
973
974
            /* Update the aggregated administration. */
975
5.50k
            IGRAPH_CHECK(igraph_vector_update(i_edge_weights, &tmp_edge_weights));
976
5.50k
            IGRAPH_CHECK(igraph_vector_update(i_vertex_out_weights, &tmp_vertex_out_weights));
977
5.50k
            if (directed) {
978
1.03k
                IGRAPH_CHECK(igraph_vector_update(i_vertex_in_weights, &tmp_vertex_in_weights));
979
1.03k
            }
980
5.50k
            IGRAPH_CHECK(igraph_vector_int_update(i_membership, &tmp_membership));
981
982
5.50k
            level += 1;
983
5.50k
        }
984
985
        /* We are done iterating, so we destroy the incidence list */
986
18.7k
        igraph_inclist_destroy(&edges_per_vertex);
987
18.7k
        IGRAPH_FINALLY_CLEAN(1);
988
18.7k
    } while (continue_clustering);
989
990
    /* Free aggregated graph and associated vectors */
991
13.2k
    igraph_vector_int_destroy(&aggregated_membership);
992
13.2k
    if (directed) igraph_vector_destroy(&aggregated_vertex_in_weights);
993
13.2k
    igraph_vector_destroy(&aggregated_vertex_out_weights);
994
13.2k
    igraph_vector_destroy(&aggregated_edge_weights);
995
13.2k
    igraph_destroy(&aggregated_graph);
996
997
    /* Free remaining memory */
998
13.2k
    igraph_vector_int_destroy(&refined_membership);
999
13.2k
    igraph_vector_int_destroy(&aggregate_vertex);
1000
13.2k
    igraph_vector_int_list_destroy(&clusters);
1001
13.2k
    igraph_vector_int_destroy(&tmp_membership);
1002
13.2k
    igraph_vector_destroy(&tmp_vertex_out_weights);
1003
13.2k
    if (directed) igraph_vector_destroy(&tmp_vertex_in_weights);
1004
13.2k
    igraph_vector_destroy(&tmp_edge_weights);
1005
1006
13.2k
    if (directed) {
1007
6.63k
        IGRAPH_FINALLY_CLEAN(12);
1008
6.63k
    } else {
1009
6.63k
        IGRAPH_FINALLY_CLEAN(10);
1010
6.63k
    }
1011
1012
    /* Calculate quality */
1013
13.2k
    if (quality) {
1014
13.2k
        IGRAPH_CHECK(leiden_quality(graph,
1015
13.2k
                                    edge_weights, vertex_out_weights, vertex_in_weights,
1016
13.2k
                                    membership,
1017
13.2k
                                    *nb_clusters, resolution,
1018
13.2k
                                    quality));
1019
13.2k
    }
1020
1021
13.2k
    return IGRAPH_SUCCESS;
1022
13.2k
}
1023
1024
/**
1025
 * \ingroup communities
1026
 * \function igraph_community_leiden
1027
 * \brief Finding community structure using the Leiden algorithm.
1028
 *
1029
 * This function implements the Leiden algorithm for finding community
1030
 * structure.
1031
 *
1032
 * </para><para>
1033
 * It is similar to the multilevel algorithm, often called the Louvain
1034
 * algorithm, but it is faster and yields higher quality solutions. It can
1035
 * optimize both modularity and the Constant Potts Model, which does not suffer
1036
 * from the resolution-limit (see Traag, Van Dooren &amp; Nesterov).
1037
 *
1038
 * </para><para>
1039
 * The Leiden algorithm consists of three phases: (1) local moving of vertices, (2)
1040
 * refinement of the partition and (3) aggregation of the network based on the
1041
 * refined partition, using the non-refined partition to create an initial
1042
 * partition for the aggregate network. In the local move procedure in the
1043
 * Leiden algorithm, only vertices whose neighborhood has changed are visited. Only
1044
 * moves that strictly improve the quality function are made. The refinement is
1045
 * done by restarting from a singleton partition within each cluster and
1046
 * gradually merging the subclusters. When aggregating, a single cluster may
1047
 * then be represented by several vertices (which are the subclusters identified in
1048
 * the refinement).
1049
 *
1050
 * </para><para>
1051
 * The Leiden algorithm provides several guarantees. The Leiden algorithm is
1052
 * typically iterated: the output of one iteration is used as the input for the
1053
 * next iteration. At each iteration all clusters are guaranteed to be (weakly)
1054
 * connected and well-separated. After an iteration in which nothing has
1055
 * changed, all vertices and some parts are guaranteed to be locally optimally
1056
 * assigned. Note that even if a single iteration did not result in any change,
1057
 * it is still possible that a subsequent iteration might find some
1058
 * improvement. Each iteration explores different subsets of vertices to consider
1059
 * for moving from one cluster to another. Finally, asymptotically, all subsets
1060
 * of all clusters are guaranteed to be locally optimally assigned. For more
1061
 * details, please see Traag, Waltman &amp; van Eck (2019).
1062
 *
1063
 * </para><para>
1064
 * The objective function being optimized is
1065
 *
1066
 * </para><para>
1067
 * <code>1 / 2m sum_ij (A_ij - γ n_i n_j) δ(s_i, s_j)</code>
1068
 *
1069
 * </para><para>
1070
 * in the undirected case and
1071
 *
1072
 * </para><para>
1073
 * <code>1 / m sum_ij (A_ij - γ n^out_i n^in_j) δ(s_i, s_j)</code>
1074
 *
1075
 * </para><para>
1076
 * in the directed case.
1077
 * Here \c m is the total edge weight, <code>A_ij</code> is the weight of edge
1078
 * (i, j), \c γ is the so-called resolution parameter, <code>n_i</code>
1079
 * is the vertex weight of vertex \c i (separate out- and in-weights are used
1080
 * with directed graphs), <code>s_i</code> is the cluster of vertex
1081
 * \c i and <code>δ(x, y) = 1</code> if and only if <code>x = y</code> and 0
1082
 * otherwise.
1083
 *
1084
 * </para><para>
1085
 * By setting <code>n_i = k_i</code>, the degree of vertex \c i, and
1086
 * dividing \c γ by <code>2m</code> (by \c m in the directed case), we effectively
1087
 * obtain an expression for modularity. Hence, the standard modularity will be
1088
 * optimized when you supply the degrees (out- and in-degrees with directed graphs)
1089
 * as the vertex weights and by supplying as a resolution parameter
1090
 * <code>1/(2m)</code> (<code>1/m</code> with directed graphs).
1091
 * Use the \ref igraph_community_leiden_simple() convenience function to
1092
 * compute vertex weights automatically for modularity maximization.
1093
 *
1094
 * </para><para>
1095
 * References:
1096
 *
1097
 * </para><para>
1098
 * V. A. Traag, L. Waltman, N. J. van Eck:
1099
 * From Louvain to Leiden: guaranteeing well-connected communities.
1100
 * Scientific Reports, 9(1), 5233 (2019).
1101
 * http://dx.doi.org/10.1038/s41598-019-41695-z
1102
 *
1103
 * </para><para>
1104
 * V. A. Traag, P. Van Dooren, and Y. Nesterov:
1105
 * Narrow scope for resolution-limit-free community detection.
1106
 * Phys. Rev. E 84, 016114 (2011).
1107
 * https://doi.org/10.1103/PhysRevE.84.016114
1108
 *
1109
 * \param graph The input graph.
1110
 * \param edge_weights Numeric vector containing edge weights. If \c NULL,
1111
 *    every edge has equal weight of 1. The weights need not be non-negative.
1112
 * \param vertex_out_weights Numeric vector containing vertex weights, or vertex
1113
 *    out-weights for directed graphs. If \c NULL, every vertex has equal
1114
 *    weight of 1.
1115
 * \param vertex_in_weights Numeric vector containing vertex in-weights for
1116
 *    directed graphs. If set to \c NULL, in-weights are assumed to be the same
1117
 *    as out-weights, which effectively ignores edge directions.
1118
 *    Must be \c NULL for undirected graphs.
1119
 * \param n_iterations Iterate the core Leiden algorithm the indicated number
1120
 *    of times. If this is a negative number, it will continue iterating until
1121
 *    an iteration did not change the clustering. Two iterations are often
1122
 *    sufficient, thus 2 is a reasonable default.
1123
 * \param beta The randomness used in the refinement step when merging. A small
1124
 *    amount of randomness (\c beta = 0.01) typically works well.
1125
 * \param start Start from membership vector. If this is true, the optimization
1126
 *    will start from the provided membership vector. If this is false, the
1127
 *    optimization will start from a singleton partition.
1128
 * \param n_iterations Iterate the core Leiden algorithm for the indicated number
1129
 *    of times. If this is a negative number, it will continue iterating until
1130
 *    an iteration did not change the clustering.
1131
 * \param membership The membership vector. This is both used as the initial
1132
 *    membership from which optimisation starts and is updated in place. It
1133
 *    must hence be properly initialized. When finding clusters from scratch it
1134
 *    is typically started using a singleton clustering. This can be achieved
1135
 *    using \ref igraph_vector_int_init_range().
1136
 * \param nb_clusters The number of clusters contained in the final \p membership.
1137
 *    If \c NULL, the number of clusters will not be returned.
1138
 * \param quality The quality of the partition, in terms of the objective
1139
 *    function as included in the documentation. If \c NULL the quality will
1140
 *    not be calculated.
1141
 * \return Error code.
1142
 *
1143
 * Time complexity: near linear on sparse graphs.
1144
 *
1145
 * \sa \ref igraph_community_leiden_simple() for a simplified interface
1146
 * that allows specifying an objective function directly and does not require
1147
 * vertex weights.
1148
 *
1149
 * \example examples/simple/igraph_community_leiden.c
1150
 */
1151
igraph_error_t igraph_community_leiden(
1152
        const igraph_t *graph,
1153
        const igraph_vector_t *edge_weights,
1154
        const igraph_vector_t *vertex_out_weights,
1155
        const igraph_vector_t *vertex_in_weights,
1156
        igraph_real_t resolution,
1157
        igraph_real_t beta,
1158
        igraph_bool_t start,
1159
        igraph_integer_t n_iterations,
1160
        igraph_vector_int_t *membership,
1161
        igraph_integer_t *nb_clusters,
1162
6.63k
        igraph_real_t *quality) {
1163
1164
6.63k
    const igraph_integer_t vcount = igraph_vcount(graph);
1165
6.63k
    const igraph_integer_t ecount = igraph_ecount(graph);
1166
6.63k
    const igraph_bool_t directed = igraph_is_directed(graph);
1167
6.63k
    igraph_vector_t *i_edge_weights, *i_vertex_out_weights, *i_vertex_in_weights;
1168
6.63k
    igraph_integer_t i_nb_clusters;
1169
1170
6.63k
    if (!nb_clusters) {
1171
0
        nb_clusters = &i_nb_clusters;
1172
0
    }
1173
1174
6.63k
    if (start) {
1175
0
        if (!membership) {
1176
0
            IGRAPH_ERROR("Cannot start optimization if membership is missing.", IGRAPH_EINVAL);
1177
0
        }
1178
1179
0
        if (igraph_vector_int_size(membership) != vcount) {
1180
0
            IGRAPH_ERROR("Membership vector length does not equal the number of vertices.", IGRAPH_EINVAL);
1181
0
        }
1182
6.63k
    } else {
1183
6.63k
        if (!membership)
1184
0
            IGRAPH_ERROR("Membership vector should be supplied and initialized, "
1185
6.63k
                         "even when not starting optimization from it.", IGRAPH_EINVAL);
1186
1187
6.63k
        IGRAPH_CHECK(igraph_vector_int_range(membership, 0, vcount));
1188
6.63k
    }
1189
1190
    /* Check edge weights to possibly use default. */
1191
6.63k
    if (!edge_weights) {
1192
0
        i_edge_weights = IGRAPH_CALLOC(1, igraph_vector_t);
1193
0
        IGRAPH_CHECK_OOM(i_edge_weights, "Leiden algorithm failed, could not allocate memory for edge weights.");
1194
0
        IGRAPH_FINALLY(igraph_free, i_edge_weights);
1195
0
        IGRAPH_CHECK(igraph_vector_init(i_edge_weights, igraph_ecount(graph)));
1196
0
        IGRAPH_FINALLY(igraph_vector_destroy, i_edge_weights);
1197
0
        igraph_vector_fill(i_edge_weights, 1);
1198
6.63k
    } else {
1199
6.63k
        if (igraph_vector_size(edge_weights) != ecount) {
1200
0
            IGRAPH_ERRORF("Edge weight vector length (%" IGRAPH_PRId ") does not match number of edges (%" IGRAPH_PRId ").",
1201
0
                          IGRAPH_EINVAL, igraph_vector_size(edge_weights), ecount);
1202
0
        }
1203
6.63k
        i_edge_weights = (igraph_vector_t*)edge_weights;
1204
6.63k
    }
1205
1206
    /* Check vertex out-weights to possibly use default. */
1207
6.63k
    if (!vertex_out_weights) {
1208
6.63k
        i_vertex_out_weights = IGRAPH_CALLOC(1, igraph_vector_t);
1209
6.63k
        IGRAPH_CHECK_OOM(i_vertex_out_weights, "Leiden algorithm failed, could not allocate memory for vertex weights.");
1210
6.63k
        IGRAPH_FINALLY(igraph_free, i_vertex_out_weights);
1211
6.63k
        IGRAPH_VECTOR_INIT_FINALLY(i_vertex_out_weights, vcount);
1212
6.63k
        igraph_vector_fill(i_vertex_out_weights, 1);
1213
6.63k
    } else {
1214
0
        if (igraph_vector_size(vertex_out_weights) != vcount) {
1215
0
            IGRAPH_ERRORF("Vertex %sweight vector length (%" IGRAPH_PRId ") does not match number of vertices (%" IGRAPH_PRId ").",
1216
0
                          IGRAPH_EINVAL,
1217
0
                          directed ? "out-" : "",
1218
0
                          igraph_vector_size(vertex_out_weights), vcount);
1219
0
        }
1220
0
        i_vertex_out_weights = (igraph_vector_t*)vertex_out_weights;
1221
0
    }
1222
1223
6.63k
    if (directed) {
1224
        /* When in-weights are not given for a directed graph,
1225
         * assume that they are the same as the out-weights.
1226
         * This effectively ignores edge directions. */
1227
3.31k
        if (vertex_in_weights) {
1228
0
            if (igraph_vector_size(vertex_in_weights) != vcount) {
1229
0
                IGRAPH_ERRORF("Vertex in-weight vector length (%" IGRAPH_PRId ") does not match number of vertices (%" IGRAPH_PRId ").",
1230
0
                              IGRAPH_EINVAL,
1231
0
                              igraph_vector_size(vertex_in_weights), vcount);
1232
0
            }
1233
0
            i_vertex_in_weights = (igraph_vector_t*)vertex_in_weights;
1234
3.31k
        } else {
1235
3.31k
            i_vertex_in_weights = i_vertex_out_weights;
1236
3.31k
        }
1237
3.31k
    } else {
1238
        /* In-weights must be NULL in the undirected case. */
1239
3.31k
        if (vertex_in_weights) {
1240
0
            IGRAPH_ERROR("Vertex in-weights must not be given for undirected graphs.", IGRAPH_EINVAL);
1241
3.31k
        } else {
1242
3.31k
            i_vertex_in_weights = NULL;
1243
3.31k
        }
1244
3.31k
    }
1245
1246
    /* Perform actual Leiden algorithm iteratively. We either
1247
     * perform a fixed number of iterations, or we perform
1248
     * iterations until the quality remains unchanged. Even if
1249
     * a single iteration did not change anything, a subsequent
1250
     * iteration may still find some improvement. This is because
1251
     * each iteration explores different subsets of vertices.
1252
     */
1253
6.63k
    igraph_bool_t changed = true;
1254
6.63k
    for (igraph_integer_t itr = 0;
1255
19.9k
         n_iterations < 0 ? changed : itr < n_iterations;
1256
13.2k
         itr++) {
1257
13.2k
        IGRAPH_CHECK(community_leiden(graph,
1258
13.2k
                                      i_edge_weights, i_vertex_out_weights, i_vertex_in_weights,
1259
13.2k
                                      resolution, beta,
1260
13.2k
                                      membership, nb_clusters, quality, &changed));
1261
13.2k
    }
1262
1263
6.63k
    if (!edge_weights) {
1264
0
        igraph_vector_destroy(i_edge_weights);
1265
0
        IGRAPH_FREE(i_edge_weights);
1266
0
        IGRAPH_FINALLY_CLEAN(2);
1267
0
    }
1268
1269
6.63k
    if (!vertex_out_weights) {
1270
6.63k
        igraph_vector_destroy(i_vertex_out_weights);
1271
6.63k
        IGRAPH_FREE(i_vertex_out_weights);
1272
6.63k
        IGRAPH_FINALLY_CLEAN(2);
1273
6.63k
    }
1274
1275
6.63k
    return IGRAPH_SUCCESS;
1276
6.63k
}
1277
1278
/**
1279
 * \function igraph_community_leiden_simple
1280
 * \brief Finding community structure using the Leiden algorithm, simple interface.
1281
 *
1282
 * This is a simplified interface to \ref igraph_community_leiden() for
1283
 * convenience purposes. Instead of requiring vertex weights, it allows
1284
 * choosing from a set of objective functions to maximize. It implements
1285
 * these objective functions by passing suitable vertex weights to
1286
 * \ref igraph_community_leiden(), as explained in the documentation of
1287
 * that function.
1288
 *
1289
 * \param graph The input graph. May be directed or undirected.
1290
 * \param weights The edge weights. If \c NULL, all weights are assumed to be 1.
1291
 * \param objective The objective function to maximize.
1292
 *    \clist
1293
 *    \cli IGRAPH_LEIDEN_OBJECTIVE_MODULARITY
1294
 *      Use the generalized modularity, defined as
1295
 *      <code>Q = 1/(2m) sum_ij (A_ij - γ k_i k_j / (2m)) δ(c_i, c_j)</code>
1296
 *      for undirected graphs and as
1297
 *      <code>Q = 1/m sum_ij (A_ij - γ k^out_i k^in_j / m) δ(c_i, c_j)</code>
1298
 *      for directed graphs. This effectively uses a multigraph configuration
1299
 *      model as the null model. Edge weights must not be negative.
1300
 *    \cli IGRAPH_LEIDEN_OBJECTIVE_CPM
1301
 *      Use the constant Potts model, whose objective function is defined as
1302
 *      <code>Q = 1/(2m) sum_ij (A_ij - γ) δ(c_i, c_j)</code>
1303
 *      for undirected graphs and as
1304
 *      <code>Q = 1/m sum_ij (A_ij - γ) δ(c_i, c_j)</code>
1305
 *      for directed graphs. Edge weights are allowed to be negative.
1306
 *      Edge directions have no impact on the result.
1307
 *    \cli IGRAPH_LEIDEN_OBJECTIVE_ER
1308
 *      Use an objective function based on the multigraph Erdős-Rényi G(n,p)
1309
 *      null model, defined as
1310
 *      <code>Q = 1/(2m) sum_ij (A_ij - γ p) δ(c_i, c_j)</code>
1311
 *      for undirected graphs and as
1312
 *      <code>Q = 1/m sum_ij (A_ij - γ p) δ(c_i, c_j)</code>
1313
 *      for directed graphs. \c p is the weighted density, i.e. the average
1314
 *      link strength between all vertex pairs (whether adjacent or not).
1315
 *      Edge weights must not be negative. Edge directions have no impact on
1316
 *      the result.
1317
 *    \endclist
1318
 *    In the above formulas, \c A is the adjacency matrix, \c m is the total
1319
 *    edge weight, \c k are the (out- and in-) degrees, \c γ is the resolution
1320
 *    parameter, and <code>δ(c_i, c_j)</code> is 1 if vertices \c i and \c j
1321
 *    are in the same community and 0 otherwise. Edge directions are only
1322
 *    relevant with \c IGRAPH_LEIDEN_OBJECTIVE_MODULARITY. The other two
1323
 *    objective functions are equivalent between directed and undirected graphs:
1324
 *    the formal difference is due to each edge being included twice in
1325
 *    undirected (symmetric) adjacency matrices.
1326
 * \param resolution The resolution parameter, which is represented by γ in
1327
 *    the objective functions detailed above.
1328
 * \param beta The randomness used in the refinement step when merging. A small
1329
 *    amount of randomness (\c beta = 0.01) typically works well.
1330
 * \param start Start from membership vector. If this is true, the optimization
1331
 *    will start from the provided membership vector. If this is false, the
1332
 *    optimization will start from a singleton partition.
1333
 * \param n_iterations Iterate the core Leiden algorithm the indicated number
1334
 *    of times. If this is a negative number, it will continue iterating until
1335
 *    an iteration did not change the clustering. Two iterations are often
1336
 *    sufficient, thus 2 is a reasonable default.
1337
 * \param membership The membership vector. If \p start is set to \c false,
1338
 *    it will be resized appropriately. If \p start is \c true, it must be
1339
 *    a valid membership vector for the given \p graph.
1340
 * \param nb_clusters The number of clusters contained in the final \p membership.
1341
 *    If \c NULL, the number of clusters will not be returned.
1342
 * \param quality The quality of the partition, in terms of the objective
1343
 *    function selected by \p objective. If \c NULL the quality will
1344
 *    not be calculated.
1345
 * \return Error code.
1346
 *
1347
 * Time complexity: near linear on sparse graphs.
1348
 *
1349
 * \sa \ref igraph_community_leiden() for a more flexible interface that
1350
 * allows specifying raw vertex weights.
1351
 */
1352
igraph_error_t igraph_community_leiden_simple(
1353
        const igraph_t *graph,
1354
        const igraph_vector_t *weights,
1355
        igraph_leiden_objective_t objective,
1356
        igraph_real_t resolution,
1357
        igraph_real_t beta,
1358
        igraph_bool_t start,
1359
        igraph_integer_t n_iterations,
1360
        igraph_vector_int_t *membership,
1361
        igraph_integer_t *nb_clusters,
1362
0
        igraph_real_t *quality) {
1363
1364
0
    const igraph_integer_t vcount = igraph_vcount(graph);
1365
0
    const igraph_integer_t ecount = igraph_ecount(graph);
1366
0
    const igraph_bool_t directed = igraph_is_directed(graph);
1367
0
    igraph_vector_t vertex_out_weights, vertex_in_weights;
1368
0
    igraph_vector_int_t i_membership, *p_membership;
1369
0
    igraph_real_t min_weight = IGRAPH_INFINITY;
1370
1371
    /* Basic weight vector validation, calculate properties used for validation steps
1372
     * specific to different objective functions. */
1373
0
    if (weights) {
1374
0
        if (igraph_vector_size(weights) != ecount) {
1375
0
            IGRAPH_ERROR("Edge weight vector length does not match number of edges.", IGRAPH_EINVAL);
1376
0
        }
1377
0
        for (igraph_integer_t i=0; i < ecount; i++) {
1378
0
            igraph_real_t w = VECTOR(*weights)[i];
1379
0
            if (w < min_weight) {
1380
0
                min_weight = w;
1381
0
            }
1382
0
            if (! isfinite(w)) {
1383
0
                IGRAPH_ERRORF("Edge weights must not be infinite or NaN, got %g.",
1384
0
                              IGRAPH_EINVAL, w);
1385
0
            }
1386
0
        }
1387
0
    }
1388
1389
0
    IGRAPH_VECTOR_INIT_FINALLY(&vertex_out_weights, vcount);
1390
0
    if (directed) {
1391
0
        IGRAPH_VECTOR_INIT_FINALLY(&vertex_in_weights, vcount);
1392
0
    }
1393
1394
    /* igraph_community_leiden() always requires an initialized membership vector
1395
     * of the correct size to be given. We relax this requirement to the case
1396
     * when start = true. */
1397
0
    if (start) {
1398
0
        if (!membership) {
1399
0
            IGRAPH_ERROR("Requesting to start the computation from a specific "
1400
0
                         "community assignment, but no membership vector given.",
1401
0
                         IGRAPH_EINVAL);
1402
0
        }
1403
0
        if (igraph_vector_int_size(membership) != vcount) {
1404
0
            IGRAPH_ERRORF("Requesting to start the computation from a specific "
1405
0
                          "community assignment, but the given membership vector "
1406
0
                          "has a different size (%" IGRAPH_PRId " than the vertex "
1407
0
                          "count (%" IGRAPH_PRId ").",
1408
0
                          IGRAPH_EINVAL,
1409
0
                          igraph_vector_int_size(membership), vcount);
1410
0
        }
1411
0
        p_membership = membership;
1412
0
    } else {
1413
0
        if (!membership) {
1414
0
            IGRAPH_VECTOR_INT_INIT_FINALLY(&i_membership, vcount);
1415
0
            p_membership = &i_membership;
1416
0
        } else {
1417
0
            IGRAPH_CHECK(igraph_vector_int_resize(membership, vcount));
1418
0
            p_membership = membership;
1419
0
        }
1420
0
    }
1421
1422
0
    switch (objective) {
1423
0
    case IGRAPH_LEIDEN_OBJECTIVE_MODULARITY:
1424
0
        if (min_weight < 0) {
1425
0
            IGRAPH_ERRORF("Edge weights must not be negative for Leiden community "
1426
0
                          "detection with modularity objective function, got %g.",
1427
0
                          IGRAPH_EINVAL,
1428
0
                          min_weight);
1429
0
        }
1430
1431
0
        IGRAPH_CHECK(igraph_strength(
1432
0
            graph, &vertex_out_weights,
1433
0
            igraph_vss_all(), IGRAPH_OUT, IGRAPH_LOOPS, weights));
1434
0
        if (directed) {
1435
0
            IGRAPH_CHECK(igraph_strength(
1436
0
                graph, &vertex_in_weights,
1437
0
                igraph_vss_all(), IGRAPH_IN, IGRAPH_LOOPS, weights));
1438
0
        }
1439
1440
        /* If directed, the sum of vertex_out_weights is the total edge weight.
1441
         * If undirected, it is twice the total edge weight. */
1442
0
        resolution /= igraph_vector_sum(&vertex_out_weights);
1443
1444
0
        break;
1445
1446
0
    case IGRAPH_LEIDEN_OBJECTIVE_CPM:
1447
        /* TODO: Potential minor optimization is to use the same vector for both. */
1448
0
        igraph_vector_fill(&vertex_out_weights, 1);
1449
0
        if (directed) {
1450
0
            igraph_vector_fill(&vertex_in_weights, 1);
1451
0
        }
1452
1453
0
        break;
1454
1455
0
    case IGRAPH_LEIDEN_OBJECTIVE_ER:
1456
0
        if (min_weight < 0) {
1457
0
            IGRAPH_ERRORF("Edge weights must not be negative for Leiden community "
1458
0
                          "detection with ER objective function, got %g.",
1459
0
                          IGRAPH_EINVAL,
1460
0
                          min_weight);
1461
0
        }
1462
1463
        /* TODO: Potential minor optimization is to use the same vector for both. */
1464
0
        igraph_vector_fill(&vertex_out_weights, 1);
1465
0
        if (directed) {
1466
0
            igraph_vector_fill(&vertex_in_weights, 1);
1467
0
        }
1468
1469
0
        {
1470
0
            igraph_real_t p;
1471
            /* Note: Loops must be allowed, as the aggregation step of the
1472
             * algorithm effectively creates them. */
1473
0
            IGRAPH_CHECK(igraph_density(graph, weights, &p, /* loops */ true));
1474
0
            resolution *= p;
1475
0
        }
1476
1477
0
        break;
1478
1479
1480
0
    default:
1481
0
        IGRAPH_ERROR("Invalid objective function for Leiden community detection.",
1482
0
                     IGRAPH_EINVAL);
1483
0
    }
1484
1485
0
    IGRAPH_CHECK(igraph_community_leiden(
1486
0
        graph, weights,
1487
0
        &vertex_out_weights, directed ? &vertex_in_weights : NULL,
1488
0
        resolution, beta, start, n_iterations, p_membership, nb_clusters, quality));
1489
1490
0
    if (!membership) {
1491
0
        igraph_vector_int_destroy(&i_membership);
1492
0
        IGRAPH_FINALLY_CLEAN(1);
1493
0
    }
1494
1495
0
    if (directed) {
1496
0
        igraph_vector_destroy(&vertex_in_weights);
1497
0
        IGRAPH_FINALLY_CLEAN(1);
1498
0
    }
1499
0
    igraph_vector_destroy(&vertex_out_weights);
1500
0
    IGRAPH_FINALLY_CLEAN(1);
1501
1502
0
    return IGRAPH_SUCCESS;
1503
0
}