Coverage Report

Created: 2025-07-11 06:25

/src/igraph/src/community/label_propagation.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   IGraph library.
3
   Copyright (C) 2007-2022  The igraph development team <igraph@igraph.org>
4
5
   This program is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 2 of the License, or
8
   (at your option) any later version.
9
10
   This program is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU General Public License for more details.
14
15
   You should have received a copy of the GNU General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
17
*/
18
19
#include "igraph_community.h"
20
21
#include "igraph_adjlist.h"
22
#include "igraph_dqueue.h"
23
#include "igraph_interface.h"
24
#include "igraph_memory.h"
25
#include "igraph_random.h"
26
27
#include "core/interruption.h"
28
29
static igraph_error_t community_label_propagation(
30
        const igraph_t *graph,
31
        igraph_vector_int_t *membership,
32
        igraph_neimode_t mode,
33
        const igraph_vector_t *weights,
34
        const igraph_vector_bool_t *fixed,
35
6.63k
        igraph_bool_t retention) {
36
37
6.63k
    const igraph_integer_t no_of_nodes = igraph_vcount(graph);
38
6.63k
    igraph_integer_t no_of_not_fixed_nodes = 0;
39
6.63k
    igraph_integer_t i, j, k;
40
6.63k
    igraph_adjlist_t al;
41
6.63k
    igraph_inclist_t il;
42
6.63k
    igraph_bool_t running, control_iteration;
43
44
6.63k
    igraph_vector_t label_weights;
45
6.63k
    igraph_vector_int_t dominant_labels, nonzero_labels, node_order;
46
6.63k
    igraph_neimode_t reverse_mode;
47
6.63k
    int iter = 0; /* interruption counter */
48
49
6.63k
    reverse_mode = IGRAPH_REVERSE_MODE(mode);
50
51
    /* Create an adjacency/incidence list representation for efficiency.
52
    * For the unweighted case, the adjacency list is enough. For the
53
    * weighted case, we need the incidence list */
54
6.63k
    if (weights) {
55
6.63k
        IGRAPH_CHECK(igraph_inclist_init(graph, &il, reverse_mode, IGRAPH_LOOPS_ONCE));
56
6.63k
        IGRAPH_FINALLY(igraph_inclist_destroy, &il);
57
6.63k
    } else {
58
0
        IGRAPH_CHECK(igraph_adjlist_init(graph, &al, reverse_mode, IGRAPH_LOOPS_ONCE, IGRAPH_MULTIPLE));
59
0
        IGRAPH_FINALLY(igraph_adjlist_destroy, &al);
60
0
    }
61
62
    /* Create storage space for counting distinct labels and dominant ones */
63
6.63k
    IGRAPH_VECTOR_INIT_FINALLY(&label_weights, no_of_nodes);
64
6.63k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&dominant_labels, 0);
65
6.63k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&nonzero_labels, 0);
66
6.63k
    IGRAPH_CHECK(igraph_vector_int_reserve(&dominant_labels, 2));
67
68
    /* Initialize node ordering vector with only the not fixed nodes */
69
6.63k
    if (fixed) {
70
0
        IGRAPH_VECTOR_INT_INIT_FINALLY(&node_order, no_of_nodes);
71
0
        for (i = 0; i < no_of_nodes; i++) {
72
0
            if (!VECTOR(*fixed)[i]) {
73
0
                VECTOR(node_order)[no_of_not_fixed_nodes] = i;
74
0
                no_of_not_fixed_nodes++;
75
0
            }
76
0
        }
77
0
        IGRAPH_CHECK(igraph_vector_int_resize(&node_order, no_of_not_fixed_nodes));
78
6.63k
    } else {
79
6.63k
        IGRAPH_CHECK(igraph_vector_int_init_range(&node_order, 0, no_of_nodes));
80
6.63k
        IGRAPH_FINALLY(igraph_vector_int_destroy, &node_order);
81
6.63k
        no_of_not_fixed_nodes = no_of_nodes;
82
6.63k
    }
83
84
    /* There are two modes of operation in this implementation: retention or
85
     * dominance. When using retention, we prefer to keep the current label of a node.
86
     * Only if the current label is not among the dominant labels will we
87
     * update the label. If a label changes, we will continue to iterate
88
     * over all nodes.
89
     *
90
     * When not using retention we check for dominance after each iteration. This
91
     * is implemented as two alternating types of iterations, one for changing
92
     * labels and the other one for checking the end condition - every vertex in the
93
     * graph has a label to which the maximum number of its neighbors belongs. If
94
     * control_iteration is true, we are just checking the end condition and not
95
     * relabeling nodes.
96
     */
97
98
6.63k
    control_iteration = true;
99
6.63k
    running = true;
100
29.3k
    while (running) {
101
22.7k
        igraph_integer_t v1, num_neis;
102
22.7k
        igraph_real_t max_count;
103
22.7k
        igraph_vector_int_t *neis;
104
22.7k
        igraph_vector_int_t *ineis;
105
22.7k
        igraph_bool_t was_zero;
106
107
22.7k
        IGRAPH_ALLOW_INTERRUPTION_LIMITED(iter, 1 << 8);
108
109
22.7k
        if (retention) {
110
            /* We stop in this iteration by default, unless a label changes */
111
8.55k
            running = false;
112
            /* Shuffle the node ordering vector */
113
8.55k
            igraph_vector_int_shuffle(&node_order);
114
14.1k
        } else {
115
14.1k
            if (control_iteration) {
116
                /* If we are in the control iteration, we expect in the beginning of
117
                the iteration that all vertices meet the end condition, so 'running' is false.
118
                If some of them does not, 'running' is set to true later in the code. */
119
8.75k
                running = false;
120
8.75k
            } else {
121
                /* Shuffle the node ordering vector if we are in the label updating iteration */
122
5.43k
                igraph_vector_int_shuffle(&node_order);
123
5.43k
            }
124
14.1k
        }
125
126
        /* In the prescribed order, loop over the vertices and reassign labels */
127
1.10M
        for (i = 0; i < no_of_not_fixed_nodes; i++) {
128
1.08M
            v1 = VECTOR(node_order)[i];
129
130
            /* Count the weights corresponding to different labels */
131
1.08M
            igraph_vector_int_clear(&dominant_labels);
132
1.08M
            igraph_vector_int_clear(&nonzero_labels);
133
1.08M
            max_count = 0.0;
134
1.08M
            if (weights) {
135
136
1.08M
                ineis = igraph_inclist_get(&il, v1);
137
1.08M
                num_neis = igraph_vector_int_size(ineis);
138
139
2.64M
                for (j = 0; j < num_neis; j++) {
140
1.55M
                    k = VECTOR(*membership)[IGRAPH_OTHER(graph, VECTOR(*ineis)[j], v1)];
141
1.55M
                    if (k < 0) {
142
0
                        continue;    /* skip if it has no label yet */
143
0
                    }
144
1.55M
                    was_zero = (VECTOR(label_weights)[k] == 0);
145
1.55M
                    VECTOR(label_weights)[k] += VECTOR(*weights)[VECTOR(*ineis)[j]];
146
147
1.55M
                    if (was_zero && VECTOR(label_weights)[k] != 0) {
148
                        /* weights just became nonzero */
149
812k
                        IGRAPH_CHECK(igraph_vector_int_push_back(&nonzero_labels, k));
150
812k
                    }
151
152
1.55M
                    if (max_count < VECTOR(label_weights)[k]) {
153
1.20M
                        max_count = VECTOR(label_weights)[k];
154
1.20M
                        IGRAPH_CHECK(igraph_vector_int_resize(&dominant_labels, 1));
155
1.20M
                        VECTOR(dominant_labels)[0] = k;
156
1.20M
                    } else if (max_count == VECTOR(label_weights)[k]) {
157
16.0k
                        IGRAPH_CHECK(igraph_vector_int_push_back(&dominant_labels, k));
158
16.0k
                    }
159
1.55M
                }
160
1.08M
            } else {
161
162
0
                neis = igraph_adjlist_get(&al, v1);
163
0
                num_neis = igraph_vector_int_size(neis);
164
165
0
                for (j = 0; j < num_neis; j++) {
166
167
0
                    k = VECTOR(*membership)[VECTOR(*neis)[j]];
168
0
                    if (k < 0) {
169
0
                        continue;    /* skip if it has no label yet */
170
0
                    }
171
0
                    VECTOR(label_weights)[k]++;
172
173
0
                    if (VECTOR(label_weights)[k] == 1) {
174
                        /* weights just became nonzero */
175
0
                        IGRAPH_CHECK(igraph_vector_int_push_back(&nonzero_labels, k));
176
0
                    }
177
178
0
                    if (max_count < VECTOR(label_weights)[k]) {
179
0
                        max_count = VECTOR(label_weights)[k];
180
0
                        IGRAPH_CHECK(igraph_vector_int_resize(&dominant_labels, 1));
181
0
                        VECTOR(dominant_labels)[0] = k;
182
0
                    } else if (max_count == VECTOR(label_weights)[k]) {
183
0
                        IGRAPH_CHECK(igraph_vector_int_push_back(&dominant_labels, k));
184
0
                    }
185
0
                }
186
0
            }
187
188
1.08M
            if (igraph_vector_int_size(&dominant_labels) > 0) {
189
438k
                if (retention) {
190
                    /* If we are using retention, we first check if the current label
191
                    is among the maximum label. */
192
160k
                    j = (long)VECTOR(*membership)[v1];
193
160k
                    if (j < 0 || /* Doesn't have a label yet */
194
160k
                        VECTOR(label_weights)[j] == 0 || /* Label not present in neighbors */
195
160k
                        VECTOR(label_weights)[j] < max_count /* Label not dominant */) {
196
                        /* Select randomly from the dominant labels */
197
46.1k
                        k = RNG_INTEGER(0, igraph_vector_int_size(&dominant_labels) - 1);
198
46.1k
                        k = VECTOR(dominant_labels)[(long int)k];
199
                        /* If label changes, we will continue running */
200
46.1k
                        if (k != j) {
201
46.1k
                            running = true;
202
46.1k
                        }
203
                        /* Actually change label */
204
46.1k
                        VECTOR(*membership)[v1] = k;
205
46.1k
                    }
206
278k
                } else {
207
                    /* We are not using retention, so check if we should do a control iteration
208
                    or an update iteration. */
209
278k
                    if (control_iteration) {
210
                        /* Check if the _current_ label of the node is also dominant */
211
164k
                        k = VECTOR(*membership)[v1];
212
164k
                        if (k < 0 ||                              /* No label assigned yet or */
213
164k
                            VECTOR(label_weights)[k] < max_count /* Label is not maximum */
214
164k
                           ) {
215
                            /* Nope, we need at least one more iteration */
216
56.1k
                            running = true;
217
56.1k
                        }
218
164k
                    } else {
219
                        /* Select randomly from the dominant labels */
220
113k
                        k = RNG_INTEGER(0, igraph_vector_int_size(&dominant_labels) - 1);
221
113k
                        VECTOR(*membership)[v1] = VECTOR(dominant_labels)[k];
222
113k
                    }
223
278k
                }
224
438k
            }
225
226
            /* Clear the nonzero elements in label_weights */
227
1.08M
            num_neis = igraph_vector_int_size(&nonzero_labels);
228
1.89M
            for (j = 0; j < num_neis; j++) {
229
812k
                VECTOR(label_weights)[VECTOR(nonzero_labels)[j]] = 0;
230
812k
            }
231
1.08M
        }
232
233
        /* Alternating between control iterations and label updating iterations */
234
22.7k
        if (!retention) {
235
14.1k
            control_iteration = !control_iteration;
236
14.1k
        }
237
22.7k
    }
238
239
6.63k
    if (weights) {
240
6.63k
        igraph_inclist_destroy(&il);
241
6.63k
    } else {
242
0
        igraph_adjlist_destroy(&al);
243
0
    }
244
6.63k
    IGRAPH_FINALLY_CLEAN(1);
245
246
6.63k
    igraph_vector_int_destroy(&node_order);
247
6.63k
    igraph_vector_int_destroy(&nonzero_labels);
248
6.63k
    igraph_vector_int_destroy(&dominant_labels);
249
6.63k
    igraph_vector_destroy(&label_weights);
250
6.63k
    IGRAPH_FINALLY_CLEAN(4);
251
252
6.63k
    return IGRAPH_SUCCESS;
253
6.63k
}
254
255
static igraph_error_t community_fast_label_propagation(
256
        const igraph_t *graph,
257
        igraph_vector_int_t *membership,
258
        igraph_neimode_t mode,
259
        const igraph_vector_t *weights,
260
3.31k
        const igraph_vector_bool_t *fixed) {
261
262
3.31k
    const igraph_integer_t no_of_nodes = igraph_vcount(graph);
263
3.31k
    igraph_integer_t no_of_not_fixed_nodes = 0;
264
3.31k
    igraph_integer_t i, j, k;
265
3.31k
    igraph_inclist_t il;
266
3.31k
    igraph_adjlist_t al;
267
268
3.31k
    igraph_vector_t label_weights;
269
3.31k
    igraph_vector_int_t dominant_labels, nonzero_labels, node_order;
270
3.31k
    igraph_dqueue_int_t queue;
271
3.31k
    igraph_vector_bool_t in_queue;
272
3.31k
    igraph_neimode_t reverse_mode;
273
3.31k
    int iter = 0; /* interruption counter */
274
275
3.31k
    reverse_mode = IGRAPH_REVERSE_MODE(mode);
276
277
3.31k
    if (weights) {
278
3.31k
        IGRAPH_CHECK(igraph_inclist_init(graph, &il, reverse_mode, IGRAPH_LOOPS_ONCE));
279
3.31k
        IGRAPH_FINALLY(igraph_inclist_destroy, &il);
280
3.31k
    } else {
281
0
        IGRAPH_CHECK(igraph_adjlist_init(graph, &al, reverse_mode, IGRAPH_LOOPS_ONCE, IGRAPH_MULTIPLE));
282
0
        IGRAPH_FINALLY(igraph_adjlist_destroy, &al);
283
0
    }
284
285
    /* Create storage space for counting distinct labels and dominant ones */
286
3.31k
    IGRAPH_VECTOR_INIT_FINALLY(&label_weights, no_of_nodes);
287
3.31k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&dominant_labels, 0);
288
3.31k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&nonzero_labels, 0);
289
3.31k
    IGRAPH_CHECK(igraph_vector_int_reserve(&dominant_labels, 2));
290
291
    /* Initialize node ordering vector with only the not fixed nodes */
292
3.31k
    IGRAPH_DQUEUE_INT_INIT_FINALLY(&queue, no_of_nodes);
293
3.31k
    IGRAPH_VECTOR_BOOL_INIT_FINALLY(&in_queue, no_of_nodes);
294
295
    /* Initialize node ordering vector with only the not fixed nodes */
296
3.31k
    if (fixed) {
297
0
        IGRAPH_VECTOR_INT_INIT_FINALLY(&node_order, no_of_nodes);
298
0
        for (i = 0; i < no_of_nodes; i++) {
299
0
            if (!VECTOR(*fixed)[i]) {
300
0
                VECTOR(node_order)[no_of_not_fixed_nodes] = i;
301
0
                no_of_not_fixed_nodes++;
302
0
            }
303
0
        }
304
0
        IGRAPH_CHECK(igraph_vector_int_resize(&node_order, no_of_not_fixed_nodes));
305
3.31k
    } else {
306
3.31k
        IGRAPH_CHECK(igraph_vector_int_init_range(&node_order, 0, no_of_nodes));
307
3.31k
        IGRAPH_FINALLY(igraph_vector_int_destroy, &node_order);
308
3.31k
        no_of_not_fixed_nodes = no_of_nodes;
309
3.31k
    }
310
311
152k
    for (i = 0; i < no_of_not_fixed_nodes; i++) {
312
149k
        IGRAPH_CHECK(igraph_dqueue_int_push(&queue, VECTOR(node_order)[i]));
313
149k
        VECTOR(in_queue)[VECTOR(node_order)[i]] = true;
314
149k
    }
315
3.31k
    igraph_vector_int_destroy(&node_order);
316
3.31k
    IGRAPH_FINALLY_CLEAN(1);
317
318
167k
    while (!igraph_dqueue_int_empty(&queue)) {
319
164k
        igraph_integer_t v1, v2, e = -1, num_neis;
320
164k
        igraph_real_t max_count;
321
164k
        igraph_vector_int_t *neis;
322
164k
        igraph_bool_t was_zero;
323
324
164k
        IGRAPH_ALLOW_INTERRUPTION_LIMITED(iter, 1 << 8);
325
326
164k
        v1 = igraph_dqueue_int_pop(&queue);
327
164k
        VECTOR(in_queue)[v1] = false;
328
329
        /* Count the weights corresponding to different labels */
330
164k
        igraph_vector_int_clear(&dominant_labels);
331
164k
        igraph_vector_int_clear(&nonzero_labels);
332
164k
        max_count = 0.0;
333
164k
        if (weights) {
334
164k
            neis = igraph_inclist_get(&il, v1);
335
164k
        } else {
336
0
            neis = igraph_adjlist_get(&al, v1);
337
0
        }
338
339
164k
        num_neis = igraph_vector_int_size(neis);
340
431k
        for (j = 0; j < num_neis; j++) {
341
267k
            if (weights) {
342
267k
                e = VECTOR(*neis)[j];
343
267k
                v2 = IGRAPH_OTHER(graph, e, v1);
344
267k
            } else {
345
0
                v2 = VECTOR(*neis)[j];
346
0
            }
347
267k
            k = VECTOR(*membership)[v2];
348
267k
            if (k < 0) {
349
0
                continue;    /* skip if it has no label yet */
350
0
            }
351
267k
            was_zero = (VECTOR(label_weights)[k] == 0);
352
267k
            VECTOR(label_weights)[k] += (weights ? VECTOR(*weights)[e] : 1);
353
354
267k
            if (was_zero && VECTOR(label_weights)[k] >= 0) {
355
                /* counter just became non-negative */
356
152k
                IGRAPH_CHECK(igraph_vector_int_push_back(&nonzero_labels, k));
357
152k
            }
358
359
267k
            if (max_count < VECTOR(label_weights)[k]) {
360
182k
                max_count = VECTOR(label_weights)[k];
361
182k
                IGRAPH_CHECK(igraph_vector_int_resize(&dominant_labels, 1));
362
182k
                VECTOR(dominant_labels)[0] = k;
363
182k
            } else if (max_count == VECTOR(label_weights)[k]) {
364
3.57k
                IGRAPH_CHECK(igraph_vector_int_push_back(&dominant_labels, k));
365
3.57k
            }
366
267k
        }
367
368
164k
        if (igraph_vector_int_size(&dominant_labels) > 0) {
369
65.5k
            igraph_integer_t current_label = VECTOR(*membership)[v1];
370
371
            /* Select randomly from the dominant labels */
372
65.5k
            k = RNG_INTEGER(0, igraph_vector_int_size(&dominant_labels) - 1);
373
65.5k
            igraph_integer_t new_label = VECTOR(dominant_labels)[k]; /* a dominant label */
374
375
            /* Check if the _current_ label of the node is not the same */
376
65.5k
            if (new_label != current_label) {
377
                /* We still need to consider its neighbors not in the new community */
378
181k
                for (j = 0; j < num_neis; j++) {
379
135k
                    if (weights) {
380
135k
                        e = VECTOR(*neis)[j];
381
135k
                        v2 = IGRAPH_OTHER(graph, e, v1);
382
135k
                    } else {
383
0
                        v2 = VECTOR(*neis)[j];
384
0
                    }
385
135k
                    if (!VECTOR(in_queue)[v2]) {
386
42.3k
                        igraph_integer_t neigh_label = VECTOR(*membership)[v2]; /* neighbor community */
387
42.3k
                        if (neigh_label != new_label && /* not in new community */
388
42.3k
                            (fixed == NULL || !VECTOR(*fixed)[v2]) ) { /* not fixed */
389
14.8k
                            IGRAPH_CHECK(igraph_dqueue_int_push(&queue, v2));
390
14.8k
                            VECTOR(in_queue)[v2] = true;
391
14.8k
                        }
392
42.3k
                    }
393
135k
                }
394
46.1k
            }
395
65.5k
            VECTOR(*membership)[v1] = new_label;
396
65.5k
        }
397
398
        /* Clear the nonzero elements in label_weights */
399
164k
        num_neis = igraph_vector_int_size(&nonzero_labels);
400
316k
        for (j = 0; j < num_neis; j++) {
401
152k
            VECTOR(label_weights)[VECTOR(nonzero_labels)[j]] = 0;
402
152k
        }
403
164k
    }
404
405
3.31k
    if (weights) {
406
3.31k
        igraph_inclist_destroy(&il);
407
3.31k
    } else {
408
0
        igraph_adjlist_destroy(&al);
409
0
    }
410
3.31k
    IGRAPH_FINALLY_CLEAN(1);
411
412
3.31k
    igraph_vector_bool_destroy(&in_queue);
413
3.31k
    igraph_dqueue_int_destroy(&queue);
414
3.31k
    igraph_vector_destroy(&label_weights);
415
3.31k
    igraph_vector_int_destroy(&dominant_labels);
416
3.31k
    igraph_vector_int_destroy(&nonzero_labels);
417
3.31k
    IGRAPH_FINALLY_CLEAN(5);
418
419
3.31k
    return IGRAPH_SUCCESS;
420
3.31k
}
421
422
/**
423
 * \ingroup communities
424
 * \function igraph_community_label_propagation
425
 * \brief Community detection based on label propagation.
426
 *
427
 * This function implements the label propagation-based community detection
428
 * algorithm described by Raghavan, Albert and Kumara (2007). This version extends
429
 * the original method by the ability to take edge weights into consideration
430
 * and also by allowing some labels to be fixed. In addition, it implements
431
 * the fast label propagation alternative introduced by Traag and Šubelj (2023).
432
 *
433
 * </para><para>
434
 * The algorithm works by iterating over nodes and updating the label of a node
435
 * based on the labels of its neighbors. The labels that are most frequent among
436
 * the neighbors are said to be dominant labels. The label of a node is always
437
 * updated to a dominant label. The algorithm guarantees that the label for each
438
 * is dominant when it terminates.
439
 *
440
 * </para><para>
441
 * There are several variants implemented, which work slightly differently with
442
 * the dominance of labels. Nodes with a dominant label might no longer have a
443
 * dominant label if one of their neighbors change label. In \c
444
 * IGRAPH_LPA_DOMINANCE an additional iteration over all nodes is made after
445
 * updating all labels to double check whether all nodes indeed have a dominant
446
 * label. When updating the label of a node, labels are always sampled from
447
 * among all dominant labels. The algorithm stops when all nodes have dominant
448
 * labels. In \c IGRAPH_LPA_RETENTION instead labels are only updated when they
449
 * are not dominant. That is, they retain their current label whenever the
450
 * current label is already dominant. The algorithm does not make an additional
451
 * iteration to check for dominance. Instead, it simply keeps track whether a
452
 * label has been updated, and terminates if no updates have been made. In \c
453
 * IGRAPH_LPA_FAST labels are sampled from among all dominant labels, similar to
454
 * \c IGRAPH_LPA_DOMINANCE. Instead of iterating over all nodes, it keeps track
455
 * of a queue of nodes that should be considered. Nodes are popped from the
456
 * queue when they are considered for update. When the label of a node is
457
 * updated, the node's neighbors are added to the queue again (if they weren't
458
 * already in the queue). The algorithm terminates when the queue is empty. All
459
 * variants guarantee that the labels for all nodes are dominant.
460
 *
461
 * </para><para>
462
 * Weights are taken into account as follows: when the new label of node
463
 * \c i is determined, the algorithm iterates over all edges incident on
464
 * node \c i and calculate the total weight of edges leading to other
465
 * nodes with label 0, 1, 2, ..., \c k - 1 (where \c k is the number of possible
466
 * labels). The new label of node \c i will then be the label whose edges
467
 * (among the ones incident on node \c i) have the highest total weight.
468
 *
469
 * </para><para>
470
 * For directed graphs, it is important to know that labels can circulate
471
 * freely only within the strongly connected components of the graph and
472
 * may propagate in only one direction (or not at all) \em between strongly
473
 * connected components. You should treat directed edges as directed only
474
 * if you are aware of the consequences.
475
 *
476
 * </para><para>
477
 * References:
478
 *
479
 * </para><para>
480
 * Raghavan, U.N. and Albert, R. and Kumara, S.: Near linear time algorithm to
481
 * detect community structures in large-scale networks. Phys Rev E 76, 036106
482
 * (2007). https://doi.org/10.1103/PhysRevE.76.036106
483
 *
484
 * </para><para>
485
 * Šubelj, L.: Label propagation for clustering. Chapter in "Advances in
486
 * Network Clustering and Blockmodeling" edited by P. Doreian, V. Batagelj
487
 * &amp; A. Ferligoj (Wiley, New York, 2018).
488
 * https://doi.org/10.1002/9781119483298.ch5
489
 * https://arxiv.org/abs/1709.05634
490
 *
491
 * </para><para>
492
 * Traag, V. A., and Šubelj, L.: Large network community detection by fast
493
 * label propagation. Scientific Reports, 13:1, (2023).
494
 * https://doi.org/10.1038/s41598-023-29610-z
495
 * https://arxiv.org/abs/2209.13338
496
 *
497
 * \param graph The input graph. Note that the algorithm was originally
498
 *    defined for undirected graphs. You are advised to set \p mode to
499
 *    \c IGRAPH_ALL if you pass a directed graph here to treat it as
500
 *    undirected.
501
 * \param membership The membership vector, the result is returned here.
502
 *    For each vertex it gives the ID of its community (label).
503
 * \param mode Whether to consider edge directions for the label propagation,
504
 *    and if so, which direction the labels should propagate. Ignored for
505
 *    undirected graphs. \c IGRAPH_ALL means to ignore edge directions (even
506
 *    in directed graphs). \c IGRAPH_OUT means to propagate labels along the
507
 *    natural direction of the edges. \c IGRAPH_IN means to propagate labels
508
 *    \em backwards (i.e. from head to tail). It is advised to set this to
509
 *    \c IGRAPH_ALL unless you are specifically interested in the effect of
510
 *    edge directions.
511
 * \param weights The weight vector, it should contain a positive
512
 *    weight for all the edges.
513
 * \param initial The initial state. If \c NULL, every vertex will have
514
 *   a different label at the beginning. Otherwise it must be a vector
515
 *   with an entry for each vertex. Non-negative values denote different
516
 *   labels, negative entries denote vertices without labels. Unlabeled
517
 *   vertices which are not reachable from any labeled ones will remain
518
 *   unlabeled at the end of the label propagation process, and will be
519
 *   labeled in an additional step to avoid returning negative values in
520
 *   \p membership. In undirected graphs, this happens when entire connected
521
 *   components are unlabeled. Then, each unlabeled component will receive
522
 *   its own separate label. In directed graphs, the outcome of the
523
 *   additional labeling should be considered undefined and may change
524
 *   in the future; please do not rely on it.
525
 * \param fixed Boolean vector denoting which labels are fixed. Of course
526
 *   this makes sense only if you provided an initial state, otherwise
527
 *   this element will be ignored. Note that vertices without labels
528
 *   cannot be fixed. The fixed status will be ignored for these with a
529
 *   warning. Also note that label numbers by themselves have no meaning,
530
 *   and igraph may renumber labels. However, co-membership constraints
531
 *   will be respected: two vertices can be fixed to be in the same or in
532
 *   different communities.
533
 * \param lpa_variant Which variant of label propagation algorithm to run.
534
 *   One of
535
 *   \c IGRAPH_LPA_DOMINANCE Check for dominance of all nodes after each
536
 *                           iteration.
537
 *   \c IGRAPH_LPA_RETENTION Keep current label if among dominant labels,
538
 *                           only check if labels changed.
539
 *   \c IGRAPH_LPA_FAST      Sample from dominant labels, only check
540
 *                           neighbors.
541
 * \return Error code.
542
 *
543
 * Time complexity: O(m+n)
544
 *
545
 * \example examples/simple/igraph_community_label_propagation.c
546
 */
547
igraph_error_t igraph_community_label_propagation(const igraph_t *graph,
548
        igraph_vector_int_t *membership,
549
        igraph_neimode_t mode,
550
        const igraph_vector_t *weights,
551
        const igraph_vector_int_t *initial,
552
        const igraph_vector_bool_t *fixed,
553
9.95k
        igraph_lpa_variant_t lpa_variant) {
554
555
9.95k
    const igraph_integer_t no_of_nodes = igraph_vcount(graph);
556
9.95k
    const igraph_integer_t no_of_edges = igraph_ecount(graph);
557
9.95k
    igraph_integer_t no_of_not_fixed_nodes = no_of_nodes;
558
9.95k
    igraph_integer_t i, j, k;
559
9.95k
    igraph_bool_t unlabelled_left;
560
561
    /* We make a copy of 'fixed' as a pointer into 'fixed_copy' after casting
562
     * away the constness, and promise ourselves that we will make a proper
563
     * copy of 'fixed' into 'fixed_copy' as soon as we start mutating it */
564
9.95k
    igraph_vector_bool_t *fixed_copy = (igraph_vector_bool_t *) fixed;
565
566
    /* Unlabelled nodes are represented with -1. */
567
9.95k
#define IS_UNLABELLED(x) (VECTOR(*membership)[x] < 0)
568
569
    /* Do some initial checks */
570
9.95k
    if (fixed && igraph_vector_bool_size(fixed) != no_of_nodes) {
571
0
        IGRAPH_ERROR("Fixed labeling vector length must agree with number of nodes.", IGRAPH_EINVAL);
572
0
    }
573
9.95k
    if (weights) {
574
9.95k
        if (igraph_vector_size(weights) != no_of_edges) {
575
0
            IGRAPH_ERROR("Length of weight vector must agree with number of edges.", IGRAPH_EINVAL);
576
0
        }
577
9.95k
        if (no_of_edges > 0) {
578
9.84k
            igraph_real_t minweight = igraph_vector_min(weights);
579
9.84k
            if (minweight < 0) {
580
0
                IGRAPH_ERROR("Weights must not be negative.", IGRAPH_EINVAL);
581
0
            }
582
9.84k
            if (isnan(minweight)) {
583
0
                IGRAPH_ERROR("Weights must not be NaN.", IGRAPH_EINVAL);
584
0
            }
585
9.84k
        }
586
9.95k
    }
587
9.95k
    if (fixed && !initial) {
588
0
        IGRAPH_WARNING("Ignoring fixed vertices as no initial labeling given.");
589
0
    }
590
591
9.95k
    IGRAPH_CHECK(igraph_vector_int_resize(membership, no_of_nodes));
592
593
9.95k
    if (initial) {
594
0
        if (igraph_vector_int_size(initial) != no_of_nodes) {
595
0
            IGRAPH_ERROR("Initial labeling vector length must agree with number of nodes.", IGRAPH_EINVAL);
596
0
        }
597
        /* Check if the labels used are valid, initialize membership vector */
598
0
        for (i = 0; i < no_of_nodes; i++) {
599
0
            if (VECTOR(*initial)[i] < 0) {
600
0
                VECTOR(*membership)[i] = -1;
601
0
            } else {
602
0
                VECTOR(*membership)[i] = VECTOR(*initial)[i];
603
0
            }
604
0
        }
605
0
        if (fixed) {
606
0
            for (i = 0; i < no_of_nodes; i++) {
607
0
                if (VECTOR(*fixed)[i]) {
608
0
                    if (IS_UNLABELLED(i)) {
609
0
                        IGRAPH_WARNING("Fixed nodes cannot be unlabeled, ignoring them.");
610
611
                        /* We cannot modify 'fixed' because it is const, so we make a copy and
612
                         * modify 'fixed_copy' instead */
613
0
                        if (fixed_copy == fixed) {
614
0
                            fixed_copy = IGRAPH_CALLOC(1, igraph_vector_bool_t);
615
0
                            IGRAPH_CHECK_OOM(fixed_copy, "Insufficient memory for label propagation.");
616
0
                            IGRAPH_FINALLY(igraph_free, fixed_copy);
617
0
                            IGRAPH_CHECK(igraph_vector_bool_init_copy(fixed_copy, fixed));
618
0
                            IGRAPH_FINALLY(igraph_vector_bool_destroy, fixed_copy);
619
0
                        }
620
621
0
                        VECTOR(*fixed_copy)[i] = false;
622
0
                    } else {
623
0
                        no_of_not_fixed_nodes--;
624
0
                    }
625
0
                }
626
0
            }
627
0
        }
628
629
0
        i = igraph_vector_int_max(membership);
630
0
        if (i > no_of_nodes) {
631
0
            IGRAPH_ERROR("Elements of the initial labeling vector must be between 0 and |V|-1.", IGRAPH_EINVAL);
632
0
        }
633
9.95k
    } else {
634
457k
        for (i = 0; i < no_of_nodes; i++) {
635
447k
            VECTOR(*membership)[i] = i;
636
447k
        }
637
9.95k
    }
638
639
    /* From this point onwards we use 'fixed_copy' instead of 'fixed' */
640
9.95k
    switch (lpa_variant) {
641
3.31k
    case IGRAPH_LPA_FAST:
642
3.31k
        IGRAPH_CHECK(community_fast_label_propagation(graph, membership, mode, weights, fixed_copy));
643
3.31k
        break;
644
645
3.31k
    case IGRAPH_LPA_RETENTION:
646
3.31k
        IGRAPH_CHECK(community_label_propagation(graph, membership, mode, weights, fixed_copy, /* retention */ true ));
647
3.31k
        break;
648
649
3.31k
    case IGRAPH_LPA_DOMINANCE:
650
3.31k
        IGRAPH_CHECK(community_label_propagation(graph, membership, mode, weights, fixed_copy, /* retention */ false));
651
3.31k
        break;
652
653
3.31k
    default:
654
0
        IGRAPH_ERROR("Invalid igraph_lpa_variant_t.", IGRAPH_EINVAL);
655
9.95k
    }
656
657
    /* Permute labels in increasing order */
658
9.95k
    igraph_vector_int_t relabel_label;
659
9.95k
    IGRAPH_CHECK(igraph_vector_int_init(&relabel_label, no_of_nodes));
660
9.95k
    igraph_vector_int_fill(&relabel_label, -1);
661
9.95k
    IGRAPH_FINALLY(igraph_vector_int_destroy, &relabel_label);
662
663
9.95k
    j = 0;
664
9.95k
    unlabelled_left = false;
665
457k
    for (i = 0; i < no_of_nodes; i++) {
666
447k
        k = VECTOR(*membership)[i];
667
447k
        if (k >= 0) {
668
447k
            if (VECTOR(relabel_label)[k] == -1) {
669
                /* We have seen this label for the first time */
670
328k
                VECTOR(relabel_label)[k] = j;
671
328k
                k = j;
672
328k
                j++;
673
328k
            } else {
674
119k
                k = VECTOR(relabel_label)[k];
675
119k
            }
676
447k
        } else {
677
            /* This is an unlabeled vertex */
678
0
            unlabelled_left = true;
679
0
        }
680
447k
        VECTOR(*membership)[i] = k;
681
447k
    }
682
683
    /* If any nodes are left unlabelled, we assign the remaining labels to them,
684
     * as well as to all unlabelled nodes reachable from them.
685
     *
686
     * Note that only those nodes could remain unlabelled which were unreachable
687
     * from any labelled ones. Thus, in the undirected case, fully unlabelled
688
     * connected components remain unlabelled. Here we label each such component
689
     * with the same label.
690
     */
691
9.95k
    if (unlabelled_left) {
692
0
        igraph_dqueue_int_t q;
693
0
        igraph_vector_int_t neis;
694
695
0
        igraph_vector_int_t node_order;
696
        /* Initialize node ordering vector with only the not fixed nodes */
697
0
        if (fixed) {
698
0
            no_of_not_fixed_nodes = 0;
699
0
            IGRAPH_VECTOR_INT_INIT_FINALLY(&node_order, no_of_nodes);
700
0
            for (i = 0; i < no_of_nodes; i++) {
701
0
                if (!VECTOR(*fixed)[i]) {
702
0
                    VECTOR(node_order)[no_of_not_fixed_nodes] = i;
703
0
                    no_of_not_fixed_nodes++;
704
0
                }
705
0
            }
706
0
            IGRAPH_CHECK(igraph_vector_int_resize(&node_order, no_of_not_fixed_nodes));
707
0
        } else {
708
0
            IGRAPH_CHECK(igraph_vector_int_init_range(&node_order, 0, no_of_nodes));
709
0
            IGRAPH_FINALLY(igraph_vector_int_destroy, &node_order);
710
0
            no_of_not_fixed_nodes = no_of_nodes;
711
0
        }
712
        /* Shuffle the node ordering vector */
713
0
        igraph_vector_int_shuffle(&node_order);
714
715
0
        IGRAPH_VECTOR_INT_INIT_FINALLY(&neis, 0);
716
717
0
        IGRAPH_CHECK(igraph_dqueue_int_init(&q, 0));
718
0
        IGRAPH_FINALLY(igraph_dqueue_int_destroy, &q);
719
720
0
        for (i=0; i < no_of_not_fixed_nodes; ++i) {
721
0
            igraph_integer_t v = VECTOR(node_order)[i];
722
723
            /* Is this node unlabelled? */
724
0
            if (IS_UNLABELLED(v)) {
725
                /* If yes, we label it, and do a BFS to apply the same label
726
                 * to all other unlabelled nodes reachable from it */
727
0
                IGRAPH_CHECK(igraph_dqueue_int_push(&q, v));
728
0
                VECTOR(*membership)[v] = j;
729
0
                while (!igraph_dqueue_int_empty(&q)) {
730
0
                    igraph_integer_t ni, num_neis;
731
0
                    igraph_integer_t actnode = igraph_dqueue_int_pop(&q);
732
733
0
                    IGRAPH_CHECK(igraph_neighbors(graph, &neis, actnode, mode, IGRAPH_LOOPS, IGRAPH_MULTIPLE));
734
0
                    num_neis = igraph_vector_int_size(&neis);
735
736
0
                    for (ni = 0; ni < num_neis; ++ni) {
737
0
                        igraph_integer_t neighbor = VECTOR(neis)[ni];
738
0
                        if (IS_UNLABELLED(neighbor)) {
739
0
                            VECTOR(*membership)[neighbor] = j;
740
0
                            IGRAPH_CHECK(igraph_dqueue_int_push(&q, neighbor));
741
0
                        }
742
0
                    }
743
0
                }
744
0
                j++;
745
0
            }
746
0
        }
747
748
0
        igraph_vector_int_destroy(&neis);
749
0
        igraph_dqueue_int_destroy(&q);
750
0
        igraph_vector_int_destroy(&node_order);
751
0
        IGRAPH_FINALLY_CLEAN(3);
752
0
    }
753
754
9.95k
    igraph_vector_int_destroy(&relabel_label);
755
9.95k
    IGRAPH_FINALLY_CLEAN(1);
756
757
9.95k
    if (fixed != fixed_copy) {
758
0
        igraph_vector_bool_destroy(fixed_copy);
759
0
        IGRAPH_FREE(fixed_copy);
760
0
        IGRAPH_FINALLY_CLEAN(2);
761
0
    }
762
763
9.95k
    return IGRAPH_SUCCESS;
764
9.95k
}