Coverage Report

Created: 2025-12-14 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/igraph/src/paths/sparsifier.c
Line
Count
Source
1
/*
2
   igraph library.
3
   Copyright (C) 2021  The igraph development team <igraph@igraph.org>
4
5
   This program is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 2 of the License, or
8
   (at your option) any later version.
9
10
   This program is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU General Public License for more details.
14
15
   You should have received a copy of the GNU General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
17
*/
18
19
20
#include "igraph_paths.h"
21
22
#include "igraph_adjlist.h"
23
#include "igraph_bitset.h"
24
#include "igraph_error.h"
25
#include "igraph_interface.h"
26
#include "igraph_random.h"
27
28
#include "core/interruption.h"
29
30
/*
31
 * This internal function gets the adjacency and incidence list representation
32
 * of the current residual graph, the weight vector, the current assignment of
33
 * the vertices to clusters, whether the i-th cluster is sampled, and the
34
 * index of a single node v. The function updates the given lightest_eid vector
35
 * such that the i-th element contains the ID of the lightest edge that leads
36
 * from node v to cluster i. Similarly, the lightest_weight vector is updated
37
 * to contain the weights of these edges.
38
 *
39
 * When the is_cluster_sampled vector is provided, the
40
 * nearest_neighboring_sampled_cluster pointer is also updated to the index of
41
 * the cluster that has the smallest weight among the _sampled_ ones.
42
 *
43
 * As a pre-condition, this function requires the lightest_eid vector to be
44
 * filled with -1 and the lightest_weight vector to be filled with infinity.
45
 * This is _not_ checked within the function.
46
 *
47
 * Use the igraph_i_clean_lightest_edges_to_clusters() function to clear these vectors
48
 * after you are done with them. Avoid using igraph_vector_fill() because that
49
 * one is O(|V|), while igraph_i_clean_lightest_edge_vector() is O(d) where d
50
 * is the degree of the vertex.
51
 */
52
static igraph_error_t collect_lightest_edges_to_clusters(
53
    const igraph_adjlist_t *adjlist,
54
    const igraph_inclist_t *inclist,
55
    const igraph_vector_t *weights,
56
    const igraph_vector_int_t *clustering,
57
    const igraph_bitset_t *is_cluster_sampled,
58
    igraph_int_t v,
59
    igraph_vector_int_t *lightest_eid,
60
    igraph_vector_t *lightest_weight,
61
    igraph_vector_int_t *dirty_vids,
62
    igraph_int_t *nearest_neighboring_sampled_cluster
63
56.1k
) {
64
    // This internal function gets the residual graph, the clustering, the sampled clustering and
65
    // the vector and returns the lightest edge to each neighboring cluster and the index of the lightest
66
    // sampled cluster (if any)
67
68
56.1k
    igraph_real_t lightest_weight_to_sampled = IGRAPH_INFINITY;
69
56.1k
    const igraph_vector_int_t *adjacent_nodes = igraph_adjlist_get(adjlist, v);
70
56.1k
    const igraph_vector_int_t *incident_edges = igraph_inclist_get(inclist, v);
71
56.1k
    const igraph_int_t nlen = igraph_vector_int_size(incident_edges);
72
73
109k
    for (igraph_int_t i = 0; i < nlen; i++) {
74
53.4k
        igraph_int_t neighbor_node = VECTOR(*adjacent_nodes)[i];
75
53.4k
        igraph_int_t edge = VECTOR(*incident_edges)[i];
76
53.4k
        igraph_int_t neighbor_cluster = VECTOR(*clustering)[neighbor_node];
77
53.4k
        igraph_real_t weight = weights ? VECTOR(*weights)[edge] : 1;
78
79
        // If the weight of the edge being considered is smaller than the weight
80
        // of the lightest edge found so far that connects v to the same
81
        // cluster, remember the new minimum.
82
53.4k
        if (VECTOR(*lightest_weight)[neighbor_cluster] > weight) {
83
41.8k
            VECTOR(*lightest_weight)[neighbor_cluster] = weight;
84
41.8k
            VECTOR(*lightest_eid)[neighbor_cluster] = edge;
85
86
41.8k
            IGRAPH_CHECK(igraph_vector_int_push_back(dirty_vids, neighbor_cluster));
87
88
            // Also, if this cluster happens to be a sampled cluster, also update
89
            // the variables that store which is the lightest edge that connects
90
            // v to any of the sampled clusters.
91
41.8k
            if (is_cluster_sampled) {
92
38.9k
                if ((IGRAPH_BIT_TEST(*is_cluster_sampled, neighbor_cluster)) && (lightest_weight_to_sampled > weight)) {
93
2.74k
                    lightest_weight_to_sampled = weight;
94
2.74k
                    *nearest_neighboring_sampled_cluster = neighbor_cluster;
95
2.74k
                }
96
38.9k
            }
97
41.8k
        }
98
53.4k
    }
99
100
56.1k
    return IGRAPH_SUCCESS;
101
56.1k
}
102
103
static void clear_lightest_edges_to_clusters(
104
    igraph_vector_int_t *dirty_vids,
105
    igraph_vector_int_t *lightest_eid,
106
    igraph_vector_t *lightest_weight
107
56.1k
) {
108
56.1k
    const igraph_int_t n = igraph_vector_int_size(dirty_vids);
109
97.9k
    for (igraph_int_t i = 0; i < n; i++) {
110
41.8k
        igraph_int_t vid = VECTOR(*dirty_vids)[i];
111
41.8k
        VECTOR(*lightest_weight)[vid] = IGRAPH_INFINITY;
112
41.8k
        VECTOR(*lightest_eid)[vid] = -1;
113
41.8k
    }
114
115
56.1k
    igraph_vector_int_clear(dirty_vids);
116
56.1k
}
117
118
/**
119
 * \ingroup structural
120
 * \function igraph_spanner
121
 * \brief Calculates a spanner of a graph with a given stretch factor.
122
 *
123
 * A spanner of a graph <code>G = (V,E)</code> with a stretch \c t is a
124
 * subgraph <code>H = (V,Es)</code> such that \c Es is a subset of \c E
125
 * and the distance between any pair of nodes in \c H is at most \c t
126
 * times the distance in \c G. The returned graph is always a spanner of
127
 * the given graph with the specified stretch. For weighted graphs the
128
 * number of edges in the spanner is <code>O(k n^(1 + 1 / k))</code>, where
129
 * \c k is <code>k = (t + 1) / 2</code>,  \c m is the number of edges
130
 * and \c n is the number of nodes in \c G. For unweighted graphs the number
131
 * of edges is <code>O(n^(1 + 1 / k) + kn)</code>.
132
 *
133
 * </para><para>
134
 * This function is based on the algorithm of Baswana and Sen: "A Simple and
135
 * Linear Time Randomized Algorithm for Computing Sparse Spanners in
136
 * Weighted Graphs". https://doi.org/10.1002/rsa.20130
137
 *
138
 * \param graph An undirected connected graph object. If the graph
139
 *        is directed, the directions of the edges will be ignored.
140
 * \param spanner An initialized vector, the IDs of the edges that constitute
141
 *        the calculated spanner will be returned here. Use
142
 *        \ref igraph_subgraph_from_edges() to extract the spanner as a separate
143
 *        graph object.
144
 * \param stretch The stretch factor \c t of the spanner.
145
 * \param weights The edge weights or \c NULL.
146
 * \return Error code.
147
 *
148
 * Time complexity: The algorithm is a randomized Las Vegas algorithm. The expected
149
 * running time is O(km) where k is the value mentioned above and m is the number
150
 * of edges.
151
 */
152
igraph_error_t igraph_spanner(const igraph_t *graph, igraph_vector_int_t *spanner,
153
1.22k
        igraph_real_t stretch, const igraph_vector_t *weights) {
154
155
1.22k
    const igraph_int_t no_of_nodes = igraph_vcount(graph);
156
1.22k
    const igraph_int_t no_of_edges = igraph_ecount(graph);
157
1.22k
    igraph_int_t nlen, neighbor, cluster;
158
1.22k
    igraph_real_t sample_prob, k = (stretch + 1) / 2, weight, lightest_sampled_weight;
159
1.22k
    igraph_vector_int_t clustering, lightest_eid;
160
1.22k
    igraph_vector_t lightest_weight;
161
1.22k
    igraph_bitset_t is_cluster_sampled;
162
1.22k
    igraph_bitset_t is_edge_in_spanner;
163
1.22k
    igraph_vector_int_t new_clustering;
164
1.22k
    igraph_vector_int_t dirty_vids;
165
1.22k
    igraph_vector_int_t *adjacent_vertices;
166
1.22k
    igraph_vector_int_t *incident_edges;
167
1.22k
    igraph_adjlist_t adjlist;
168
1.22k
    igraph_inclist_t inclist;
169
1.22k
    igraph_int_t edge;
170
1.22k
    igraph_int_t index;
171
172
1.22k
    if (spanner == NULL) {
173
0
        return IGRAPH_SUCCESS;
174
0
    }
175
176
    /* Test validity of stretch factor */
177
1.22k
    if (stretch < 1) {
178
0
        IGRAPH_ERROR("Stretch factor must be at least 1.", IGRAPH_EINVAL);
179
0
    }
180
181
    /* Test validity of weights vector */
182
1.22k
    if (weights) {
183
0
        if (igraph_vector_size(weights) != no_of_edges) {
184
0
            IGRAPH_ERROR("Weight vector length does not match.", IGRAPH_EINVAL);
185
0
        }
186
0
        if (no_of_edges > 0) {
187
0
            igraph_real_t min = igraph_vector_min(weights);
188
0
            if (min < 0) {
189
0
                IGRAPH_ERROR("Weight vector must be non-negative.", IGRAPH_EINVAL);
190
0
            }
191
0
            else if (isnan(min)) {
192
0
                IGRAPH_ERROR("Weight vector must not contain NaN values.", IGRAPH_EINVAL);
193
0
            }
194
0
        }
195
0
    }
196
197
    // Clear the vector that will contain the IDs of the edges in the spanner
198
1.22k
    igraph_vector_int_clear(spanner);
199
200
    // Create an incidence list representation of the graph and also create the
201
    // corresponding adjacency list. The residual graph will not be constructed
202
    // explicitly; it will only exist in terms of the incidence and the adjacency
203
    // lists, maintained in parallel as the edges are removed from the residual
204
    // graph.
205
1.22k
    IGRAPH_CHECK(igraph_inclist_init(graph, &inclist, IGRAPH_ALL, IGRAPH_NO_LOOPS));
206
1.22k
    IGRAPH_FINALLY(igraph_inclist_destroy, &inclist);
207
1.22k
    IGRAPH_CHECK(igraph_adjlist_init_from_inclist(graph, &adjlist, &inclist));
208
1.22k
    IGRAPH_FINALLY(igraph_adjlist_destroy, &adjlist);
209
210
    // Phase 1: forming the clusters
211
    // Create a vector which maps the nodes to the centers of the corresponding
212
    // clusters. At the beginning each node is its own cluster center.
213
1.22k
    IGRAPH_CHECK(igraph_vector_int_init_range(&clustering, 0, no_of_nodes));
214
1.22k
    IGRAPH_FINALLY(igraph_vector_int_destroy, &clustering);
215
216
    // A mapping vector which indicates the neighboring edge with the smallest
217
    // weight for each cluster central, for a single vertex of interest.
218
    // Preconditions needed by collect_lightest_edges_to_clusters()
219
    // are enforced here.
220
1.22k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&lightest_eid, no_of_nodes);
221
1.22k
    igraph_vector_int_fill(&lightest_eid, -1);
222
223
    // A mapping vector which indicated the minimum weight to each neighboring
224
    // cluster, for a single vertex of interest.
225
    // Preconditions needed by collect_lightest_edges_to_clusters()
226
    // are enforced here.
227
1.22k
    IGRAPH_VECTOR_INIT_FINALLY(&lightest_weight, no_of_nodes);
228
1.22k
    igraph_vector_fill(&lightest_weight, IGRAPH_INFINITY);
229
230
1.22k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&new_clustering, no_of_nodes);
231
232
    // A boolean vector whose i-th element is 1 if the i-th vertex is a cluster
233
    // center that is sampled in the current iteration, 0 otherwise
234
1.22k
    IGRAPH_BITSET_INIT_FINALLY(&is_cluster_sampled, no_of_nodes);
235
1.22k
    IGRAPH_BITSET_INIT_FINALLY(&is_edge_in_spanner, no_of_edges);
236
237
    // Temporary vector used by collect_lightest_edges_to_clusters()
238
    // to keep track of the nodes that it has written to
239
1.22k
    IGRAPH_VECTOR_INT_INIT_FINALLY(&dirty_vids, 0);
240
241
1.22k
    sample_prob = pow((igraph_real_t) no_of_nodes, -1 / k);
242
243
1.22k
#define ADD_EDGE_TO_SPANNER \
244
27.2k
    do { if (!IGRAPH_BIT_TEST(is_edge_in_spanner, edge)) { \
245
26.2k
        IGRAPH_BIT_SET(is_edge_in_spanner, edge); \
246
26.2k
        IGRAPH_CHECK(igraph_vector_int_push_back(spanner, edge)); \
247
27.2k
    } } while (0)
248
249
1.22k
    igraph_vector_fill(&lightest_weight, IGRAPH_INFINITY);
250
251
2.44k
    for (igraph_int_t i = 0; i < k - 1; i++) {
252
1.22k
        IGRAPH_ALLOW_INTERRUPTION();
253
254
1.22k
        igraph_vector_int_fill(&new_clustering, -1);
255
1.22k
        igraph_bitset_null(&is_cluster_sampled);
256
257
        // Step 1: sample cluster centers
258
54.5k
        for (igraph_int_t j = 0; j < no_of_nodes; j++) {
259
53.3k
            if (VECTOR(clustering)[j] == j && RNG_UNIF01() < sample_prob) {
260
4.24k
                IGRAPH_BIT_SET(is_cluster_sampled, j);
261
4.24k
            }
262
53.3k
        }
263
264
        // Step 2 and 3
265
54.5k
        for (igraph_int_t v = 0; v < no_of_nodes; v++) {
266
            // If v is inside a cluster and the cluster of v is sampled, then continue
267
53.3k
            cluster = VECTOR(clustering)[v];
268
53.3k
            if (cluster != -1 && IGRAPH_BIT_TEST(is_cluster_sampled, cluster)) {
269
4.24k
                VECTOR(new_clustering)[v] = cluster;
270
4.24k
                continue;
271
4.24k
            }
272
273
            // Step 2: find the lightest edge that connects vertex v to its
274
            // neighboring sampled clusters
275
49.1k
            igraph_int_t nearest_neighboring_sampled_cluster = -1;
276
49.1k
            IGRAPH_CHECK(collect_lightest_edges_to_clusters(
277
49.1k
                    &adjlist,
278
49.1k
                    &inclist,
279
49.1k
                    weights,
280
49.1k
                    &clustering,
281
49.1k
                    &is_cluster_sampled,
282
49.1k
                    v,
283
49.1k
                    &lightest_eid,
284
49.1k
                    &lightest_weight,
285
49.1k
                    &dirty_vids,
286
49.1k
                    &nearest_neighboring_sampled_cluster
287
49.1k
            ));
288
289
            // Step 3: add edges to spanner
290
49.1k
            if (nearest_neighboring_sampled_cluster == -1) {
291
                // Case 3(a) from the paper: v is not adjacent to any of the
292
                // sampled clusters.
293
294
                // Add lightest edge which connects vertex v to each neighboring
295
                // cluster (none of which are sampled)
296
2.59M
                for (igraph_int_t j = 0; j < no_of_nodes; j++) {
297
2.54M
                    edge = VECTOR(lightest_eid)[j];
298
2.54M
                    if (edge != -1) {
299
21.6k
                        ADD_EDGE_TO_SPANNER;
300
21.6k
                    }
301
2.54M
                }
302
303
                // Remove all edges incident on v from the graph. Note that each
304
                // edge being removed occurs twice in the adjacency / incidence
305
                // lists
306
46.3k
                adjacent_vertices = igraph_adjlist_get(&adjlist, v);
307
46.3k
                incident_edges = igraph_inclist_get(&inclist, v);
308
46.3k
                nlen = igraph_vector_int_size(incident_edges);
309
69.3k
                for (igraph_int_t j = 0; j < nlen; j++) {
310
22.9k
                    neighbor = VECTOR(*adjacent_vertices)[j];
311
22.9k
                    if (neighbor == v) {
312
                        /* should not happen as we did not ask for loop edges in
313
                         * the adjacency / incidence lists, but let's be defensive */
314
0
                        continue;
315
0
                    }
316
317
22.9k
                    if (igraph_vector_int_search(
318
22.9k
                        igraph_inclist_get(&inclist, neighbor),
319
22.9k
                        0,
320
22.9k
                        VECTOR(*incident_edges)[j],
321
22.9k
                        &index
322
22.9k
                    )) {
323
22.9k
                        igraph_vector_int_remove_fast(igraph_adjlist_get(&adjlist, neighbor), index);
324
22.9k
                        igraph_vector_int_remove_fast(igraph_inclist_get(&inclist, neighbor), index);
325
22.9k
                    }
326
22.9k
                }
327
46.3k
                igraph_vector_int_clear(adjacent_vertices);
328
46.3k
                igraph_vector_int_clear(incident_edges);
329
46.3k
            } else {
330
                // Case 3(b) from the paper: v is adjacent to at least one of
331
                // the sampled clusters
332
333
                // add the edge connecting to the lightest sampled cluster
334
2.74k
                edge = VECTOR(lightest_eid)[nearest_neighboring_sampled_cluster];
335
2.74k
                ADD_EDGE_TO_SPANNER;
336
337
                // 'lightest_sampled_weight' is the weight of the lightest edge connecting v to
338
                // one of the sampled clusters. This is where v will belong in
339
                // the new clustering.
340
2.74k
                lightest_sampled_weight = VECTOR(lightest_weight)[nearest_neighboring_sampled_cluster];
341
2.74k
                VECTOR(new_clustering)[v] = nearest_neighboring_sampled_cluster;
342
343
                // Add to the spanner light edges with weight less than 'lightest_sampled_weight'
344
140k
                for (igraph_int_t j = 0; j < no_of_nodes; j++) {
345
138k
                    if (VECTOR(lightest_weight)[j] < lightest_sampled_weight) {
346
0
                        edge = VECTOR(lightest_eid)[j];
347
0
                        ADD_EDGE_TO_SPANNER;
348
0
                    }
349
138k
                }
350
351
                // Remove edges to centers with edge weight less than 'lightest_sampled_weight'
352
2.74k
                adjacent_vertices = igraph_adjlist_get(&adjlist, v);
353
2.74k
                incident_edges = igraph_inclist_get(&inclist, v);
354
2.74k
                nlen = igraph_vector_int_size(incident_edges);
355
24.5k
                for (igraph_int_t j = 0; j < nlen; j++) {
356
21.8k
                    neighbor = VECTOR(*adjacent_vertices)[j];
357
21.8k
                    if (neighbor == v) {
358
                        /* should not happen as we did not ask for loop edges in
359
                         * the adjacency / incidence lists, but let's be defensive */
360
0
                        continue;
361
0
                    }
362
363
21.8k
                    cluster = VECTOR(clustering)[neighbor];
364
21.8k
                    weight = VECTOR(lightest_weight)[cluster];
365
21.8k
                    if ((cluster == nearest_neighboring_sampled_cluster) || (weight < lightest_sampled_weight)) {
366
6.33k
                        edge = VECTOR(*incident_edges)[j];
367
368
6.33k
                        if (igraph_vector_int_search(
369
6.33k
                            igraph_inclist_get(&inclist, neighbor), 0, edge, &index
370
6.33k
                        )) {
371
6.33k
                            igraph_vector_int_remove_fast(igraph_adjlist_get(&adjlist, neighbor), index);
372
6.33k
                            igraph_vector_int_remove_fast(igraph_inclist_get(&inclist, neighbor), index);
373
6.33k
                        }
374
375
6.33k
                        igraph_vector_int_remove_fast(adjacent_vertices, j);
376
6.33k
                        igraph_vector_int_remove_fast(incident_edges, j);
377
378
6.33k
                        j--;
379
6.33k
                        nlen--;
380
6.33k
                    }
381
21.8k
                }
382
2.74k
            }
383
384
            // We don't need lightest_eids and lightest_weights anymore so
385
            // clear them in O(d) time
386
49.1k
            clear_lightest_edges_to_clusters(
387
49.1k
                    &dirty_vids, &lightest_eid, &lightest_weight
388
49.1k
            );
389
49.1k
        }
390
391
        // Commit the new clustering
392
1.22k
        igraph_vector_int_update(&clustering, &new_clustering); /* reserved */
393
394
        // Remove intra-cluster edges
395
54.5k
        for (igraph_int_t v = 0; v < no_of_nodes; v++) {
396
53.3k
            adjacent_vertices = igraph_adjlist_get(&adjlist, v);
397
53.3k
            incident_edges = igraph_inclist_get(&inclist, v);
398
53.3k
            nlen = igraph_vector_int_size(incident_edges);
399
66.6k
            for (igraph_int_t j = 0; j < nlen; j++) {
400
13.3k
                neighbor = VECTOR(*adjacent_vertices)[j];
401
13.3k
                edge = VECTOR(*incident_edges)[j];
402
403
13.3k
                if (VECTOR(clustering)[neighbor] == VECTOR(clustering)[v]) {
404
                    /* We don't need to bother with removing the other copy
405
                     * of the edge from the incidence lists (and the corresponding
406
                     * vertices from the adjacency lists) because we will find
407
                     * them anyway as we are iterating over all nodes */
408
4.69k
                    igraph_vector_int_remove_fast(adjacent_vertices, j);
409
4.69k
                    igraph_vector_int_remove_fast(incident_edges, j);
410
4.69k
                    j--;
411
4.69k
                    nlen--;
412
4.69k
                }
413
13.3k
            }
414
53.3k
        }
415
1.22k
    }
416
417
    // Phase 2: vertex_clustering joining
418
54.5k
    for (igraph_int_t v = 0; v < no_of_nodes; v++) {
419
53.3k
        if (VECTOR(clustering)[v] != -1) {
420
6.98k
            IGRAPH_CHECK(collect_lightest_edges_to_clusters(
421
6.98k
                    &adjlist,
422
6.98k
                    &inclist,
423
6.98k
                    weights,
424
6.98k
                    &clustering,
425
6.98k
                    /* is_cluster_sampled = */ NULL,
426
6.98k
                    v,
427
6.98k
                    &lightest_eid,
428
6.98k
                    &lightest_weight,
429
6.98k
                    &dirty_vids,
430
6.98k
                    NULL
431
6.98k
            ));
432
323k
            for (igraph_int_t j = 0; j < no_of_nodes; j++) {
433
316k
                edge = VECTOR(lightest_eid)[j];
434
316k
                if (edge != -1) {
435
2.90k
                    ADD_EDGE_TO_SPANNER;
436
2.90k
                }
437
316k
            }
438
6.98k
            clear_lightest_edges_to_clusters(&dirty_vids, &lightest_eid, &lightest_weight);
439
6.98k
        }
440
53.3k
    }
441
442
    // Free memory
443
1.22k
    igraph_vector_int_destroy(&dirty_vids);
444
1.22k
    igraph_bitset_destroy(&is_edge_in_spanner);
445
1.22k
    igraph_bitset_destroy(&is_cluster_sampled);
446
1.22k
    igraph_vector_int_destroy(&new_clustering);
447
1.22k
    igraph_vector_destroy(&lightest_weight);
448
1.22k
    igraph_vector_int_destroy(&lightest_eid);
449
1.22k
    igraph_vector_int_destroy(&clustering);
450
1.22k
    igraph_adjlist_destroy(&adjlist);
451
1.22k
    igraph_inclist_destroy(&inclist);
452
1.22k
    IGRAPH_FINALLY_CLEAN(9);
453
454
1.22k
    return IGRAPH_SUCCESS;
455
1.22k
}