Coverage Report

Created: 2025-11-09 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/igraph/src/centrality/pagerank.c
Line
Count
Source
1
/*
2
   igraph library.
3
   Copyright (C) 2007-2021  The igraph development team <igraph@igraph.org>
4
5
   This program is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 2 of the License, or
8
   (at your option) any later version.
9
10
   This program is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU General Public License for more details.
14
15
   You should have received a copy of the GNU General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.
17
*/
18
19
#include "igraph_centrality.h"
20
21
#include "igraph_adjlist.h"
22
#include "igraph_interface.h"
23
#include "igraph_random.h"
24
#include "igraph_structural.h"
25
26
#include "centrality/prpack_internal.h"
27
28
#include <limits.h>
29
30
static igraph_error_t igraph_i_personalized_pagerank_arpack(const igraph_t *graph,
31
                                                 igraph_vector_t *vector,
32
                                                 igraph_real_t *value, const igraph_vs_t vids,
33
                                                 igraph_bool_t directed, igraph_real_t damping,
34
                                                 const igraph_vector_t *reset,
35
                                                 const igraph_vector_t *weights,
36
                                                 igraph_arpack_options_t *options);
37
38
typedef struct {
39
    const igraph_t *graph;
40
    igraph_adjlist_t *adjlist;
41
    igraph_real_t damping;
42
    igraph_vector_t *outdegree;
43
    igraph_vector_t *tmp;
44
    igraph_vector_t *reset;
45
} pagerank_data_t;
46
47
typedef struct {
48
    const igraph_t *graph;
49
    igraph_inclist_t *inclist;
50
    const igraph_vector_t *weights;
51
    igraph_real_t damping;
52
    igraph_vector_t *outdegree;
53
    igraph_vector_t *tmp;
54
    igraph_vector_t *reset;
55
} pagerank_data_weighted_t;
56
57
/* The two pagerank_operator functions below update the probabilities of a random walker
58
 * being in each of the vertices after one step of the walk. */
59
60
static igraph_error_t pagerank_operator_unweighted(igraph_real_t *to, const igraph_real_t *from,
61
0
                             int n, void *extra) {
62
63
0
    pagerank_data_t *data = extra;
64
0
    igraph_adjlist_t *adjlist = data->adjlist;
65
0
    igraph_vector_t *outdegree = data->outdegree;
66
0
    igraph_vector_t *tmp = data->tmp;
67
0
    igraph_vector_t *reset = data->reset;
68
0
    igraph_vector_int_t *neis;
69
0
    igraph_int_t i, j, nlen;
70
0
    igraph_real_t sumfrom = 0.0;
71
0
    igraph_real_t fact = 1 - data->damping;
72
73
    /* Calculate p(x) / outdegree(x) in advance for all the vertices.
74
     * Note that we may divide by zero here; this is intentional since
75
     * we won't use those values and we save a comparison this way.
76
     * At the same time, we calculate the global probability of a
77
     * random jump in `sumfrom`. For vertices with no outgoing edges,
78
     * we will surely jump from there if we are there, hence those
79
     * vertices contribute p(x) to the teleportation probability.
80
     * For vertices with some outgoing edges, we jump from there with
81
     * probability `fact` if we are there, hence they contribute
82
     * p(x)*fact */
83
0
    for (i = 0; i < n; i++) {
84
0
        sumfrom += VECTOR(*outdegree)[i] != 0 ? from[i] * fact : from[i];
85
0
        VECTOR(*tmp)[i] = from[i] / VECTOR(*outdegree)[i];
86
0
    }
87
88
    /* Here we calculate the part of the `to` vector that results from
89
     * moving along links (and not from teleportation) */
90
0
    for (i = 0; i < n; i++) {
91
0
        neis = igraph_adjlist_get(adjlist, i);
92
0
        nlen = igraph_vector_int_size(neis);
93
0
        to[i] = 0.0;
94
0
        for (j = 0; j < nlen; j++) {
95
0
            igraph_int_t nei = VECTOR(*neis)[j];
96
0
            to[i] += VECTOR(*tmp)[nei];
97
0
        }
98
0
        to[i] *= data->damping;
99
0
    }
100
101
    /* Now we add the contribution from random jumps. `reset` is a vector
102
     * that defines the probability of ending up in vertex i after a jump.
103
     * `sumfrom` is the global probability of jumping as mentioned above. */
104
    /* printf("sumfrom = %.6f\n", (float)sumfrom); */
105
106
0
    if (reset) {
107
        /* Running personalized PageRank */
108
0
        for (i = 0; i < n; i++) {
109
0
            to[i] += sumfrom * VECTOR(*reset)[i];
110
0
        }
111
0
    } else {
112
        /* Traditional PageRank with uniform reset vector */
113
0
        sumfrom /= n;
114
0
        for (i = 0; i < n; i++) {
115
0
            to[i] += sumfrom;
116
0
        }
117
0
    }
118
119
0
    return IGRAPH_SUCCESS;
120
0
}
121
122
static igraph_error_t pagerank_operator_weighted(igraph_real_t *to, const igraph_real_t *from,
123
0
                              int n, void *extra) {
124
125
0
    pagerank_data_weighted_t *data = extra;
126
0
    const igraph_t *graph = data->graph;
127
0
    igraph_inclist_t *inclist = data->inclist;
128
0
    const igraph_vector_t *weights = data->weights;
129
0
    igraph_vector_t *outdegree = data->outdegree;
130
0
    igraph_vector_t *tmp = data->tmp;
131
0
    igraph_vector_t *reset = data->reset;
132
0
    igraph_int_t i, j, nlen;
133
0
    igraph_real_t sumfrom = 0.0;
134
0
    igraph_vector_int_t *neis;
135
0
    igraph_real_t fact = 1 - data->damping;
136
137
    /*
138
    printf("PageRank weighted: multiplying vector: ");
139
    for (i=0; i<n; i++) { printf(" %.4f", from[i]); }
140
    printf("\n");
141
    */
142
143
0
    for (i = 0; i < n; i++) {
144
0
        if (VECTOR(*outdegree)[i] > 0) {
145
0
            sumfrom += from[i] * fact;
146
0
            VECTOR(*tmp)[i] = from[i] / VECTOR(*outdegree)[i];
147
0
        } else {
148
0
            sumfrom += from[i];
149
            /* The following value is used only when all outgoing edges have
150
             * weight zero (as opposed to there being no outgoing edges at all).
151
             * We set it to zero to avoid a 0.0*inf situation when computing
152
             * to[i] below. */
153
0
            VECTOR(*tmp)[i] = 0;
154
0
        }
155
0
    }
156
157
0
    for (i = 0; i < n; i++) {
158
0
        neis = igraph_inclist_get(inclist, i);
159
0
        nlen = igraph_vector_int_size(neis);
160
0
        to[i] = 0.0;
161
0
        for (j = 0; j < nlen; j++) {
162
0
            igraph_int_t edge = VECTOR(*neis)[j];
163
0
            igraph_int_t nei = IGRAPH_OTHER(graph, edge, i);
164
0
            to[i] += VECTOR(*weights)[edge] * VECTOR(*tmp)[nei];
165
0
        }
166
0
        to[i] *= data->damping;
167
0
    }
168
169
    /* printf("sumfrom = %.6f\n", (float)sumfrom); */
170
171
0
    if (reset) {
172
        /* Running personalized PageRank */
173
0
        for (i = 0; i < n; i++) {
174
0
            to[i] += sumfrom * VECTOR(*reset)[i];
175
0
        }
176
0
    } else {
177
        /* Traditional PageRank with uniform reset vector */
178
0
        sumfrom /= n;
179
0
        for (i = 0; i < n; i++) {
180
0
            to[i] += sumfrom;
181
0
        }
182
0
    }
183
184
    /*
185
    printf("PageRank weighted: multiplied vector: ");
186
    for (i=0; i<n; i++) { printf(" %.4f", to[i]); }
187
    printf("\n");
188
    */
189
190
0
    return IGRAPH_SUCCESS;
191
0
}
192
193
/**
194
 * \function igraph_pagerank
195
 * \brief Calculates the Google PageRank for the specified vertices.
196
 *
197
 * The PageRank centrality of a vertex is the fraction of time a
198
 * random walker traversing the graph would spend on that vertex.
199
 * The walker follows the out-edges with probabilities proportional
200
 * to their weights. Additionally, in each step, it restarts the walk
201
 * from a random vertex with probability <code>1 - damping</code>.
202
 * If the random walker gets stuck in a sink vertex, it will also restart
203
 * from a random vertex.
204
 *
205
 * </para><para>
206
 * The PageRank centrality is mainly useful for directed graphs. In undirected
207
 * graphs it converges to trivial values proportional to degrees as the damping
208
 * factor approaches 1.
209
 *
210
 * </para><para>
211
 * Starting from version 0.9, igraph has two PageRank implementations,
212
 * and the user can choose between them. The first implementation is
213
 * \c IGRAPH_PAGERANK_ALGO_ARPACK, which phrases the PageRank calculation
214
 * as an eigenvalue problem, which is then solved using the ARPACK library.
215
 * This was the default before igraph version 0.7. The second and recommended
216
 * implementation is \c IGRAPH_PAGERANK_ALGO_PRPACK. This is using the
217
 * PRPACK package, see https://github.com/dgleich/prpack. PRPACK uses an
218
 * algebraic method, i.e. solves a linear system to obtain the PageRank
219
 * scores.
220
 *
221
 * </para><para>
222
 * Note that the PageRank of a given vertex depends on the PageRank
223
 * of all other vertices, so even if you want to calculate the PageRank for
224
 * only some of the vertices, all of them must be calculated. Requesting
225
 * the PageRank for only some of the vertices does not result in any
226
 * performance increase at all.
227
 *
228
 * </para><para>
229
 * References:
230
 *
231
 * </para><para>
232
 * Sergey Brin and Larry Page: The Anatomy of a Large-Scale Hypertextual
233
 * Web Search Engine. Proceedings of the 7th World-Wide Web Conference,
234
 * Brisbane, Australia, April 1998.
235
 * https://doi.org/10.1016/S0169-7552(98)00110-X
236
 *
237
 * \param graph The graph object.
238
 * \param weights Optional edge weights. May be a \c NULL pointer,
239
 *    meaning unweighted edges, or a vector of non-negative values
240
 *    of the same length as the number of edges.
241
 * \param vector Pointer to an initialized vector, the result is
242
 *    stored here. It is resized as needed.
243
 * \param value Pointer to a real variable. When using \c IGRAPH_PAGERANK_ALGO_ARPACK,
244
 *    the eigenvalue corresponding to the PageRank vector is stored here. It is
245
 *    expected to be exactly one. Checking this value can be used to diagnose cases
246
 *    when ARPACK failed to converge to the leading eigenvector.
247
 *    When using \c IGRAPH_PAGERANK_ALGO_PRPACK, this is always set to 1.0.
248
 * \param damping The damping factor ("d" in the original paper).
249
 *    Must be a probability in the range [0, 1]. A commonly used value is 0.85.
250
 * \param directed Boolean, whether to consider the directedness of
251
 *    the edges. This is ignored for undirected graphs.
252
 * \param vids The vertex IDs for which the PageRank is returned. This parameter
253
 *    is only for convenience. Computing PageRank for fewer than all vertices will
254
 *    not speed up the calculation.
255
 * \param algo The PageRank implementation to use. Possible values:
256
 *    \c IGRAPH_PAGERANK_ALGO_ARPACK, \c IGRAPH_PAGERANK_ALGO_PRPACK.
257
 * \param options Options for the ARPACK method. See \ref igraph_arpack_options_t
258
 *    for details. Supply \c NULL here to use the defaults. Note that the function
259
 *    overwrites the <code>n</code> (number of vertices), <code>nev</code> (1),
260
 *    <code>ncv</code> (3) and <code>which</code> (LM) parameters and it always
261
 *    starts the calculation from a non-random vector calculated based on the
262
 *    degree of the vertices.
263
 * \return Error code:
264
 *         \c IGRAPH_ENOMEM, not enough memory for temporary data.
265
 *         \c IGRAPH_EINVVID, invalid vertex ID in \p vids.
266
 *
267
 * Time complexity: depends on the input graph, usually it is O(|E|),
268
 * the number of edges.
269
 *
270
 * \sa \ref igraph_personalized_pagerank() and \ref igraph_personalized_pagerank_vs()
271
 * for the personalized PageRank measure. See \ref igraph_arpack_rssolve() and
272
 * \ref igraph_arpack_rnsolve() for the underlying machinery used by
273
 * \c IGRAPH_PAGERANK_ALGO_ARPACK.
274
 *
275
 * \example examples/simple/igraph_pagerank.c
276
 */
277
igraph_error_t igraph_pagerank(
278
        const igraph_t *graph, const igraph_vector_t *weights,
279
        igraph_vector_t *vector, igraph_real_t *value,
280
        igraph_real_t damping, igraph_bool_t directed,
281
        igraph_vs_t vids,
282
        igraph_pagerank_algo_t algo,
283
2.01k
        igraph_arpack_options_t *options) {
284
2.01k
    return igraph_personalized_pagerank(
285
2.01k
            graph, weights,
286
2.01k
            vector, value,
287
2.01k
            NULL,
288
2.01k
            damping, directed,
289
2.01k
            vids,
290
2.01k
            algo,
291
2.01k
            options);
292
2.01k
}
293
294
/**
295
 * \function igraph_personalized_pagerank_vs
296
 * \brief Calculates the personalized Google PageRank for the specified vertices.
297
 *
298
 * The personalized PageRank is similar to the original PageRank measure, but
299
 * when the random walk is restarted, a new starting vertex is chosen according to
300
 * a specified distribution.
301
 * This distribution is used both when restarting randomly with probability
302
 * <code>1 - damping</code>, and when the walker is forced to restart due to being
303
 * stuck in a sink vertex (a vertex with no outgoing edges).
304
 *
305
 * </para><para>
306
 * This simplified interface takes a vertex sequence and resets the random walk to
307
 * one of the vertices in the specified vertex sequence, chosen uniformly. A typical
308
 * application of personalized PageRank is when the random walk is reset to the same
309
 * vertex every time: this can easily be achieved using \ref igraph_vss_1() which
310
 * generates a vertex sequence containing only a single vertex.
311
 *
312
 * </para><para>
313
 * Note that the personalized PageRank of a given vertex depends on the
314
 * personalized PageRank of all other vertices, so even if you want to calculate
315
 * the personalized PageRank for only some of the vertices, all of them must be
316
 * calculated. Requesting the personalized PageRank for only some of the vertices
317
 * does not result in any performance increase at all.
318
 *
319
 * \param graph The graph object.
320
 * \param weights Optional edge weights, it is either a null pointer,
321
 *    then the edges are not weighted, or a vector of the same length
322
 *    as the number of edges.
323
 * \param vector Pointer to an initialized vector, the result is
324
 *    stored here. It is resized as needed.
325
 * \param value Pointer to a real variable. When using \c IGRAPH_PAGERANK_ALGO_ARPACK,
326
 *    the eigenvalue corresponding to the PageRank vector is stored here. It is
327
 *    expected to be exactly one. Checking this value can be used to diagnose cases
328
 *    when ARPACK failed to converge to the leading eigenvector.
329
 *    When using \c IGRAPH_PAGERANK_ALGO_PRPACK, this is always set to 1.0.
330
 * \param reset_vids IDs of the vertices used when resetting the random walk.
331
 *    The walk will be restarted from a vertex in this set, chosen uniformly at
332
 *    random. Duplicate vertices are allowed.
333
 * \param damping The damping factor ("d" in the original paper).
334
 *    Must be a probability in the range [0, 1]. A commonly used value is 0.85.
335
 * \param directed Boolean, whether to consider the directedness of
336
 *    the edges. This is ignored for undirected graphs.
337
 * \param vids The vertex IDs for which the PageRank is returned. This parameter
338
 *    is only for convenience. Computing PageRank for fewer than all vertices will
339
 *    not speed up the calculation.
340
 * \param algo The PageRank implementation to use. Possible values:
341
 *    \c IGRAPH_PAGERANK_ALGO_ARPACK, \c IGRAPH_PAGERANK_ALGO_PRPACK.
342
 * \param options Options for the ARPACK method. See \ref igraph_arpack_options_t
343
 *    for details. Supply \c NULL here to use the defaults. Note that the function
344
 *    overwrites the <code>n</code> (number of vertices), <code>nev</code> (1),
345
 *    <code>ncv</code> (3) and <code>which</code> (LM) parameters and it always
346
 *    starts the calculation from a non-random vector calculated based on the
347
 *    degree of the vertices.
348
 * \return Error code:
349
 *         \c IGRAPH_ENOMEM, not enough memory for
350
 *         temporary data.
351
 *         \c IGRAPH_EINVVID, invalid vertex ID in
352
 *         \p vids or an empty reset vertex sequence in
353
 *         \p vids_reset.
354
 *
355
 * Time complexity: depends on the input graph, usually it is O(|E|),
356
 * the number of edges.
357
 *
358
 * \sa \ref igraph_pagerank() for the non-personalized implementation.
359
 */
360
igraph_error_t igraph_personalized_pagerank_vs(
361
        const igraph_t *graph, const igraph_vector_t *weights,
362
        igraph_vector_t *vector, igraph_real_t *value,
363
        igraph_vs_t reset_vids,
364
        igraph_real_t damping, igraph_bool_t directed,
365
        igraph_vs_t vids,
366
        igraph_pagerank_algo_t algo,
367
0
        igraph_arpack_options_t *options) {
368
369
0
    igraph_vector_t reset;
370
0
    igraph_vit_t vit;
371
372
0
    IGRAPH_VECTOR_INIT_FINALLY(&reset, igraph_vcount(graph));
373
0
    IGRAPH_CHECK(igraph_vit_create(graph, reset_vids, &vit));
374
0
    IGRAPH_FINALLY(igraph_vit_destroy, &vit);
375
376
0
    while (!IGRAPH_VIT_END(vit)) {
377
        /* Increment by 1 instead of setting to 1 to handle duplicates. */
378
0
        VECTOR(reset)[IGRAPH_VIT_GET(vit)] += 1.0;
379
0
        IGRAPH_VIT_NEXT(vit);
380
0
    }
381
0
    igraph_vit_destroy(&vit);
382
0
    IGRAPH_FINALLY_CLEAN(1);
383
384
0
    IGRAPH_CHECK(igraph_personalized_pagerank(
385
0
            graph, weights, vector,
386
0
            value, &reset,
387
0
            damping, directed, vids, algo,
388
0
            options));
389
390
0
    igraph_vector_destroy(&reset);
391
0
    IGRAPH_FINALLY_CLEAN(1);
392
393
0
    return IGRAPH_SUCCESS;
394
0
}
395
396
/**
397
 * \function igraph_personalized_pagerank
398
 * \brief Calculates the personalized Google PageRank for the specified vertices.
399
 *
400
 * The personalized PageRank is similar to the original PageRank measure, but
401
 * when the random walk is restarted, a new starting vertex is chosen non-uniformly,
402
 * according to the distribution specified in \p reset
403
 * (instead of the uniform distribution in the original PageRank measure).
404
 * The \p reset distribution is used both when restarting randomly with probability
405
 * <code>1 - damping</code>, and when the walker is forced to restart due to being
406
 * stuck in a sink vertex (a vertex with no outgoing edges).
407
 *
408
 * </para><para>
409
 * Note that the personalized PageRank of a given vertex depends on the
410
 * personalized PageRank of all other vertices, so even if you want to calculate
411
 * the personalized PageRank for only some of the vertices, all of them must be
412
 * calculated. Requesting the personalized PageRank for only some of the vertices
413
 * does not result in any performance increase at all.
414
 *
415
 * \param graph The graph object.
416
 * \param weights Optional edge weights. May be a \c NULL pointer,
417
 *    meaning unweighted edges, or a vector of non-negative values
418
 *    of the same length as the number of edges.
419
 * \param vector Pointer to an initialized vector, the result is
420
 *    stored here. It is resized as needed.
421
 * \param value Pointer to a real variable. When using \c IGRAPH_PAGERANK_ALGO_ARPACK,
422
 *    the eigenvalue corresponding to the PageRank vector is stored here. It is
423
 *    expected to be exactly one. Checking this value can be used to diagnose cases
424
 *    when ARPACK failed to converge to the leading eigenvector.
425
 *    When using \c IGRAPH_PAGERANK_ALGO_PRPACK, this is always set to 1.0.
426
 * \param reset The probability distribution over the vertices used when
427
 *    resetting the random walk. It is either a \c NULL pointer (denoting
428
 *    a uniform choice that results in the original PageRank measure)
429
 *    or a vector of the same length as the number of vertices.
430
 * \param damping The damping factor ("d" in the original paper).
431
 *    Must be a probability in the range [0, 1]. A commonly used value is 0.85.
432
 * \param directed Boolean, whether to consider the directedness of
433
 *    the edges. This is ignored for undirected graphs.
434
 * \param vids The vertex IDs for which the PageRank is returned. This parameter
435
 *    is only for convenience. Computing PageRank for fewer than all vertices will
436
 *    not speed up the calculation.
437
 * \param algo The PageRank implementation to use. Possible values:
438
 *    \c IGRAPH_PAGERANK_ALGO_ARPACK, \c IGRAPH_PAGERANK_ALGO_PRPACK.
439
 * \param options Options for the ARPACK method. See \ref igraph_arpack_options_t
440
 *    for details. Supply \c NULL here to use the defaults. Note that the function
441
 *    overwrites the <code>n</code> (number of vertices), <code>nev</code> (1),
442
 *    <code>ncv</code> (3) and <code>which</code> (LM) parameters and it always
443
 *    starts the calculation from a non-random vector calculated based on the
444
 *    degree of the vertices.
445
 * \return Error code:
446
 *         \c IGRAPH_ENOMEM, not enough memory for
447
 *         temporary data.
448
 *         \c IGRAPH_EINVVID, invalid vertex ID in
449
 *         \p vids or an invalid reset vector in \p reset.
450
 *
451
 * Time complexity: depends on the input graph, usually it is O(|E|),
452
 * the number of edges.
453
 *
454
 * \sa \ref igraph_pagerank() for the non-personalized implementation,
455
 * \ref igraph_personalized_pagerank_vs() for a personalized implementation
456
 * with resetting to specific vertices.
457
 */
458
igraph_error_t igraph_personalized_pagerank(
459
        const igraph_t *graph, const igraph_vector_t *weights,
460
        igraph_vector_t *vector, igraph_real_t *value,
461
        const igraph_vector_t *reset,
462
        igraph_real_t damping, igraph_bool_t directed,
463
        igraph_vs_t vids,
464
        igraph_pagerank_algo_t algo,
465
2.01k
        igraph_arpack_options_t *options) {
466
467
2.01k
    if (damping < 0.0 || damping > 1.0) {
468
0
        IGRAPH_ERROR("The PageRank damping factor must be in the range [0,1].", IGRAPH_EINVAL);
469
0
    }
470
471
2.01k
    if (algo == IGRAPH_PAGERANK_ALGO_ARPACK) {
472
0
        return igraph_i_personalized_pagerank_arpack(graph, vector, value, vids,
473
0
                directed, damping, reset,
474
0
                weights, options ? options : igraph_arpack_options_get_default()
475
0
        );
476
2.01k
    } else if (algo == IGRAPH_PAGERANK_ALGO_PRPACK) {
477
2.01k
        return igraph_i_personalized_pagerank_prpack(graph, vector, value, vids,
478
2.01k
                directed, damping, reset,
479
2.01k
                weights);
480
2.01k
    }
481
482
0
    IGRAPH_ERROR("Unknown PageRank algorithm", IGRAPH_EINVAL);
483
0
}
484
485
/*
486
 * ARPACK-based implementation of \c igraph_personalized_pagerank.
487
 *
488
 * See \c igraph_personalized_pagerank for the documentation of the parameters.
489
 */
490
static igraph_error_t igraph_i_personalized_pagerank_arpack(const igraph_t *graph, igraph_vector_t *vector,
491
                                                 igraph_real_t *value, const igraph_vs_t vids,
492
                                                 igraph_bool_t directed, igraph_real_t damping,
493
                                                 const igraph_vector_t *reset,
494
                                                 const igraph_vector_t *weights,
495
0
                                                 igraph_arpack_options_t *options) {
496
0
    igraph_matrix_t values;
497
0
    igraph_matrix_t vectors;
498
0
    igraph_neimode_t dirmode;
499
0
    igraph_vector_t outdegree;
500
0
    igraph_vector_t indegree;
501
0
    igraph_vector_t tmp;
502
0
    igraph_vector_t normalized_reset;
503
504
0
    igraph_int_t i;
505
0
    igraph_int_t no_of_nodes = igraph_vcount(graph);
506
0
    igraph_int_t no_of_edges = igraph_ecount(graph);
507
508
0
    igraph_real_t reset_sum; /* used only when reset != NULL */
509
510
0
    if (no_of_nodes > INT_MAX) {
511
0
        IGRAPH_ERROR("Graph has too many vertices for ARPACK.", IGRAPH_EOVERFLOW);
512
0
    }
513
514
0
    if (weights && igraph_vector_size(weights) != no_of_edges) {
515
0
        IGRAPH_ERROR("Invalid length of weights vector when calculating PageRank scores.", IGRAPH_EINVAL);
516
0
    }
517
518
0
    if (reset && igraph_vector_size(reset) != no_of_nodes) {
519
0
        IGRAPH_ERROR("Invalid length of reset vector when calculating personalized PageRank scores.", IGRAPH_EINVAL);
520
0
    }
521
522
0
    if (reset) {
523
0
        reset_sum = igraph_vector_sum(reset);
524
0
        if (no_of_nodes > 0 && reset_sum == 0) {
525
0
            IGRAPH_ERROR("The sum of the elements in the reset vector must not be zero.", IGRAPH_EINVAL);
526
0
        }
527
528
0
        igraph_real_t reset_min = igraph_vector_min(reset);
529
0
        if (reset_min < 0) {
530
0
            IGRAPH_ERROR("The reset vector must not contain negative elements.", IGRAPH_EINVAL);
531
0
        }
532
0
        if (!isfinite(reset_sum)) {
533
0
            IGRAPH_ERROR("The reset vector must not contain infinite or NaN values.", IGRAPH_EINVAL);
534
0
        }
535
0
    }
536
537
0
    if (no_of_edges == 0) {
538
        /* Special case: graph with no edges. Result is the same as the personalization vector. */
539
0
        if (value) {
540
0
            *value = 1.0;
541
0
        }
542
0
        if (vector) {
543
0
            if (reset && no_of_nodes > 0) {
544
0
                IGRAPH_CHECK(igraph_vector_update(vector, reset));
545
0
                igraph_vector_scale(vector, 1.0 / reset_sum);
546
0
            } else {
547
0
                IGRAPH_CHECK(igraph_vector_resize(vector, no_of_nodes));
548
0
                igraph_vector_fill(vector, 1.0 / no_of_nodes);
549
0
            }
550
0
        }
551
0
        return IGRAPH_SUCCESS;
552
0
    }
553
554
0
    options->n = (int) no_of_nodes;
555
0
    options->nev = 1;
556
0
    options->ncv = 0;   /* 0 means "automatic" in igraph_arpack_rnsolve */
557
0
    options->which[0] = 'L'; options->which[1] = 'R';
558
0
    options->start = 1; /* no random start vector */
559
560
0
    directed = directed && igraph_is_directed(graph);
561
562
0
    if (weights) {
563
0
        igraph_real_t min, max;
564
565
        /* Safe to call minmax, ecount == 0 case was caught earlier */
566
0
        igraph_vector_minmax(weights, &min, &max);
567
0
        if (min < 0) {
568
0
            IGRAPH_ERROR("Edge weights must not be negative.", IGRAPH_EINVAL);
569
0
        }
570
0
        if (isnan(min)) {
571
0
            IGRAPH_ERROR("Weight vector must not contain NaN values.", IGRAPH_EINVAL);
572
0
        }
573
0
        if (min == 0 && max == 0) {
574
            /* Special case: all weights are zeros. Result is the same as the personalization vector. */
575
0
            if (value) {
576
0
                *value = 1.0;
577
0
            }
578
0
            if (vector) {
579
0
                IGRAPH_CHECK(igraph_vector_resize(vector, no_of_nodes));
580
0
                if (reset) {
581
0
                    for (i=0; i < no_of_nodes; ++i) {
582
0
                        VECTOR(*vector)[i] = VECTOR(*reset)[i];
583
0
                    }
584
0
                    igraph_vector_scale(vector, 1.0 / igraph_vector_sum(vector));
585
0
                } else {
586
0
                    igraph_vector_fill(vector, 1.0 / no_of_nodes);
587
0
                }
588
0
            }
589
0
            return IGRAPH_SUCCESS;
590
0
        }
591
0
    }
592
593
0
    IGRAPH_MATRIX_INIT_FINALLY(&values, 0, 0);
594
0
    IGRAPH_MATRIX_INIT_FINALLY(&vectors, options->n, 1);
595
596
0
    if (directed) {
597
0
        dirmode = IGRAPH_IN;
598
0
    } else {
599
0
        dirmode = IGRAPH_ALL;
600
0
    }
601
602
0
    IGRAPH_VECTOR_INIT_FINALLY(&indegree, options->n);
603
0
    IGRAPH_VECTOR_INIT_FINALLY(&outdegree, options->n);
604
0
    IGRAPH_VECTOR_INIT_FINALLY(&tmp, options->n);
605
606
0
    if (reset) {
607
        /* Normalize reset vector so the sum is 1 */
608
0
        IGRAPH_CHECK(igraph_vector_init_copy(&normalized_reset, reset));
609
0
        IGRAPH_FINALLY(igraph_vector_destroy, &normalized_reset);
610
611
0
        igraph_vector_scale(&normalized_reset, 1.0 / reset_sum);
612
0
    }
613
614
0
    IGRAPH_CHECK(igraph_strength(graph, &outdegree, igraph_vss_all(),
615
0
                                 directed ? IGRAPH_OUT : IGRAPH_ALL, IGRAPH_LOOPS, weights));
616
0
    IGRAPH_CHECK(igraph_strength(graph, &indegree, igraph_vss_all(),
617
0
                                 directed ? IGRAPH_IN : IGRAPH_ALL, IGRAPH_LOOPS, weights));
618
619
    /* Set up an appropriate starting vector. We start from the (possibly weighted)
620
     * in-degrees plus some small random noise to avoid convergence problems. */
621
0
    for (i = 0; i < no_of_nodes; i++) {
622
0
        if (VECTOR(indegree)[i] > 0) {
623
            /* Note: Keep random perturbation non-negative. */
624
0
            MATRIX(vectors, i, 0) = VECTOR(indegree)[i] + RNG_UNIF(0, 1e-4);
625
0
        } else {
626
0
            MATRIX(vectors, i, 0) = 1;
627
0
        }
628
0
    }
629
630
0
    if (!weights) {
631
632
0
        igraph_adjlist_t adjlist;
633
0
        pagerank_data_t data;
634
635
0
        data.graph = graph;
636
0
        data.adjlist = &adjlist;
637
0
        data.damping = damping;
638
0
        data.outdegree = &outdegree;
639
0
        data.tmp = &tmp;
640
0
        data.reset = reset ? &normalized_reset : NULL;
641
642
0
        IGRAPH_CHECK(igraph_adjlist_init(graph, &adjlist, dirmode, IGRAPH_LOOPS, IGRAPH_MULTIPLE));
643
0
        IGRAPH_FINALLY(igraph_adjlist_destroy, &adjlist);
644
645
0
        IGRAPH_CHECK(igraph_arpack_rnsolve(pagerank_operator_unweighted,
646
0
                                           &data, options, NULL, &values, &vectors));
647
648
0
        igraph_adjlist_destroy(&adjlist);
649
0
        IGRAPH_FINALLY_CLEAN(1);
650
651
0
    } else {
652
653
0
        igraph_inclist_t inclist;
654
0
        pagerank_data_weighted_t data;
655
656
0
        data.graph = graph;
657
0
        data.inclist = &inclist;
658
0
        data.weights = weights;
659
0
        data.damping = damping;
660
0
        data.outdegree = &outdegree;
661
0
        data.tmp = &tmp;
662
0
        data.reset = reset ? &normalized_reset : NULL;
663
664
0
        IGRAPH_CHECK(igraph_inclist_init(graph, &inclist, dirmode, IGRAPH_LOOPS));
665
0
        IGRAPH_FINALLY(igraph_inclist_destroy, &inclist);
666
667
0
        IGRAPH_CHECK(igraph_arpack_rnsolve(pagerank_operator_weighted,
668
0
                                           &data, options, NULL, &values, &vectors));
669
670
0
        igraph_inclist_destroy(&inclist);
671
0
        IGRAPH_FINALLY_CLEAN(1);
672
0
    }
673
674
0
    if (reset) {
675
0
        igraph_vector_destroy(&normalized_reset);
676
0
        IGRAPH_FINALLY_CLEAN(1);
677
0
    }
678
679
0
    igraph_vector_destroy(&tmp);
680
0
    igraph_vector_destroy(&outdegree);
681
0
    igraph_vector_destroy(&indegree);
682
0
    IGRAPH_FINALLY_CLEAN(3);
683
684
0
    if (value) {
685
0
        *value = MATRIX(values, 0, 0);
686
0
    }
687
688
0
    if (vector) {
689
0
        igraph_vit_t vit;
690
0
        igraph_int_t nodes_to_calc;
691
0
        igraph_real_t sum = 0;
692
693
0
        for (i = 0; i < no_of_nodes; i++) {
694
0
            sum += MATRIX(vectors, i, 0);
695
0
        }
696
697
0
        IGRAPH_CHECK(igraph_vit_create(graph, vids, &vit));
698
0
        IGRAPH_FINALLY(igraph_vit_destroy, &vit);
699
0
        nodes_to_calc = IGRAPH_VIT_SIZE(vit);
700
701
0
        IGRAPH_CHECK(igraph_vector_resize(vector, nodes_to_calc));
702
0
        for (IGRAPH_VIT_RESET(vit), i = 0; !IGRAPH_VIT_END(vit);
703
0
             IGRAPH_VIT_NEXT(vit), i++) {
704
0
            VECTOR(*vector)[i] = MATRIX(vectors, IGRAPH_VIT_GET(vit), 0);
705
0
            VECTOR(*vector)[i] /= sum;
706
0
        }
707
708
0
        igraph_vit_destroy(&vit);
709
0
        IGRAPH_FINALLY_CLEAN(1);
710
0
    }
711
712
0
    if (options->info) {
713
0
        IGRAPH_WARNING("Non-zero return code from ARPACK routine!");
714
0
    }
715
716
0
    igraph_matrix_destroy(&vectors);
717
0
    igraph_matrix_destroy(&values);
718
0
    IGRAPH_FINALLY_CLEAN(2);
719
720
0
    return IGRAPH_SUCCESS;
721
0
}