/src/igraph/src/properties/ecc.c
Line | Count | Source |
1 | | /* |
2 | | igraph library. |
3 | | Copyright (C) 2022 The igraph development team <igraph@igraph.org> |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; either version 2 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program. If not, see <https://www.gnu.org/licenses/>. |
17 | | */ |
18 | | |
19 | | #include "igraph_transitivity.h" |
20 | | |
21 | | #include "igraph_interface.h" |
22 | | #include "igraph_iterators.h" |
23 | | #include "igraph_adjlist.h" |
24 | | |
25 | | #include "core/interruption.h" |
26 | | |
27 | | /* This implementation of ECC relies on (lazy_)adjlist_get() producing sorted |
28 | | * neighbor lists and (lazy_)adjlist_init() being called with IGRAPH_NO_LOOPS |
29 | | * and IGRAPH_NO_MULTIPLE to prevent duplicate entries. |
30 | | */ |
31 | | |
32 | | /* Optimized for the case when computing ECC for all edges. */ |
33 | | static igraph_error_t igraph_i_ecc3_1( |
34 | | const igraph_t *graph, igraph_vector_t *res, const igraph_es_t eids, |
35 | 3.15k | igraph_bool_t offset, igraph_bool_t normalize) { |
36 | | |
37 | 3.15k | igraph_int_t no_of_nodes = igraph_vcount(graph); |
38 | 3.15k | igraph_vector_int_t degree; |
39 | 3.15k | igraph_adjlist_t al; |
40 | 3.15k | igraph_eit_t eit; |
41 | 3.15k | const igraph_real_t c = offset ? 1.0 : 0.0; |
42 | | |
43 | 3.15k | IGRAPH_CHECK(igraph_adjlist_init(graph, &al, IGRAPH_ALL, IGRAPH_NO_LOOPS, IGRAPH_NO_MULTIPLE)); |
44 | 3.15k | IGRAPH_FINALLY(igraph_adjlist_destroy, &al); |
45 | | |
46 | 3.15k | IGRAPH_VECTOR_INT_INIT_FINALLY(°ree, no_of_nodes); |
47 | 3.15k | IGRAPH_CHECK(igraph_degree(graph, °ree, igraph_vss_all(), IGRAPH_ALL, IGRAPH_LOOPS)); |
48 | | |
49 | 3.15k | IGRAPH_CHECK(igraph_eit_create(graph, eids, &eit)); |
50 | 3.15k | IGRAPH_FINALLY(igraph_eit_destroy, &eit); |
51 | | |
52 | 3.15k | IGRAPH_CHECK(igraph_vector_resize(res, IGRAPH_EIT_SIZE(eit))); |
53 | | |
54 | 3.15k | for (igraph_int_t i=0; |
55 | 68.3k | ! IGRAPH_EIT_END(eit); |
56 | 65.2k | IGRAPH_EIT_NEXT(eit), i++) { |
57 | | |
58 | 65.2k | igraph_int_t edge = IGRAPH_EIT_GET(eit); |
59 | 65.2k | igraph_int_t v1 = IGRAPH_FROM(graph, edge), v2 = IGRAPH_TO(graph, edge); |
60 | | |
61 | 65.2k | igraph_real_t z; /* number of triangles the edge participates in */ |
62 | 65.2k | igraph_real_t s; /* max number of triangles the edge could be part of */ |
63 | | |
64 | 65.2k | IGRAPH_ALLOW_INTERRUPTION(); |
65 | | |
66 | 65.2k | if (v1 == v2) { |
67 | | /* A self-loop isn't, and cannot be part of any triangles. */ |
68 | 0 | z = 0.0; |
69 | 0 | s = 0.0; |
70 | 65.2k | } else { |
71 | 65.2k | const igraph_vector_int_t *a1 = igraph_adjlist_get(&al, v1), *a2 = igraph_adjlist_get(&al, v2); |
72 | 65.2k | igraph_int_t d1 = VECTOR(degree)[v1], d2 = VECTOR(degree)[v2]; |
73 | | |
74 | 65.2k | z = igraph_vector_int_intersection_size_sorted(a1, a2); |
75 | 65.2k | s = (d1 < d2 ? d1 : d2) - 1.0; |
76 | 65.2k | } |
77 | | |
78 | 65.2k | VECTOR(*res)[i] = z + c; |
79 | 65.2k | if (normalize) VECTOR(*res)[i] /= s; |
80 | 65.2k | } |
81 | | |
82 | 3.15k | igraph_eit_destroy(&eit); |
83 | 3.15k | igraph_vector_int_destroy(°ree); |
84 | 3.15k | igraph_adjlist_destroy(&al); |
85 | 3.15k | IGRAPH_FINALLY_CLEAN(3); |
86 | | |
87 | 3.15k | return IGRAPH_SUCCESS; |
88 | 3.15k | } |
89 | | |
90 | | |
91 | | /* Optimized for computing ECC for a small subset of edges. */ |
92 | | static igraph_error_t igraph_i_ecc3_2( |
93 | | const igraph_t *graph, igraph_vector_t *res, |
94 | 0 | const igraph_es_t eids, igraph_bool_t offset, igraph_bool_t normalize) { |
95 | |
|
96 | 0 | igraph_lazy_adjlist_t al; |
97 | 0 | igraph_eit_t eit; |
98 | 0 | const igraph_real_t c = offset ? 1.0 : 0.0; |
99 | |
|
100 | 0 | IGRAPH_CHECK(igraph_lazy_adjlist_init(graph, &al, IGRAPH_ALL, IGRAPH_NO_LOOPS, IGRAPH_NO_MULTIPLE)); |
101 | 0 | IGRAPH_FINALLY(igraph_lazy_adjlist_destroy, &al); |
102 | |
|
103 | 0 | IGRAPH_CHECK(igraph_eit_create(graph, eids, &eit)); |
104 | 0 | IGRAPH_FINALLY(igraph_eit_destroy, &eit); |
105 | |
|
106 | 0 | IGRAPH_CHECK(igraph_vector_resize(res, IGRAPH_EIT_SIZE(eit))); |
107 | | |
108 | 0 | for (igraph_int_t i=0; |
109 | 0 | ! IGRAPH_EIT_END(eit); |
110 | 0 | IGRAPH_EIT_NEXT(eit), i++) { |
111 | |
|
112 | 0 | igraph_int_t edge = IGRAPH_EIT_GET(eit); |
113 | 0 | igraph_int_t v1 = IGRAPH_FROM(graph, edge), v2 = IGRAPH_TO(graph, edge); |
114 | |
|
115 | 0 | igraph_real_t z; /* number of triangles the edge participates in */ |
116 | 0 | igraph_real_t s; /* max number of triangles the edge could be part of */ |
117 | |
|
118 | 0 | IGRAPH_ALLOW_INTERRUPTION(); |
119 | | |
120 | 0 | if (v1 == v2) { |
121 | | /* A self-loop isn't, and cannot be part of any triangles. */ |
122 | 0 | z = 0.0; |
123 | 0 | s = 0.0; |
124 | 0 | } else { |
125 | 0 | igraph_vector_int_t *a1 = igraph_lazy_adjlist_get(&al, v1); |
126 | 0 | igraph_vector_int_t *a2 = igraph_lazy_adjlist_get(&al, v2); |
127 | |
|
128 | 0 | igraph_int_t d1, d2; |
129 | 0 | IGRAPH_CHECK(igraph_degree_1(graph, &d1, v1, IGRAPH_ALL, IGRAPH_LOOPS)); |
130 | 0 | IGRAPH_CHECK(igraph_degree_1(graph, &d2, v2, IGRAPH_ALL, IGRAPH_LOOPS)); |
131 | | |
132 | 0 | z = igraph_vector_int_intersection_size_sorted(a1, a2); |
133 | 0 | s = (d1 < d2 ? d1 : d2) - 1.0; |
134 | 0 | } |
135 | | |
136 | 0 | VECTOR(*res)[i] = z + c; |
137 | 0 | if (normalize) VECTOR(*res)[i] /= s; |
138 | 0 | } |
139 | | |
140 | 0 | igraph_eit_destroy(&eit); |
141 | 0 | igraph_lazy_adjlist_destroy(&al); |
142 | 0 | IGRAPH_FINALLY_CLEAN(2); |
143 | |
|
144 | 0 | return IGRAPH_SUCCESS; |
145 | 0 | } |
146 | | |
147 | | |
148 | | /* Optimized for the case when computing ECC for all edges. */ |
149 | | static igraph_error_t igraph_i_ecc4_1( |
150 | | const igraph_t *graph, igraph_vector_t *res, |
151 | 0 | const igraph_es_t eids, igraph_bool_t offset, igraph_bool_t normalize) { |
152 | |
|
153 | 0 | igraph_int_t no_of_nodes = igraph_vcount(graph); |
154 | 0 | igraph_vector_int_t degree; |
155 | 0 | igraph_adjlist_t al; |
156 | 0 | igraph_eit_t eit; |
157 | 0 | igraph_real_t c = offset ? 1.0 : 0.0; |
158 | |
|
159 | 0 | IGRAPH_CHECK(igraph_adjlist_init(graph, &al, IGRAPH_ALL, IGRAPH_NO_LOOPS, IGRAPH_NO_MULTIPLE)); |
160 | 0 | IGRAPH_FINALLY(igraph_adjlist_destroy, &al); |
161 | |
|
162 | 0 | IGRAPH_VECTOR_INT_INIT_FINALLY(°ree, no_of_nodes); |
163 | 0 | IGRAPH_CHECK(igraph_degree(graph, °ree, igraph_vss_all(), IGRAPH_ALL, IGRAPH_LOOPS)); |
164 | | |
165 | 0 | IGRAPH_CHECK(igraph_eit_create(graph, eids, &eit)); |
166 | 0 | IGRAPH_FINALLY(igraph_eit_destroy, &eit); |
167 | |
|
168 | 0 | IGRAPH_CHECK(igraph_vector_resize(res, IGRAPH_EIT_SIZE(eit))); |
169 | | |
170 | 0 | for (igraph_int_t i=0; |
171 | 0 | ! IGRAPH_EIT_END(eit); |
172 | 0 | IGRAPH_EIT_NEXT(eit), i++) { |
173 | |
|
174 | 0 | igraph_int_t edge = IGRAPH_EIT_GET(eit); |
175 | 0 | igraph_int_t v1 = IGRAPH_FROM(graph, edge), v2 = IGRAPH_TO(graph, edge); |
176 | |
|
177 | 0 | igraph_real_t z; /* number of 4-cycles the edge participates in */ |
178 | 0 | igraph_real_t s; /* max number of 4-cycles the edge could be part of */ |
179 | |
|
180 | 0 | IGRAPH_ALLOW_INTERRUPTION(); |
181 | | |
182 | 0 | if (v1 == v2) { |
183 | 0 | z = 0.0; |
184 | 0 | s = 0.0; |
185 | 0 | } else { |
186 | | /* ensure that v1 is the vertex with the smaller degree */ |
187 | 0 | if (VECTOR(degree)[v1] > VECTOR(degree)[v2]) { |
188 | 0 | igraph_int_t tmp = v1; |
189 | 0 | v1 = v2; |
190 | 0 | v2 = tmp; |
191 | 0 | } |
192 | |
|
193 | 0 | z = 0.0; |
194 | 0 | const igraph_vector_int_t *a1 = igraph_adjlist_get(&al, v1); |
195 | 0 | const igraph_int_t n = igraph_vector_int_size(a1); |
196 | 0 | for (igraph_int_t j=0; j < n; j++) { |
197 | 0 | igraph_int_t v3 = VECTOR(*a1)[j]; |
198 | | |
199 | | /* It is not possible that v3 == v1 because self-loops have been removed from the adjlist. */ |
200 | |
|
201 | 0 | if (v3 == v2) continue; |
202 | | |
203 | 0 | const igraph_vector_int_t *a2 = igraph_adjlist_get(&al, v2), *a3 = igraph_adjlist_get(&al, v3); |
204 | |
|
205 | 0 | z += igraph_vector_int_intersection_size_sorted(a2, a3) - 1.0; |
206 | 0 | } |
207 | |
|
208 | 0 | igraph_int_t d1 = VECTOR(degree)[v1], d2 = VECTOR(degree)[v2]; |
209 | 0 | s = (d1 - 1.0) * (d2 - 1.0); |
210 | 0 | } |
211 | |
|
212 | 0 | VECTOR(*res)[i] = z + c; |
213 | 0 | if (normalize) VECTOR(*res)[i] /= s; |
214 | 0 | } |
215 | | |
216 | 0 | igraph_eit_destroy(&eit); |
217 | 0 | igraph_vector_int_destroy(°ree); |
218 | 0 | igraph_adjlist_destroy(&al); |
219 | 0 | IGRAPH_FINALLY_CLEAN(3); |
220 | |
|
221 | 0 | return IGRAPH_SUCCESS; |
222 | 0 | } |
223 | | |
224 | | |
225 | | /* Optimized for computing ECC for a small subset of edges. */ |
226 | | static igraph_error_t igraph_i_ecc4_2( |
227 | | const igraph_t *graph, igraph_vector_t *res, |
228 | 0 | const igraph_es_t eids, igraph_bool_t offset, igraph_bool_t normalize) { |
229 | |
|
230 | 0 | igraph_lazy_adjlist_t al; |
231 | 0 | igraph_eit_t eit; |
232 | 0 | igraph_real_t c = offset ? 1.0 : 0.0; |
233 | |
|
234 | 0 | IGRAPH_CHECK(igraph_lazy_adjlist_init(graph, &al, IGRAPH_ALL, IGRAPH_NO_LOOPS, IGRAPH_NO_MULTIPLE)); |
235 | 0 | IGRAPH_FINALLY(igraph_lazy_adjlist_destroy, &al); |
236 | |
|
237 | 0 | IGRAPH_CHECK(igraph_eit_create(graph, eids, &eit)); |
238 | 0 | IGRAPH_FINALLY(igraph_eit_destroy, &eit); |
239 | |
|
240 | 0 | IGRAPH_CHECK(igraph_vector_resize(res, IGRAPH_EIT_SIZE(eit))); |
241 | | |
242 | 0 | for (igraph_int_t i=0; |
243 | 0 | ! IGRAPH_EIT_END(eit); |
244 | 0 | IGRAPH_EIT_NEXT(eit), i++) { |
245 | |
|
246 | 0 | igraph_int_t edge = IGRAPH_EIT_GET(eit); |
247 | 0 | igraph_int_t v1 = IGRAPH_FROM(graph, edge), v2 = IGRAPH_TO(graph, edge); |
248 | |
|
249 | 0 | igraph_real_t z; /* number of 4-cycles the edge participates in */ |
250 | 0 | igraph_real_t s; /* max number of 4-cycles the edge could be part of */ |
251 | |
|
252 | 0 | IGRAPH_ALLOW_INTERRUPTION(); |
253 | | |
254 | 0 | igraph_int_t d1, d2; |
255 | 0 | IGRAPH_CHECK(igraph_degree_1(graph, &d1, v1, IGRAPH_ALL, IGRAPH_LOOPS)); |
256 | 0 | IGRAPH_CHECK(igraph_degree_1(graph, &d2, v2, IGRAPH_ALL, IGRAPH_LOOPS)); |
257 | | |
258 | 0 | if (v1 == v2) { |
259 | 0 | z = 0.0; |
260 | 0 | s = 0.0; |
261 | 0 | } else { |
262 | | /* ensure that v1 is the vertex with the smaller degree */ |
263 | 0 | if (d1 > d2) { |
264 | 0 | igraph_int_t tmp = v1; |
265 | 0 | v1 = v2; |
266 | 0 | v2 = tmp; |
267 | |
|
268 | 0 | tmp = d1; |
269 | 0 | d1 = d2; |
270 | 0 | d2 = tmp; |
271 | 0 | } |
272 | |
|
273 | 0 | z = 0.0; |
274 | |
|
275 | 0 | igraph_vector_int_t *a1 = igraph_lazy_adjlist_get(&al, v1); |
276 | |
|
277 | 0 | const igraph_int_t n = igraph_vector_int_size(a1); |
278 | 0 | for (igraph_int_t j=0; j < n; j++) { |
279 | 0 | igraph_int_t v3 = VECTOR(*a1)[j]; |
280 | | |
281 | | /* It is not possible that v3 == v1 because self-loops have been removed from the adjlist. */ |
282 | |
|
283 | 0 | if (v3 == v2) continue; |
284 | | |
285 | 0 | igraph_vector_int_t *a2 = igraph_lazy_adjlist_get(&al, v2); |
286 | 0 | igraph_vector_int_t *a3 = igraph_lazy_adjlist_get(&al, v3); |
287 | |
|
288 | 0 | z += igraph_vector_int_intersection_size_sorted(a2, a3) - 1.0; |
289 | 0 | } |
290 | |
|
291 | 0 | s = (d1 - 1.0) * (d2 - 1.0); |
292 | 0 | } |
293 | |
|
294 | 0 | VECTOR(*res)[i] = z + c; |
295 | 0 | if (normalize) VECTOR(*res)[i] /= s; |
296 | 0 | } |
297 | | |
298 | 0 | igraph_eit_destroy(&eit); |
299 | 0 | igraph_lazy_adjlist_destroy(&al); |
300 | 0 | IGRAPH_FINALLY_CLEAN(2); |
301 | |
|
302 | 0 | return IGRAPH_SUCCESS; |
303 | 0 | } |
304 | | |
305 | | |
306 | | /** |
307 | | * \function igraph_ecc |
308 | | * \brief Edge clustering coefficient of some edges. |
309 | | * |
310 | | * The edge clustering coefficient <code>C^(k)_ij</code> of an edge (i, j) |
311 | | * is defined based on the number of k-cycles the edge participates in, |
312 | | * <code>z^(k)_ij</code>, and the largest number of such cycles it could |
313 | | * participate in given the degrees of its endpoints, <code>s^(k)_ij</code>. |
314 | | * The original definition given in the reference below is: |
315 | | * |
316 | | * </para><para> |
317 | | * <code>C^(k)_ij = (z^(k)_ij + 1) / s^(k)_ij</code> |
318 | | * |
319 | | * </para><para> |
320 | | * For <code>k=3</code>, <code>s^(k)_ij = min(d_i - 1, d_j - 1)</code>, |
321 | | * where \c d_i and \c d_j are the edge endpoint degrees. |
322 | | * For <code>k=4</code>, <code>s^(k)_ij = (d_i - 1) (d_j - 1)</code>. |
323 | | * |
324 | | * </para><para> |
325 | | * The \p normalize and \p offset parameters allow for skipping normalization |
326 | | * by <code>s^(k)</code> and offsetting the cycle count <code>z^(k)</code> |
327 | | * by one in the numerator of <code>C^(k)</code>. Set both to \c true to |
328 | | * compute the original definition of this metric. |
329 | | * |
330 | | * </para><para> |
331 | | * This function ignores edge multiplicities when listing k-cycles |
332 | | * (i.e. <code>z^(k)</code>), but not when computing the maximum number of |
333 | | * cycles an edge can participate in (<code>s^(k)</code>). |
334 | | * |
335 | | * </para><para> |
336 | | * Reference: |
337 | | * |
338 | | * </para><para> |
339 | | * F. Radicchi, C. Castellano, F. Cecconi, V. Loreto, and D. Parisi, |
340 | | * PNAS 101, 2658 (2004). |
341 | | * https://doi.org/10.1073/pnas.0400054101 |
342 | | * |
343 | | * \param graph The input graph. |
344 | | * \param res Initialized vector, the result will be stored here. |
345 | | * \param eids The edges for which the edge clustering coefficient will be computed. |
346 | | * \param k Size of cycles to use in calculation. Must be at least 3. Currently |
347 | | * only values of 3 and 4 are supported. |
348 | | * \param offset Boolean, whether to add one to cycle counts. When \c false, |
349 | | * <code>z^(k)</code> is used instead of <code>z^(k) + 1</code>. In this case |
350 | | * the maximum value of the normalized metric is 1. For <code>k=3</code> this |
351 | | * is achieved for all edges in a complete graph. |
352 | | * \param normalize Boolean, whether to normalize cycle counts by the maximum |
353 | | * possible count <code>s^(k)</code> given the degrees. |
354 | | * \return Error code. |
355 | | * |
356 | | * Time complexity: When \p k is 3, O(|V| d log d + |E| d). |
357 | | * When \p k is 4, O(|V| d log d + |E| d^2). d denotes the degree of vertices. |
358 | | */ |
359 | | igraph_error_t igraph_ecc(const igraph_t *graph, igraph_vector_t *res, |
360 | | const igraph_es_t eids, igraph_int_t k, |
361 | 3.15k | igraph_bool_t offset, igraph_bool_t normalize) { |
362 | | |
363 | 3.15k | if (k < 3) { |
364 | 0 | IGRAPH_ERRORF("Cycle size for edge clustering coefficient must be at least 3, got %" IGRAPH_PRId ".", |
365 | 0 | IGRAPH_EINVAL, k); |
366 | 0 | } |
367 | | |
368 | 3.15k | switch (k) { |
369 | 3.15k | case 3: |
370 | 3.15k | if (igraph_es_is_all(&eids)) { |
371 | 3.15k | return igraph_i_ecc3_1(graph, res, eids, offset, normalize); |
372 | 3.15k | } else { |
373 | 0 | return igraph_i_ecc3_2(graph, res, eids, offset, normalize); |
374 | 0 | } |
375 | 0 | case 4: |
376 | 0 | if (igraph_es_is_all(&eids)) { |
377 | 0 | return igraph_i_ecc4_1(graph, res, eids, offset, normalize); |
378 | 0 | } else { |
379 | 0 | return igraph_i_ecc4_2(graph, res, eids, offset, normalize); |
380 | 0 | } |
381 | 0 | default: |
382 | 0 | IGRAPH_ERROR("Edge clustering coefficient calculation is only implemented for cycle sizes 3 and 4.", |
383 | 3.15k | IGRAPH_UNIMPLEMENTED); |
384 | 3.15k | } |
385 | 3.15k | } |