/src/igraph/src/community/fast_modularity.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | IGraph library. |
3 | | Copyright (C) 2007-2012 Gabor Csardi <csardi.gabor@gmail.com> |
4 | | 334 Harvard street, Cambridge, MA 02139 USA |
5 | | |
6 | | This program is free software; you can redistribute it and/or modify |
7 | | it under the terms of the GNU General Public License as published by |
8 | | the Free Software Foundation; either version 2 of the License, or |
9 | | (at your option) any later version. |
10 | | |
11 | | This program is distributed in the hope that it will be useful, |
12 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | GNU General Public License for more details. |
15 | | |
16 | | You should have received a copy of the GNU General Public License |
17 | | along with this program; if not, write to the Free Software |
18 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
19 | | 02110-1301 USA |
20 | | |
21 | | */ |
22 | | |
23 | | #include "igraph_community.h" |
24 | | |
25 | | #include "igraph_memory.h" |
26 | | #include "igraph_iterators.h" |
27 | | #include "igraph_interface.h" |
28 | | #include "igraph_progress.h" |
29 | | #include "igraph_structural.h" |
30 | | #include "igraph_vector_ptr.h" |
31 | | |
32 | | #include "core/interruption.h" |
33 | | |
34 | | /* #define IGRAPH_FASTCOMM_DEBUG */ |
35 | | |
36 | | #ifdef IGRAPH_FASTCOMM_DEBUG |
37 | | #define debug(...) fprintf(stderr, __VA_ARGS__) |
38 | | #else |
39 | | #define debug(...) |
40 | | #endif |
41 | | |
42 | | /* |
43 | | * Implementation of the community structure algorithm originally published |
44 | | * by Clauset et al in: |
45 | | * |
46 | | * A. Clauset, M.E.J. Newman and C. Moore, "Finding community structure in |
47 | | * very large networks.". Phys. Rev. E 70, 066111 (2004). |
48 | | * |
49 | | * The data structures being used are slightly different and they are described |
50 | | * most closely in: |
51 | | * |
52 | | * K. Wakita, T. Tsurumi, "Finding community structure in mega-scale social |
53 | | * networks.". arXiv:cs/0702048v1. |
54 | | * |
55 | | * We maintain a vector of communities, each of which containing a list of |
56 | | * pointers to their neighboring communities along with the increase in the |
57 | | * modularity score that could be achieved by joining the two communities. |
58 | | * Each community has a pointer to one of its neighbors - the one which would |
59 | | * result in the highest increase in modularity after a join. The local |
60 | | * (community-level) maximums are also stored in an indexed max-heap. The |
61 | | * max-heap itself stores its elements in an array which satisfies the heap |
62 | | * property, but to allow us to access any of the elements in the array based |
63 | | * on the community index (and not based on the array index - which depends on |
64 | | * the element's actual position in the heap), we also maintain an index |
65 | | * vector in the heap: the ith element of the index vector contains the |
66 | | * position of community i in the array of the max-heap. When we perform |
67 | | * sifting operations on the heap to restore the heap property, we also maintain |
68 | | * the index vector. |
69 | | */ |
70 | | |
71 | | /* Structure storing a pair of communities along with their dQ values */ |
72 | | typedef struct s_igraph_i_fastgreedy_commpair { |
73 | | igraph_integer_t first; /* first member of the community pair */ |
74 | | igraph_integer_t second; /* second member of the community pair */ |
75 | | igraph_real_t *dq; /* pointer to a member of the dq vector storing the */ |
76 | | /* increase in modularity achieved when joining */ |
77 | | struct s_igraph_i_fastgreedy_commpair *opposite; |
78 | | } igraph_i_fastgreedy_commpair; |
79 | | |
80 | | /* Structure storing a community */ |
81 | | typedef struct { |
82 | | igraph_integer_t id; /* Identifier of the community (for merges matrix) */ |
83 | | igraph_integer_t size; /* Size of the community */ |
84 | | igraph_vector_ptr_t neis; /* references to neighboring communities */ |
85 | | igraph_i_fastgreedy_commpair *maxdq; /* community pair with maximal dq */ |
86 | | } igraph_i_fastgreedy_community; |
87 | | |
88 | | /* Global community list structure */ |
89 | | typedef struct { |
90 | | igraph_integer_t no_of_communities, n; /* number of communities, number of vertices */ |
91 | | igraph_i_fastgreedy_community *e; /* list of communities */ |
92 | | igraph_i_fastgreedy_community **heap; /* heap of communities */ |
93 | | igraph_integer_t *heapindex; /* heap index to speed up lookup by community idx */ |
94 | | } igraph_i_fastgreedy_community_list; |
95 | | |
96 | | /* Scans the community neighborhood list for the new maximal dq value. |
97 | | * Returns true if the maximum is different from the previous one, |
98 | | * false otherwise. */ |
99 | 229k | static igraph_bool_t igraph_i_fastgreedy_community_rescan_max(igraph_i_fastgreedy_community *comm) { |
100 | 229k | igraph_integer_t i, n; |
101 | 229k | igraph_i_fastgreedy_commpair *p, *best; |
102 | 229k | igraph_real_t bestdq, currdq; |
103 | | |
104 | 229k | n = igraph_vector_ptr_size(&comm->neis); |
105 | 229k | if (n == 0) { |
106 | 3.28k | comm->maxdq = NULL; |
107 | 3.28k | return true; |
108 | 3.28k | } |
109 | | |
110 | 226k | best = (igraph_i_fastgreedy_commpair*)VECTOR(comm->neis)[0]; |
111 | 226k | bestdq = *best->dq; |
112 | 628k | for (i = 1; i < n; i++) { |
113 | 401k | p = (igraph_i_fastgreedy_commpair*)VECTOR(comm->neis)[i]; |
114 | 401k | currdq = *p->dq; |
115 | 401k | if (currdq > bestdq) { |
116 | 76.0k | best = p; |
117 | 76.0k | bestdq = currdq; |
118 | 76.0k | } |
119 | 401k | } |
120 | | |
121 | 226k | if (best != comm->maxdq) { |
122 | 55.3k | comm->maxdq = best; |
123 | 55.3k | return true; |
124 | 171k | } else { |
125 | 171k | return false; |
126 | 171k | } |
127 | 226k | } |
128 | | |
129 | | /* Destroys the global community list object */ |
130 | | static void igraph_i_fastgreedy_community_list_destroy( |
131 | 3.30k | igraph_i_fastgreedy_community_list *list) { |
132 | 3.30k | igraph_integer_t i; |
133 | 151k | for (i = 0; i < list->n; i++) { |
134 | 148k | igraph_vector_ptr_destroy(&list->e[i].neis); |
135 | 148k | } |
136 | 3.30k | IGRAPH_FREE(list->e); |
137 | 3.30k | if (list->heapindex != NULL) { |
138 | 3.30k | IGRAPH_FREE(list->heapindex); |
139 | 3.30k | } |
140 | 3.30k | if (list->heap != NULL) { |
141 | 3.30k | IGRAPH_FREE(list->heap); |
142 | 3.30k | } |
143 | 3.30k | } |
144 | | |
145 | | /* Community list heap maintenance: sift down */ |
146 | | static void igraph_i_fastgreedy_community_list_sift_down( |
147 | 315k | igraph_i_fastgreedy_community_list *list, igraph_integer_t idx) { |
148 | 315k | igraph_integer_t root, child, c1, c2; |
149 | 315k | igraph_i_fastgreedy_community *dummy; |
150 | 315k | igraph_integer_t dummy2; |
151 | 315k | igraph_i_fastgreedy_community** heap = list->heap; |
152 | 315k | igraph_integer_t *heapindex = list->heapindex; |
153 | | |
154 | 315k | root = idx; |
155 | 492k | while (root * 2 + 1 < list->no_of_communities) { |
156 | 320k | child = root * 2 + 1; |
157 | 320k | if (child + 1 < list->no_of_communities && |
158 | 320k | *heap[child]->maxdq->dq < *heap[child + 1]->maxdq->dq) { |
159 | 111k | child++; |
160 | 111k | } |
161 | 320k | if (*heap[root]->maxdq->dq < *heap[child]->maxdq->dq) { |
162 | 177k | c1 = heap[root]->maxdq->first; |
163 | 177k | c2 = heap[child]->maxdq->first; |
164 | | |
165 | 177k | dummy = heap[root]; |
166 | 177k | heap[root] = heap[child]; |
167 | 177k | heap[child] = dummy; |
168 | | |
169 | 177k | dummy2 = heapindex[c1]; |
170 | 177k | heapindex[c1] = heapindex[c2]; |
171 | 177k | heapindex[c2] = dummy2; |
172 | | |
173 | 177k | root = child; |
174 | 177k | } else { |
175 | 142k | break; |
176 | 142k | } |
177 | 320k | } |
178 | 315k | } |
179 | | |
180 | | /* Community list heap maintenance: sift up */ |
181 | | static void igraph_i_fastgreedy_community_list_sift_up( |
182 | 3.33k | igraph_i_fastgreedy_community_list *list, igraph_integer_t idx) { |
183 | 3.33k | igraph_integer_t root, parent, c1, c2; |
184 | 3.33k | igraph_i_fastgreedy_community *dummy; |
185 | 3.33k | igraph_integer_t dummy2; |
186 | 3.33k | igraph_i_fastgreedy_community** heap = list->heap; |
187 | 3.33k | igraph_integer_t *heapindex = list->heapindex; |
188 | | |
189 | 3.33k | root = idx; |
190 | 6.83k | while (root > 0) { |
191 | 5.57k | parent = (root - 1) / 2; |
192 | 5.57k | if (*heap[parent]->maxdq->dq < *heap[root]->maxdq->dq) { |
193 | 3.49k | c1 = heap[root]->maxdq->first; |
194 | 3.49k | c2 = heap[parent]->maxdq->first; |
195 | | |
196 | 3.49k | dummy = heap[parent]; |
197 | 3.49k | heap[parent] = heap[root]; |
198 | 3.49k | heap[root] = dummy; |
199 | | |
200 | 3.49k | dummy2 = heapindex[c1]; |
201 | 3.49k | heapindex[c1] = heapindex[c2]; |
202 | 3.49k | heapindex[c2] = dummy2; |
203 | | |
204 | 3.49k | root = parent; |
205 | 3.49k | } else { |
206 | 2.08k | break; |
207 | 2.08k | } |
208 | 5.57k | } |
209 | 3.33k | } |
210 | | |
211 | | /* Builds the community heap for the first time */ |
212 | | static void igraph_i_fastgreedy_community_list_build_heap( |
213 | 3.30k | igraph_i_fastgreedy_community_list *list) { |
214 | 3.30k | igraph_integer_t i; |
215 | 27.9k | for (i = list->no_of_communities / 2 - 1; i >= 0; i--) { |
216 | 24.6k | igraph_i_fastgreedy_community_list_sift_down(list, i); |
217 | 24.6k | } |
218 | 3.30k | } |
219 | | |
220 | | /* Finds the element belonging to a given community in the heap and return its |
221 | | * index in the heap array */ |
222 | 273k | #define igraph_i_fastgreedy_community_list_find_in_heap(list, idx) (list)->heapindex[idx] |
223 | | |
224 | | /* Dumps the heap - for debugging purposes */ |
225 | | /* |
226 | | static void igraph_i_fastgreedy_community_list_dump_heap( |
227 | | igraph_i_fastgreedy_community_list *list) { |
228 | | igraph_integer_t i; |
229 | | debug("Heap:\n"); |
230 | | for (i = 0; i < list->no_of_communities; i++) { |
231 | | debug("(%ld, %p, %p)", i, list->heap[i], |
232 | | list->heap[i]->maxdq); |
233 | | if (list->heap[i]->maxdq) { |
234 | | debug(" (%" IGRAPH_PRId ", %" IGRAPH_PRId ", %.7f)", list->heap[i]->maxdq->first, |
235 | | list->heap[i]->maxdq->second, *list->heap[i]->maxdq->dq); |
236 | | } |
237 | | debug("\n"); |
238 | | } |
239 | | debug("Heap index:\n"); |
240 | | for (i = 0; i < list->no_of_communities; i++) { |
241 | | debug("%" IGRAPH_PRId " ", list->heapindex[i]); |
242 | | } |
243 | | debug("\nEND\n"); |
244 | | } |
245 | | */ |
246 | | |
247 | | /* Checks if the community heap satisfies the heap property. |
248 | | * Only useful for debugging. */ |
249 | | /* |
250 | | static void igraph_i_fastgreedy_community_list_check_heap( |
251 | | igraph_i_fastgreedy_community_list *list) { |
252 | | igraph_integer_t i; |
253 | | for (i = 0; i < list->no_of_communities / 2; i++) { |
254 | | if ((2 * i + 1 < list->no_of_communities && *list->heap[i]->maxdq->dq < *list->heap[2 * i + 1]->maxdq->dq) || |
255 | | (2 * i + 2 < list->no_of_communities && *list->heap[i]->maxdq->dq < *list->heap[2 * i + 2]->maxdq->dq)) { |
256 | | IGRAPH_WARNING("Heap property violated"); |
257 | | debug("Position: %" IGRAPH_PRId ", %" IGRAPH_PRId " and %" IGRAPH_PRId "\n", i, 2 * i + 1, 2 * i + 2); |
258 | | igraph_i_fastgreedy_community_list_dump_heap(list); |
259 | | } |
260 | | } |
261 | | } |
262 | | */ |
263 | | |
264 | | /* Removes a given element from the heap */ |
265 | | static void igraph_i_fastgreedy_community_list_remove( |
266 | 40.9k | igraph_i_fastgreedy_community_list *list, igraph_integer_t idx) { |
267 | 40.9k | igraph_real_t old; |
268 | 40.9k | igraph_integer_t commidx; |
269 | | |
270 | | /* First adjust the index */ |
271 | 40.9k | commidx = list->heap[list->no_of_communities - 1]->maxdq->first; |
272 | 40.9k | list->heapindex[commidx] = idx; |
273 | 40.9k | commidx = list->heap[idx]->maxdq->first; |
274 | 40.9k | list->heapindex[commidx] = -1; |
275 | | |
276 | | /* Now remove the element */ |
277 | 40.9k | old = *list->heap[idx]->maxdq->dq; |
278 | 40.9k | list->heap[idx] = list->heap[list->no_of_communities - 1]; |
279 | 40.9k | list->no_of_communities--; |
280 | | |
281 | | /* Recover heap property */ |
282 | 40.9k | if (old > *list->heap[idx]->maxdq->dq) { |
283 | 40.6k | igraph_i_fastgreedy_community_list_sift_down(list, idx); |
284 | 40.6k | } else { |
285 | 266 | igraph_i_fastgreedy_community_list_sift_up(list, idx); |
286 | 266 | } |
287 | 40.9k | } |
288 | | |
289 | | /* Removes a given element from the heap when there are no more neighbors |
290 | | * for it (comm->maxdq is NULL) */ |
291 | | static void igraph_i_fastgreedy_community_list_remove2( |
292 | 3.28k | igraph_i_fastgreedy_community_list *list, igraph_integer_t idx, igraph_integer_t comm) { |
293 | 3.28k | igraph_integer_t i; |
294 | | |
295 | 3.28k | if (idx == list->no_of_communities - 1) { |
296 | | /* We removed the rightmost element on the bottom level, no problem, |
297 | | * there's nothing to be done */ |
298 | 72 | list->heapindex[comm] = -1; |
299 | 72 | list->no_of_communities--; |
300 | 72 | return; |
301 | 72 | } |
302 | | |
303 | | /* First adjust the index */ |
304 | 3.21k | i = list->heap[list->no_of_communities - 1]->maxdq->first; |
305 | 3.21k | list->heapindex[i] = idx; |
306 | 3.21k | list->heapindex[comm] = -1; |
307 | | |
308 | | /* Now remove the element */ |
309 | 3.21k | list->heap[idx] = list->heap[list->no_of_communities - 1]; |
310 | 3.21k | list->no_of_communities--; |
311 | | |
312 | | /* Recover heap property */ |
313 | 26.7k | for (i = list->no_of_communities / 2 - 1; i >= 0; i--) { |
314 | 23.5k | igraph_i_fastgreedy_community_list_sift_down(list, i); |
315 | 23.5k | } |
316 | 3.21k | } |
317 | | |
318 | | /* Removes the pair belonging to community k from the neighborhood list |
319 | | * of community c (that is, clist[c]) and recalculates maxdq */ |
320 | | static void igraph_i_fastgreedy_community_remove_nei( |
321 | 56.6k | igraph_i_fastgreedy_community_list *list, igraph_integer_t c, igraph_integer_t k) { |
322 | 56.6k | igraph_integer_t i, n; |
323 | 56.6k | igraph_bool_t rescan = false; |
324 | 56.6k | igraph_i_fastgreedy_commpair *p; |
325 | 56.6k | igraph_i_fastgreedy_community *comm; |
326 | 56.6k | igraph_real_t olddq; |
327 | | |
328 | 56.6k | comm = &list->e[c]; |
329 | 56.6k | n = igraph_vector_ptr_size(&comm->neis); |
330 | 256k | for (i = 0; i < n; i++) { |
331 | 256k | p = (igraph_i_fastgreedy_commpair*)VECTOR(comm->neis)[i]; |
332 | 256k | if (p->second == k) { |
333 | | /* Check current maxdq */ |
334 | 56.6k | if (comm->maxdq == p) { |
335 | 42.9k | rescan = true; |
336 | 42.9k | } |
337 | 56.6k | break; |
338 | 56.6k | } |
339 | 256k | } |
340 | 56.6k | if (i < n) { |
341 | 56.6k | olddq = *comm->maxdq->dq; |
342 | 56.6k | igraph_vector_ptr_remove(&comm->neis, i); |
343 | 56.6k | if (rescan) { |
344 | 42.9k | igraph_i_fastgreedy_community_rescan_max(comm); |
345 | 42.9k | i = igraph_i_fastgreedy_community_list_find_in_heap(list, c); |
346 | 42.9k | if (comm->maxdq) { |
347 | 39.6k | if (*comm->maxdq->dq > olddq) { |
348 | 382 | igraph_i_fastgreedy_community_list_sift_up(list, i); |
349 | 39.3k | } else { |
350 | 39.3k | igraph_i_fastgreedy_community_list_sift_down(list, i); |
351 | 39.3k | } |
352 | 39.6k | } else { |
353 | | /* no more neighbors for this community. we should remove this |
354 | | * community from the heap and restore the heap property */ |
355 | 3.28k | debug("REMOVING (NO MORE NEIS): %" IGRAPH_PRId "\n", i); |
356 | 3.28k | igraph_i_fastgreedy_community_list_remove2(list, i, c); |
357 | 3.28k | } |
358 | 42.9k | } |
359 | 56.6k | } |
360 | 56.6k | } |
361 | | |
362 | | /* Auxiliary function to sort a community pair list with respect to the |
363 | | * `second` field */ |
364 | 136k | static int igraph_i_fastgreedy_commpair_cmp(const void *p1, const void *p2) { |
365 | 136k | igraph_i_fastgreedy_commpair *cp1, *cp2; |
366 | 136k | igraph_integer_t diff; |
367 | 136k | cp1 = *(igraph_i_fastgreedy_commpair**)p1; |
368 | 136k | cp2 = *(igraph_i_fastgreedy_commpair**)p2; |
369 | 136k | diff = cp1->second - cp2->second; |
370 | 136k | return (diff < 0) ? -1 : (diff > 0) ? 1 : 0; |
371 | 136k | } |
372 | | |
373 | | /* Sorts the neighbor list of the community with the given index, optionally |
374 | | * optimizing the process if we know that the list is nearly sorted and only |
375 | | * a given pair is in the wrong place. */ |
376 | | static void igraph_i_fastgreedy_community_sort_neighbors_of( |
377 | | igraph_i_fastgreedy_community_list *list, igraph_integer_t index, |
378 | 265k | igraph_i_fastgreedy_commpair *changed_pair) { |
379 | 265k | igraph_vector_ptr_t *vec; |
380 | 265k | igraph_integer_t i, n; |
381 | 265k | igraph_bool_t can_skip_sort = false; |
382 | 265k | igraph_i_fastgreedy_commpair *other_pair; |
383 | | |
384 | 265k | vec = &list->e[index].neis; |
385 | 265k | if (changed_pair != NULL) { |
386 | | /* Optimized sorting */ |
387 | | |
388 | | /* First we look for changed_pair in vec */ |
389 | 116k | n = igraph_vector_ptr_size(vec); |
390 | 222k | for (i = 0; i < n; i++) { |
391 | 222k | if (VECTOR(*vec)[i] == changed_pair) { |
392 | 116k | break; |
393 | 116k | } |
394 | 222k | } |
395 | | |
396 | | /* Did we find it? We should have -- otherwise it's a bug */ |
397 | 116k | IGRAPH_ASSERT(i < n); |
398 | | |
399 | | /* Okay, the pair that changed is at index i. We need to figure out where |
400 | | * its new place should be. We can simply try moving the item all the way |
401 | | * to the left as long as the comparison function tells so (since the |
402 | | * rest of the vector is sorted), and then move all the way to the right |
403 | | * as long as the comparison function tells so, and we will be okay. */ |
404 | | |
405 | | /* Shifting to the left */ |
406 | 145k | while (i > 0) { |
407 | 58.2k | other_pair = VECTOR(*vec)[i - 1]; |
408 | 58.2k | if (other_pair->second > changed_pair->second) { |
409 | 29.3k | VECTOR(*vec)[i] = other_pair; |
410 | 29.3k | i--; |
411 | 29.3k | } else { |
412 | 28.8k | break; |
413 | 28.8k | } |
414 | 58.2k | } |
415 | 116k | VECTOR(*vec)[i] = changed_pair; |
416 | | |
417 | | /* Shifting to the right */ |
418 | 155k | while (i < n - 1) { |
419 | 75.4k | other_pair = VECTOR(*vec)[i + 1]; |
420 | 75.4k | if (other_pair->second < changed_pair->second) { |
421 | 38.5k | VECTOR(*vec)[i] = other_pair; |
422 | 38.5k | i++; |
423 | 38.5k | } else { |
424 | 36.8k | break; |
425 | 36.8k | } |
426 | 75.4k | } |
427 | 116k | VECTOR(*vec)[i] = changed_pair; |
428 | | |
429 | | /* Mark that we don't need a full sort */ |
430 | 116k | can_skip_sort = true; |
431 | 116k | } |
432 | | |
433 | 265k | if (!can_skip_sort) { |
434 | | /* Fallback to full sorting */ |
435 | 148k | igraph_vector_ptr_sort(vec, igraph_i_fastgreedy_commpair_cmp); |
436 | 148k | } |
437 | 265k | } |
438 | | |
439 | | /* Updates the dq value of community pair p in the community with index p->first |
440 | | * of the community list clist to newdq and restores the heap property |
441 | | * in community c if necessary. Returns 1 if the maximum in the row had |
442 | | * to be updated, zero otherwise */ |
443 | | static igraph_bool_t igraph_i_fastgreedy_community_update_dq( |
444 | | igraph_i_fastgreedy_community_list *list, |
445 | 276k | igraph_i_fastgreedy_commpair *p, igraph_real_t newdq) { |
446 | | |
447 | 276k | igraph_integer_t i, j, to, from; |
448 | 276k | igraph_real_t olddq; |
449 | 276k | igraph_i_fastgreedy_community *comm_to, *comm_from; |
450 | 276k | to = p->first; from = p->second; |
451 | 276k | comm_to = &list->e[to]; |
452 | 276k | comm_from = &list->e[from]; |
453 | 276k | if (comm_to->maxdq == p && newdq >= *p->dq) { |
454 | | /* If we are adjusting the current maximum and it is increased, we don't |
455 | | * have to re-scan for the new maximum */ |
456 | 141 | *p->dq = newdq; |
457 | | /* The maximum was increased, so perform a sift-up in the heap */ |
458 | 141 | i = igraph_i_fastgreedy_community_list_find_in_heap(list, to); |
459 | 141 | igraph_i_fastgreedy_community_list_sift_up(list, i); |
460 | | /* Let's check the opposite side. If the pair was not the maximal in |
461 | | * the opposite side (the other community list)... */ |
462 | 141 | if (comm_from->maxdq != p->opposite) { |
463 | 58 | if (*comm_from->maxdq->dq < newdq) { |
464 | | /* ...and it will become the maximal, we need to adjust and sift up */ |
465 | 50 | comm_from->maxdq = p->opposite; |
466 | 50 | j = igraph_i_fastgreedy_community_list_find_in_heap(list, from); |
467 | 50 | igraph_i_fastgreedy_community_list_sift_up(list, j); |
468 | 50 | } else { |
469 | | /* The pair was not the maximal in the opposite side and it will |
470 | | * NOT become the maximal, there's nothing to do there */ |
471 | 8 | } |
472 | 83 | } else { |
473 | | /* The pair was maximal in the opposite side, so we need to sift it up |
474 | | * with the new value */ |
475 | 83 | j = igraph_i_fastgreedy_community_list_find_in_heap(list, from); |
476 | 83 | igraph_i_fastgreedy_community_list_sift_up(list, j); |
477 | 83 | } |
478 | 141 | return true; |
479 | 276k | } else if (comm_to->maxdq != p && (newdq <= *comm_to->maxdq->dq)) { |
480 | | /* If we are modifying an item which is not the current maximum, and the |
481 | | * new value is less than the current maximum, we don't |
482 | | * have to re-scan for the new maximum */ |
483 | 265k | olddq = *p->dq; |
484 | 265k | *p->dq = newdq; |
485 | | /* However, if the item was the maximum on the opposite side, we'd better |
486 | | * re-scan it */ |
487 | 265k | if (comm_from->maxdq == p->opposite) { |
488 | 167k | if (olddq > newdq) { |
489 | | /* Decreased the maximum on the other side, we have to re-scan for the |
490 | | * new maximum */ |
491 | 167k | igraph_i_fastgreedy_community_rescan_max(comm_from); |
492 | 167k | j = igraph_i_fastgreedy_community_list_find_in_heap(list, from); |
493 | 167k | igraph_i_fastgreedy_community_list_sift_down(list, j); |
494 | 167k | } else { |
495 | | /* Increased the maximum on the other side, we don't have to re-scan |
496 | | * but we might have to sift up */ |
497 | 412 | j = igraph_i_fastgreedy_community_list_find_in_heap(list, from); |
498 | 412 | igraph_i_fastgreedy_community_list_sift_up(list, j); |
499 | 412 | } |
500 | 167k | } |
501 | 265k | return false; |
502 | 265k | } else { |
503 | | /* We got here in two cases: |
504 | | (1) the pair we are modifying right now is the maximum in the given |
505 | | community and we are decreasing it |
506 | | (2) the pair we are modifying right now is NOT the maximum in the |
507 | | given community, but we increase it so much that it will become |
508 | | the new maximum |
509 | | */ |
510 | 10.9k | *p->dq = newdq; |
511 | 10.9k | if (comm_to->maxdq != p) { |
512 | | /* case (2) */ |
513 | 948 | comm_to->maxdq = p; |
514 | | /* The maximum was increased, so perform a sift-up in the heap */ |
515 | 948 | i = igraph_i_fastgreedy_community_list_find_in_heap(list, to); |
516 | 948 | igraph_i_fastgreedy_community_list_sift_up(list, i); |
517 | | /* Opposite side. Chances are that the new value became the maximum |
518 | | * in the opposite side, but check it first */ |
519 | 948 | if (comm_from->maxdq != p->opposite) { |
520 | 589 | if (*comm_from->maxdq->dq < newdq) { |
521 | | /* Yes, it will become the new maximum */ |
522 | 569 | comm_from->maxdq = p->opposite; |
523 | 569 | j = igraph_i_fastgreedy_community_list_find_in_heap(list, from); |
524 | 569 | igraph_i_fastgreedy_community_list_sift_up(list, j); |
525 | 569 | } else { |
526 | | /* No, nothing to do there */ |
527 | 20 | } |
528 | 589 | } else { |
529 | | /* Already increased the maximum on the opposite side, so sift it up */ |
530 | 359 | j = igraph_i_fastgreedy_community_list_find_in_heap(list, from); |
531 | 359 | igraph_i_fastgreedy_community_list_sift_up(list, j); |
532 | 359 | } |
533 | 10.0k | } else { |
534 | | /* case (1) */ |
535 | | /* This is the worst, we have to re-scan the whole community to find |
536 | | * the new maximum and update the global maximum as well if necessary */ |
537 | 10.0k | igraph_i_fastgreedy_community_rescan_max(comm_to); |
538 | | /* The maximum was decreased, so perform a sift-down in the heap */ |
539 | 10.0k | i = igraph_i_fastgreedy_community_list_find_in_heap(list, to); |
540 | 10.0k | igraph_i_fastgreedy_community_list_sift_down(list, i); |
541 | 10.0k | if (comm_from->maxdq != p->opposite) { |
542 | | /* The one that we decreased on the opposite side is not the |
543 | | * maximal one. Nothing to do. */ |
544 | 9.89k | } else { |
545 | | /* We decreased the maximal on the opposite side as well. Re-scan |
546 | | * and sift down */ |
547 | 9.89k | igraph_i_fastgreedy_community_rescan_max(comm_from); |
548 | 9.89k | j = igraph_i_fastgreedy_community_list_find_in_heap(list, from); |
549 | 9.89k | igraph_i_fastgreedy_community_list_sift_down(list, j); |
550 | 9.89k | } |
551 | 10.0k | } |
552 | 10.9k | } |
553 | 10.9k | return true; |
554 | 276k | } |
555 | | |
556 | | /** |
557 | | * \function igraph_community_fastgreedy |
558 | | * \brief Finding community structure by greedy optimization of modularity. |
559 | | * |
560 | | * This function implements the fast greedy modularity optimization |
561 | | * algorithm for finding community structure, see |
562 | | * A Clauset, MEJ Newman, C Moore: Finding community structure in very |
563 | | * large networks, http://www.arxiv.org/abs/cond-mat/0408187 for the |
564 | | * details. |
565 | | * |
566 | | * </para><para> |
567 | | * Some improvements proposed in K Wakita, T Tsurumi: Finding community |
568 | | * structure in mega-scale social networks, |
569 | | * http://www.arxiv.org/abs/cs.CY/0702048v1 have also been implemented. |
570 | | * |
571 | | * \param graph The input graph. It must be a graph without multiple edges. |
572 | | * This is checked and an error message is given for graphs with multiple |
573 | | * edges. |
574 | | * \param weights Potentially a numeric vector containing edge |
575 | | * weights. Supply a null pointer here for unweighted graphs. The |
576 | | * weights are expected to be non-negative. |
577 | | * \param merges Pointer to an initialized matrix or \c NULL, the result of the |
578 | | * computation is stored here. The matrix has two columns and each |
579 | | * merge corresponds to one merge, the IDs of the two merged |
580 | | * components are stored. The component IDs are numbered from zero and |
581 | | * the first \c n components are the individual vertices, \c n is |
582 | | * the number of vertices in the graph. Component \c n is created |
583 | | * in the first merge, component <code>n+1</code> in the second merge, etc. |
584 | | * The matrix will be resized as needed. If this argument is \c NULL |
585 | | * then it is ignored completely. |
586 | | * \param modularity Pointer to an initialized vector or \c NULL pointer, |
587 | | * in the former case the modularity scores along the stages of the |
588 | | * computation are recorded here. The vector will be resized as |
589 | | * needed. |
590 | | * \param membership Pointer to a vector. If not a null pointer, then |
591 | | * the membership vector corresponding to the best split (in terms |
592 | | * of modularity) is stored here. |
593 | | * \return Error code. |
594 | | * |
595 | | * \sa \ref igraph_community_walktrap(), \ref |
596 | | * igraph_community_edge_betweenness() for other community detection |
597 | | * algorithms, \ref igraph_community_to_membership() to convert the |
598 | | * dendrogram to a membership vector. |
599 | | * |
600 | | * Time complexity: O(|E||V|log|V|) in the worst case, |
601 | | * O(|E|+|V|log^2|V|) typically, |V| is the number of vertices, |E| is |
602 | | * the number of edges. |
603 | | * |
604 | | * \example examples/simple/igraph_community_fastgreedy.c |
605 | | */ |
606 | | igraph_error_t igraph_community_fastgreedy(const igraph_t *graph, |
607 | | const igraph_vector_t *weights, |
608 | | igraph_matrix_int_t *merges, |
609 | | igraph_vector_t *modularity, |
610 | 3.30k | igraph_vector_int_t *membership) { |
611 | 3.30k | igraph_integer_t no_of_edges, no_of_nodes, no_of_joins, total_joins; |
612 | 3.30k | igraph_integer_t i, j, k, n, m, from, to, dummy, best_no_of_joins; |
613 | 3.30k | igraph_eit_t edgeit; |
614 | 3.30k | igraph_i_fastgreedy_commpair *pairs, *p1, *p2; |
615 | 3.30k | igraph_i_fastgreedy_community_list communities; |
616 | 3.30k | igraph_vector_t a; |
617 | 3.30k | igraph_vector_int_t degrees; |
618 | 3.30k | igraph_real_t q, *dq, bestq, weight_sum, loop_weight_sum; |
619 | 3.30k | igraph_bool_t has_multiple; |
620 | 3.30k | igraph_matrix_int_t merges_local; |
621 | | |
622 | | /*igraph_integer_t join_order[] = { 16,5, 5,6, 6,0, 4,0, 10,0, 26,29, 29,33, 23,33, 27,33, 25,24, 24,31, 12,3, 21,1, 30,8, 8,32, 9,2, 17,1, 11,0, 7,3, 3,2, 13,2, 1,2, 28,31, 31,33, 22,32, 18,32, 20,32, 32,33, 15,33, 14,33, 0,19, 19,2, -1,-1 };*/ |
623 | | /*igraph_integer_t join_order[] = { 43,42, 42,41, 44,41, 41,36, 35,36, 37,36, 36,29, 38,29, 34,29, 39,29, 33,29, 40,29, 32,29, 14,29, 30,29, 31,29, 6,18, 18,4, 23,4, 21,4, 19,4, 27,4, 20,4, 22,4, 26,4, 25,4, 24,4, 17,4, 0,13, 13,2, 1,2, 11,2, 8,2, 5,2, 3,2, 10,2, 9,2, 7,2, 2,28, 28,15, 12,15, 29,16, 4,15, -1,-1 };*/ |
624 | | |
625 | 3.30k | no_of_nodes = igraph_vcount(graph); |
626 | 3.30k | no_of_edges = igraph_ecount(graph); |
627 | | |
628 | 3.30k | if (igraph_is_directed(graph)) { |
629 | 0 | IGRAPH_ERROR("Fast greedy community detection works on undirected graphs only.", IGRAPH_UNIMPLEMENTED); |
630 | 0 | } |
631 | | |
632 | 3.30k | total_joins = no_of_nodes > 0 ? no_of_nodes - 1 : 0; |
633 | | |
634 | 3.30k | if (weights) { |
635 | 3.30k | if (igraph_vector_size(weights) != no_of_edges) { |
636 | 0 | IGRAPH_ERROR("Length of weight vector must agree with number of edges.", IGRAPH_EINVAL); |
637 | 0 | } |
638 | 3.30k | if (no_of_edges > 0) { |
639 | 3.22k | igraph_real_t minweight = igraph_vector_min(weights); |
640 | 3.22k | if (minweight < 0) { |
641 | 0 | IGRAPH_ERROR("Weights must not be negative.", IGRAPH_EINVAL); |
642 | 0 | } |
643 | 3.22k | if (isnan(minweight)) { |
644 | 0 | IGRAPH_ERROR("Weights must not be NaN.", IGRAPH_EINVAL); |
645 | 0 | } |
646 | 3.22k | } |
647 | 3.30k | weight_sum = igraph_vector_sum(weights); |
648 | 3.30k | } else { |
649 | 0 | weight_sum = no_of_edges; |
650 | 0 | } |
651 | | |
652 | 3.30k | IGRAPH_CHECK(igraph_has_multiple(graph, &has_multiple)); |
653 | 3.30k | if (has_multiple) { |
654 | 0 | IGRAPH_ERROR("Fast greedy community detection works only on graphs without multi-edges.", IGRAPH_EINVAL); |
655 | 0 | } |
656 | | |
657 | 3.30k | if (membership != NULL && merges == NULL) { |
658 | | /* We need the merge matrix because the user wants the membership |
659 | | * vector, so we allocate one on our own */ |
660 | 0 | IGRAPH_CHECK(igraph_matrix_int_init(&merges_local, total_joins, 2)); |
661 | 0 | IGRAPH_FINALLY(igraph_matrix_int_destroy, &merges_local); |
662 | 0 | merges = &merges_local; |
663 | 0 | } |
664 | | |
665 | 3.30k | if (merges != NULL) { |
666 | 3.30k | IGRAPH_CHECK(igraph_matrix_int_resize(merges, total_joins, 2)); |
667 | 3.30k | igraph_matrix_int_null(merges); |
668 | 3.30k | } |
669 | | |
670 | 3.30k | if (modularity != NULL) { |
671 | 3.30k | IGRAPH_CHECK(igraph_vector_resize(modularity, total_joins + 1)); |
672 | 3.30k | } |
673 | | |
674 | | /* Create degree vector */ |
675 | 3.30k | IGRAPH_VECTOR_INIT_FINALLY(&a, no_of_nodes); |
676 | 3.30k | if (weights) { |
677 | 3.30k | debug("Calculating weighted degrees\n"); |
678 | 63.1k | for (i = 0; i < no_of_edges; i++) { |
679 | 59.8k | VECTOR(a)[IGRAPH_FROM(graph, i)] += VECTOR(*weights)[i]; |
680 | 59.8k | VECTOR(a)[IGRAPH_TO(graph, i)] += VECTOR(*weights)[i]; |
681 | 59.8k | } |
682 | 3.30k | } else { |
683 | 0 | debug("Calculating degrees\n"); |
684 | 0 | IGRAPH_VECTOR_INT_INIT_FINALLY(°rees, no_of_nodes); |
685 | 0 | IGRAPH_CHECK(igraph_degree(graph, °rees, igraph_vss_all(), IGRAPH_ALL, IGRAPH_LOOPS)); |
686 | 0 | for (i = 0; i < no_of_nodes; i++) { |
687 | 0 | VECTOR(a)[i] = VECTOR(degrees)[i]; |
688 | 0 | } |
689 | 0 | igraph_vector_int_destroy(°rees); |
690 | 0 | IGRAPH_FINALLY_CLEAN(1); |
691 | 0 | } |
692 | | |
693 | | /* Create list of communities */ |
694 | 3.30k | debug("Creating community list\n"); |
695 | 3.30k | communities.n = no_of_nodes; |
696 | 3.30k | communities.no_of_communities = no_of_nodes; |
697 | 3.30k | communities.e = IGRAPH_CALLOC(no_of_nodes, igraph_i_fastgreedy_community); |
698 | 3.30k | IGRAPH_CHECK_OOM(communities.e, "Insufficient memory for fast greedy community detection."); |
699 | 3.30k | IGRAPH_FINALLY(igraph_free, communities.e); |
700 | | |
701 | 3.30k | communities.heap = IGRAPH_CALLOC(no_of_nodes, igraph_i_fastgreedy_community*); |
702 | 3.30k | IGRAPH_CHECK_OOM(communities.heap, "Insufficient memory for fast greedy community detection."); |
703 | 3.30k | IGRAPH_FINALLY(igraph_free, communities.heap); |
704 | | |
705 | 3.30k | communities.heapindex = IGRAPH_CALLOC(no_of_nodes, igraph_integer_t); |
706 | 3.30k | IGRAPH_CHECK_OOM(communities.heapindex, "Insufficient memory for fast greedy community detection."); |
707 | | |
708 | 3.30k | IGRAPH_FINALLY_CLEAN(2); |
709 | 3.30k | IGRAPH_FINALLY(igraph_i_fastgreedy_community_list_destroy, &communities); |
710 | | |
711 | 151k | for (i = 0; i < no_of_nodes; i++) { |
712 | 148k | IGRAPH_CHECK(igraph_vector_ptr_init(&communities.e[i].neis, 0)); |
713 | 148k | communities.e[i].id = i; |
714 | 148k | communities.e[i].size = 1; |
715 | 148k | } |
716 | | |
717 | | /* Create list of community pairs from edges */ |
718 | 3.30k | debug("Allocating dq vector\n"); |
719 | 3.30k | dq = IGRAPH_CALLOC(no_of_edges, igraph_real_t); |
720 | 3.30k | IGRAPH_CHECK_OOM(dq, "Insufficient memory for fast greedy community detection."); |
721 | 3.30k | IGRAPH_FINALLY(igraph_free, dq); |
722 | | |
723 | 3.30k | debug("Creating community pair list\n"); |
724 | 3.30k | IGRAPH_CHECK(igraph_eit_create(graph, igraph_ess_all(IGRAPH_EDGEORDER_ID), &edgeit)); |
725 | 3.30k | IGRAPH_FINALLY(igraph_eit_destroy, &edgeit); |
726 | 3.30k | pairs = IGRAPH_CALLOC(2 * no_of_edges, igraph_i_fastgreedy_commpair); |
727 | 3.30k | IGRAPH_CHECK_OOM(pairs, "Insufficient memory for fast greedy community detection."); |
728 | 3.30k | IGRAPH_FINALLY(igraph_free, pairs); |
729 | | |
730 | 3.30k | loop_weight_sum = 0; |
731 | 63.1k | for (i = 0, j = 0; !IGRAPH_EIT_END(edgeit); i += 2, j++, IGRAPH_EIT_NEXT(edgeit)) { |
732 | 59.8k | igraph_integer_t eidx = IGRAPH_EIT_GET(edgeit); |
733 | | |
734 | | /* Create the pairs themselves */ |
735 | 59.8k | from = IGRAPH_FROM(graph, eidx); to = IGRAPH_TO(graph, eidx); |
736 | 59.8k | if (from == to) { |
737 | 0 | loop_weight_sum += weights ? 2 * VECTOR(*weights)[eidx] : 2; |
738 | 0 | continue; |
739 | 0 | } |
740 | | |
741 | 59.8k | if (from > to) { |
742 | 59.8k | dummy = from; from = to; to = dummy; |
743 | 59.8k | } |
744 | 59.8k | if (weights) { |
745 | 59.8k | dq[j] = 2 * (VECTOR(*weights)[eidx] / (weight_sum * 2.0) - VECTOR(a)[from] * VECTOR(a)[to] / (4.0 * weight_sum * weight_sum)); |
746 | 59.8k | } else { |
747 | 0 | dq[j] = 2 * (1.0 / (no_of_edges * 2.0) - VECTOR(a)[from] * VECTOR(a)[to] / (4.0 * no_of_edges * no_of_edges)); |
748 | 0 | } |
749 | 59.8k | pairs[i].first = from; |
750 | 59.8k | pairs[i].second = to; |
751 | 59.8k | pairs[i].dq = &dq[j]; |
752 | 59.8k | pairs[i].opposite = &pairs[i + 1]; |
753 | 59.8k | pairs[i + 1].first = to; |
754 | 59.8k | pairs[i + 1].second = from; |
755 | 59.8k | pairs[i + 1].dq = pairs[i].dq; |
756 | 59.8k | pairs[i + 1].opposite = &pairs[i]; |
757 | | /* Link the pair to the communities */ |
758 | 59.8k | IGRAPH_CHECK(igraph_vector_ptr_push_back(&communities.e[from].neis, &pairs[i])); |
759 | 59.8k | IGRAPH_CHECK(igraph_vector_ptr_push_back(&communities.e[to].neis, &pairs[i + 1])); |
760 | | /* Update maximums */ |
761 | 59.8k | if (communities.e[from].maxdq == NULL || *communities.e[from].maxdq->dq < *pairs[i].dq) { |
762 | 25.5k | communities.e[from].maxdq = &pairs[i]; |
763 | 25.5k | } |
764 | 59.8k | if (communities.e[to].maxdq == NULL || *communities.e[to].maxdq->dq < *pairs[i + 1].dq) { |
765 | 46.5k | communities.e[to].maxdq = &pairs[i + 1]; |
766 | 46.5k | } |
767 | 59.8k | } |
768 | 3.30k | igraph_eit_destroy(&edgeit); |
769 | 3.30k | IGRAPH_FINALLY_CLEAN(1); |
770 | | |
771 | | /* Sorting community neighbor lists by community IDs */ |
772 | 3.30k | debug("Sorting community neighbor lists\n"); |
773 | 151k | for (i = 0, j = 0; i < no_of_nodes; i++) { |
774 | 148k | igraph_i_fastgreedy_community_sort_neighbors_of(&communities, i, NULL); |
775 | | /* Isolated vertices and vertices with loop edges only won't be stored in |
776 | | * the heap (to avoid maxdq == NULL) */ |
777 | 148k | if (communities.e[i].maxdq != NULL) { |
778 | 50.6k | communities.heap[j] = &communities.e[i]; |
779 | 50.6k | communities.heapindex[i] = j; |
780 | 50.6k | j++; |
781 | 97.9k | } else { |
782 | 97.9k | communities.heapindex[i] = -1; |
783 | 97.9k | } |
784 | 148k | } |
785 | 3.30k | communities.no_of_communities = j; |
786 | | |
787 | | /* Calculate proper vector a (see paper) and initial modularity */ |
788 | 3.30k | q = 2.0 * (weights ? weight_sum : no_of_edges); |
789 | 3.30k | if (q == 0) { |
790 | | /* All the weights are zero */ |
791 | 3.22k | } else { |
792 | 3.22k | igraph_vector_scale(&a, 1.0 / q); |
793 | 3.22k | q = loop_weight_sum / q; |
794 | 149k | for (i = 0; i < no_of_nodes; i++) { |
795 | 146k | q -= VECTOR(a)[i] * VECTOR(a)[i]; |
796 | 146k | } |
797 | 3.22k | } |
798 | | |
799 | | /* Initialize "best modularity" value and best merge counter */ |
800 | 3.30k | bestq = q; |
801 | 3.30k | best_no_of_joins = 0; |
802 | | |
803 | | /* Initializing community heap */ |
804 | 3.30k | debug("Initializing community heap\n"); |
805 | 3.30k | igraph_i_fastgreedy_community_list_build_heap(&communities); |
806 | | |
807 | 3.30k | debug("Initial modularity: %.4f\n", q); |
808 | | |
809 | | /* Let's rock ;) */ |
810 | 3.30k | no_of_joins = 0; |
811 | 47.4k | while (no_of_joins < total_joins) { |
812 | 46.9k | IGRAPH_ALLOW_INTERRUPTION(); |
813 | 46.9k | IGRAPH_PROGRESS("Fast greedy community detection", no_of_joins * 100.0 / total_joins, 0); |
814 | | |
815 | | /* Store the modularity */ |
816 | 46.9k | if (modularity) { |
817 | 46.9k | VECTOR(*modularity)[no_of_joins] = q; |
818 | 46.9k | } |
819 | | |
820 | | /* Update best modularity if needed */ |
821 | 46.9k | if (q >= bestq) { |
822 | 42.7k | bestq = q; |
823 | 42.7k | best_no_of_joins = no_of_joins; |
824 | 42.7k | } |
825 | | |
826 | | /* Some debug info if needed */ |
827 | | /* igraph_i_fastgreedy_community_list_check_heap(&communities); */ |
828 | | #ifdef IGRAPH_FASTCOMM_DEBUG |
829 | | debug("===========================================\n"); |
830 | | for (i = 0; i < communities.n; i++) { |
831 | | if (communities.e[i].maxdq == 0) { |
832 | | debug("Community #%ld: PASSIVE\n", i); |
833 | | continue; |
834 | | } |
835 | | debug("Community #%ld\n ", i); |
836 | | for (j = 0; j < igraph_vector_ptr_size(&communities.e[i].neis); j++) { |
837 | | p1 = (igraph_i_fastgreedy_commpair*)VECTOR(communities.e[i].neis)[j]; |
838 | | debug(" (%ld,%ld,%.4f)", p1->first, p1->second, *p1->dq); |
839 | | } |
840 | | p1 = communities.e[i].maxdq; |
841 | | debug("\n Maxdq: (%ld,%ld,%.4f)\n", p1->first, p1->second, *p1->dq); |
842 | | } |
843 | | debug("Global maxdq is: (%ld,%ld,%.4f)\n", communities.heap[0]->maxdq->first, |
844 | | communities.heap[0]->maxdq->second, *communities.heap[0]->maxdq->dq); |
845 | | for (i = 0; i < communities.no_of_communities; i++) { |
846 | | debug("(%ld,%ld,%.4f) ", communities.heap[i]->maxdq->first, communities.heap[i]->maxdq->second, *communities.heap[0]->maxdq->dq); |
847 | | } |
848 | | debug("\n"); |
849 | | #endif |
850 | 46.9k | if (communities.heap[0] == NULL) { |
851 | 72 | break; /* no more communities */ |
852 | 72 | } |
853 | 46.9k | if (communities.heap[0]->maxdq == NULL) { |
854 | 2.76k | break; /* there are only isolated comms */ |
855 | 2.76k | } |
856 | 44.1k | to = communities.heap[0]->maxdq->second; |
857 | 44.1k | from = communities.heap[0]->maxdq->first; |
858 | | |
859 | 44.1k | debug("Q[%ld] = %.7f\tdQ = %.7f\t |H| = %ld\n", |
860 | 44.1k | no_of_joins, q, *communities.heap[0]->maxdq->dq, no_of_nodes - no_of_joins - 1); |
861 | | |
862 | | /* IGRAPH_FASTCOMM_DEBUG */ |
863 | | /* from=join_order[no_of_joins*2]; to=join_order[no_of_joins*2+1]; |
864 | | if (to == -1) break; |
865 | | for (i=0; i<igraph_vector_ptr_size(&communities.e[to].neis); i++) { |
866 | | p1=(igraph_i_fastgreedy_commpair*)VECTOR(communities.e[to].neis)[i]; |
867 | | if (p1->second == from) communities.maxdq = p1; |
868 | | } */ |
869 | | |
870 | 44.1k | n = igraph_vector_ptr_size(&communities.e[to].neis); |
871 | 44.1k | m = igraph_vector_ptr_size(&communities.e[from].neis); |
872 | | /*if (n>m) { |
873 | | dummy=n; n=m; m=dummy; |
874 | | dummy=to; to=from; from=dummy; |
875 | | }*/ |
876 | 44.1k | debug(" joining: %ld <- %ld\n", to, from); |
877 | 44.1k | q += *communities.heap[0]->maxdq->dq; |
878 | | |
879 | | /* Merge the second community into the first */ |
880 | 44.1k | i = j = 0; |
881 | 268k | while (i < n && j < m) { |
882 | 224k | p1 = (igraph_i_fastgreedy_commpair*)VECTOR(communities.e[to].neis)[i]; |
883 | 224k | p2 = (igraph_i_fastgreedy_commpair*)VECTOR(communities.e[from].neis)[j]; |
884 | 224k | debug("Pairs: %" IGRAPH_PRId "-%" IGRAPH_PRId " and %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", p1->first, p1->second, |
885 | 224k | p2->first, p2->second); |
886 | 224k | if (p1->second < p2->second) { |
887 | | /* Considering p1 from now on */ |
888 | 113k | debug(" Considering: %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", p1->first, p1->second); |
889 | 113k | if (p1->second == from) { |
890 | 30.2k | debug(" WILL REMOVE: %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", to, from); |
891 | 83.3k | } else { |
892 | | /* chain, case 1 */ |
893 | 83.3k | debug(" CHAIN(1): %ld-%ld %ld, now=%.7f, adding=%.7f, newdq(%ld,%ld)=%.7f\n", |
894 | 83.3k | to, p1->second, from, *p1->dq, -2 * VECTOR(a)[from]*VECTOR(a)[p1->second], p1->first, p1->second, *p1->dq - 2 * VECTOR(a)[from]*VECTOR(a)[p1->second]); |
895 | 83.3k | igraph_i_fastgreedy_community_update_dq(&communities, p1, *p1->dq - 2 * VECTOR(a)[from]*VECTOR(a)[p1->second]); |
896 | 83.3k | } |
897 | 113k | i++; |
898 | 113k | } else if (p1->second == p2->second) { |
899 | | /* p1->first, p1->second and p2->first form a triangle */ |
900 | 15.7k | debug(" Considering: %" IGRAPH_PRId "-%" IGRAPH_PRId " and %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", p1->first, p1->second, |
901 | 15.7k | p2->first, p2->second); |
902 | | /* Update dq value */ |
903 | 15.7k | debug(" TRIANGLE: %ld-%ld-%ld, now=%.7f, adding=%.7f, newdq(%ld,%ld)=%.7f\n", |
904 | 15.7k | to, p1->second, from, *p1->dq, *p2->dq, p1->first, p1->second, *p1->dq + *p2->dq); |
905 | 15.7k | igraph_i_fastgreedy_community_update_dq(&communities, p1, *p1->dq + *p2->dq); |
906 | 15.7k | igraph_i_fastgreedy_community_remove_nei(&communities, p1->second, from); |
907 | 15.7k | i++; |
908 | 15.7k | j++; |
909 | 95.0k | } else { |
910 | 95.0k | debug(" Considering: %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", p2->first, p2->second); |
911 | 95.0k | if (p2->second == to) { |
912 | 29.2k | debug(" WILL REMOVE: %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", p2->second, p2->first); |
913 | 65.8k | } else { |
914 | | /* chain, case 2 */ |
915 | 65.8k | debug(" CHAIN(2): %ld %ld-%ld, newdq(%ld,%ld)=%.7f\n", |
916 | 65.8k | to, p2->second, from, to, p2->second, *p2->dq - 2 * VECTOR(a)[to]*VECTOR(a)[p2->second]); |
917 | 65.8k | p2->opposite->second = to; |
918 | | /* p2->opposite->second changed, so it means that |
919 | | * communities.e[p2->second].neis (which contains p2->opposite) is |
920 | | * not sorted anymore. We have to find the index of p2->opposite in |
921 | | * this vector and move it to the correct place. Moving should be an |
922 | | * O(n) operation; re-sorting would be O(n*logn) or even worse, |
923 | | * depending on the pivoting strategy used by qsort() since the |
924 | | * vector is nearly sorted */ |
925 | 65.8k | igraph_i_fastgreedy_community_sort_neighbors_of( |
926 | 65.8k | &communities, p2->second, p2->opposite); |
927 | | /* link from.neis[j] to the current place in to.neis if |
928 | | * from.neis[j] != to */ |
929 | 65.8k | p2->first = to; |
930 | 65.8k | IGRAPH_CHECK(igraph_vector_ptr_insert(&communities.e[to].neis, i, p2)); |
931 | 65.8k | n++; i++; |
932 | 65.8k | if (*p2->dq > *communities.e[to].maxdq->dq) { |
933 | 101 | communities.e[to].maxdq = p2; |
934 | 101 | k = igraph_i_fastgreedy_community_list_find_in_heap(&communities, to); |
935 | 101 | igraph_i_fastgreedy_community_list_sift_up(&communities, k); |
936 | 101 | } |
937 | 65.8k | igraph_i_fastgreedy_community_update_dq(&communities, p2, *p2->dq - 2 * VECTOR(a)[to]*VECTOR(a)[p2->second]); |
938 | 65.8k | } |
939 | 95.0k | j++; |
940 | 95.0k | } |
941 | 224k | } |
942 | | |
943 | 44.1k | p1 = NULL; |
944 | 118k | while (i < n) { |
945 | 74.3k | p1 = (igraph_i_fastgreedy_commpair*)VECTOR(communities.e[to].neis)[i]; |
946 | 74.3k | if (p1->second == from) { |
947 | 13.8k | debug(" WILL REMOVE: %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", p1->first, from); |
948 | 60.5k | } else { |
949 | | /* chain, case 1 */ |
950 | 60.5k | debug(" CHAIN(1): %ld-%ld %ld, now=%.7f, adding=%.7f, newdq(%ld,%ld)=%.7f\n", |
951 | 60.5k | to, p1->second, from, *p1->dq, -2 * VECTOR(a)[from]*VECTOR(a)[p1->second], p1->first, p1->second, *p1->dq - 2 * VECTOR(a)[from]*VECTOR(a)[p1->second]); |
952 | 60.5k | igraph_i_fastgreedy_community_update_dq(&communities, p1, *p1->dq - 2 * VECTOR(a)[from]*VECTOR(a)[p1->second]); |
953 | 60.5k | } |
954 | 74.3k | i++; |
955 | 74.3k | } |
956 | 109k | while (j < m) { |
957 | 65.6k | p2 = (igraph_i_fastgreedy_commpair*)VECTOR(communities.e[from].neis)[j]; |
958 | 65.6k | if (to == p2->second) { |
959 | 14.8k | j++; |
960 | 14.8k | continue; |
961 | 14.8k | } |
962 | | /* chain, case 2 */ |
963 | 50.7k | debug(" CHAIN(2): %ld %ld-%ld, newdq(%ld,%ld)=%.7f\n", |
964 | 50.7k | to, p2->second, from, p1 ? p1->first : -1, p2->second, *p2->dq - 2 * VECTOR(a)[to]*VECTOR(a)[p2->second]); |
965 | 50.7k | p2->opposite->second = to; |
966 | | /* need to re-sort community nei list `p2->second` */ |
967 | 50.7k | igraph_i_fastgreedy_community_sort_neighbors_of(&communities, p2->second, p2->opposite); |
968 | | /* link from.neis[j] to the current place in to.neis if |
969 | | * from.neis[j] != to */ |
970 | 50.7k | p2->first = to; |
971 | 50.7k | IGRAPH_CHECK(igraph_vector_ptr_push_back(&communities.e[to].neis, p2)); |
972 | 50.7k | if (*p2->dq > *communities.e[to].maxdq->dq) { |
973 | 27 | communities.e[to].maxdq = p2; |
974 | 27 | k = igraph_i_fastgreedy_community_list_find_in_heap(&communities, to); |
975 | 27 | igraph_i_fastgreedy_community_list_sift_up(&communities, k); |
976 | 27 | } |
977 | 50.7k | igraph_i_fastgreedy_community_update_dq(&communities, p2, *p2->dq - 2 * VECTOR(a)[to]*VECTOR(a)[p2->second]); |
978 | 50.7k | j++; |
979 | 50.7k | } |
980 | | |
981 | | /* Now, remove community `from` from the neighbors of community `to` */ |
982 | 44.1k | if (communities.no_of_communities > 2) { |
983 | 40.9k | debug(" REMOVING: %" IGRAPH_PRId "-%" IGRAPH_PRId "\n", to, from); |
984 | 40.9k | igraph_i_fastgreedy_community_remove_nei(&communities, to, from); |
985 | 40.9k | i = igraph_i_fastgreedy_community_list_find_in_heap(&communities, from); |
986 | 40.9k | igraph_i_fastgreedy_community_list_remove(&communities, i); |
987 | 40.9k | } |
988 | 44.1k | communities.e[from].maxdq = NULL; |
989 | | |
990 | | /* Update community sizes */ |
991 | 44.1k | communities.e[to].size += communities.e[from].size; |
992 | 44.1k | communities.e[from].size = 0; |
993 | | |
994 | | /* record what has been merged */ |
995 | | /* igraph_vector_ptr_clear is not enough here as it won't free |
996 | | * the memory consumed by communities.e[from].neis. Thanks |
997 | | * to Tom Gregorovic for pointing that out. */ |
998 | 44.1k | igraph_vector_ptr_destroy(&communities.e[from].neis); |
999 | 44.1k | if (merges) { |
1000 | 44.1k | MATRIX(*merges, no_of_joins, 0) = communities.e[to].id; |
1001 | 44.1k | MATRIX(*merges, no_of_joins, 1) = communities.e[from].id; |
1002 | 44.1k | communities.e[to].id = no_of_nodes + no_of_joins; |
1003 | 44.1k | } |
1004 | | |
1005 | | /* Update vector a */ |
1006 | 44.1k | VECTOR(a)[to] += VECTOR(a)[from]; |
1007 | 44.1k | VECTOR(a)[from] = 0.0; |
1008 | | |
1009 | 44.1k | no_of_joins++; |
1010 | 44.1k | } |
1011 | | /* TODO: continue merging when some isolated communities remained. Always |
1012 | | * joining the communities with the least number of nodes results in the |
1013 | | * smallest decrease in modularity every step. Now we're simply deleting |
1014 | | * the excess rows from the merge matrix */ |
1015 | 3.30k | if (merges != NULL) { |
1016 | 3.30k | if (no_of_joins < total_joins) { |
1017 | 2.84k | igraph_integer_t *ivec; |
1018 | 2.84k | igraph_integer_t merges_nrow = igraph_matrix_int_nrow(merges); |
1019 | | |
1020 | 2.84k | ivec = IGRAPH_CALLOC(merges_nrow, igraph_integer_t); |
1021 | 2.84k | IGRAPH_CHECK_OOM(ivec, "Insufficient memory for fast greedy community detection."); |
1022 | 2.84k | IGRAPH_FINALLY(igraph_free, ivec); |
1023 | | |
1024 | 41.3k | for (i = 0; i < no_of_joins; i++) { |
1025 | 38.5k | ivec[i] = i + 1; |
1026 | 38.5k | } |
1027 | | |
1028 | 2.84k | igraph_matrix_int_permdelete_rows(merges, ivec, total_joins - no_of_joins); |
1029 | | |
1030 | 2.84k | IGRAPH_FREE(ivec); |
1031 | 2.84k | IGRAPH_FINALLY_CLEAN(1); |
1032 | 2.84k | } |
1033 | 3.30k | } |
1034 | 3.30k | IGRAPH_PROGRESS("Fast greedy community detection", 100.0, 0); |
1035 | | |
1036 | 3.30k | if (modularity) { |
1037 | 3.30k | VECTOR(*modularity)[no_of_joins] = q; |
1038 | 3.30k | IGRAPH_CHECK(igraph_vector_resize(modularity, no_of_joins + 1)); |
1039 | 3.30k | } |
1040 | | |
1041 | | /* Internally, the algorithm does not create NaN values. |
1042 | | * If the graph has no edges, the final modularity will be zero. |
1043 | | * We change this to NaN for consistency. */ |
1044 | 3.30k | if (modularity && no_of_edges == 0) { |
1045 | 84 | IGRAPH_ASSERT(no_of_joins == 0); |
1046 | 84 | VECTOR(*modularity)[0] = IGRAPH_NAN; |
1047 | 84 | } |
1048 | | |
1049 | 3.30k | debug("Freeing memory\n"); |
1050 | 3.30k | IGRAPH_FREE(pairs); |
1051 | 3.30k | IGRAPH_FREE(dq); |
1052 | 3.30k | igraph_i_fastgreedy_community_list_destroy(&communities); |
1053 | 3.30k | igraph_vector_destroy(&a); |
1054 | 3.30k | IGRAPH_FINALLY_CLEAN(4); |
1055 | | |
1056 | 3.30k | if (membership) { |
1057 | 3.30k | IGRAPH_CHECK(igraph_community_to_membership(merges, |
1058 | 3.30k | no_of_nodes, |
1059 | 3.30k | /*steps=*/ best_no_of_joins, |
1060 | 3.30k | membership, |
1061 | 3.30k | /*csize=*/ NULL)); |
1062 | 3.30k | } |
1063 | | |
1064 | 3.30k | if (merges == &merges_local) { |
1065 | 0 | igraph_matrix_int_destroy(&merges_local); |
1066 | 0 | IGRAPH_FINALLY_CLEAN(1); |
1067 | 0 | } |
1068 | | |
1069 | 3.30k | return IGRAPH_SUCCESS; |
1070 | 3.30k | } |
1071 | | |
1072 | | #ifdef IGRAPH_FASTCOMM_DEBUG |
1073 | | #undef IGRAPH_FASTCOMM_DEBUG |
1074 | | #endif |