/src/igraph/src/community/fluid.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | igraph library. |
3 | | Copyright (C) 2007-2020 The igraph development team |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; either version 2 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program; if not, write to the Free Software |
17 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
18 | | 02110-1301 USA |
19 | | |
20 | | */ |
21 | | |
22 | | #include "igraph_community.h" |
23 | | |
24 | | #include "igraph_adjlist.h" |
25 | | #include "igraph_components.h" |
26 | | #include "igraph_interface.h" |
27 | | #include "igraph_random.h" |
28 | | #include "igraph_structural.h" |
29 | | |
30 | | /** |
31 | | * \ingroup communities |
32 | | * \function igraph_community_fluid_communities |
33 | | * \brief Community detection based on fluids interacting on the graph. |
34 | | * |
35 | | * The algorithm is based on the simple idea of |
36 | | * several fluids interacting in a non-homogeneous environment |
37 | | * (the graph topology), expanding and contracting based on their |
38 | | * interaction and density. Weighted graphs are not supported. |
39 | | * |
40 | | * </para><para> |
41 | | * This function implements the community detection method described in: |
42 | | * Parés F, Gasulla DG, et. al. (2018) Fluid Communities: A Competitive, |
43 | | * Scalable and Diverse Community Detection Algorithm. In: Complex Networks |
44 | | * & Their Applications VI: Proceedings of Complex Networks 2017 (The Sixth |
45 | | * International Conference on Complex Networks and Their Applications), |
46 | | * Springer, vol 689, p 229. https://doi.org/10.1007/978-3-319-72150-7_19 |
47 | | * |
48 | | * \param graph The input graph. The graph must be simple and connected. |
49 | | * Edge directions will be ignored. |
50 | | * \param no_of_communities The number of communities to be found. Must be |
51 | | * greater than 0 and fewer than number of vertices in the graph. |
52 | | * \param membership The result vector mapping vertices to the communities |
53 | | * they are assigned to. |
54 | | * \return Error code. |
55 | | * |
56 | | * Time complexity: O(|E|) |
57 | | */ |
58 | | igraph_error_t igraph_community_fluid_communities( |
59 | | const igraph_t *graph, |
60 | | igraph_int_t no_of_communities, |
61 | 223 | igraph_vector_int_t *membership) { |
62 | | |
63 | 223 | const igraph_int_t no_of_nodes = igraph_vcount(graph); |
64 | 223 | igraph_int_t i, j, k, kv1; |
65 | 223 | igraph_adjlist_t al; |
66 | 223 | igraph_real_t max_density; |
67 | 223 | igraph_bool_t is_simple, is_connected, running; |
68 | 223 | igraph_vector_t density, label_counters; |
69 | 223 | igraph_vector_int_t dominant_labels, node_order, com_to_numvertices; |
70 | | |
71 | | /* Checking input values */ |
72 | 223 | if (igraph_is_directed(graph)) { |
73 | 0 | IGRAPH_WARNING("Edge directions are ignored by fluid community detection."); |
74 | 0 | } |
75 | 223 | IGRAPH_CHECK(igraph_is_simple(graph, &is_simple, IGRAPH_UNDIRECTED)); |
76 | 223 | if (!is_simple) { |
77 | 0 | IGRAPH_ERROR("Fluid community detection supports only simple graphs.", IGRAPH_EINVAL); |
78 | 0 | } |
79 | | |
80 | | /* This must come before the connectedness check so we can support the null |
81 | | * graph (considered disconnected) for purposes of convenience. */ |
82 | 223 | if (no_of_nodes < 2) { |
83 | 9 | if (membership) { |
84 | 9 | IGRAPH_CHECK(igraph_vector_int_resize(membership, no_of_nodes)); |
85 | 9 | igraph_vector_int_null(membership); |
86 | 9 | } |
87 | 9 | return IGRAPH_SUCCESS; |
88 | 9 | } |
89 | | |
90 | 214 | if (no_of_communities < 1) { |
91 | 0 | IGRAPH_ERROR("Number of requested communities must be positive.", IGRAPH_EINVAL); |
92 | 0 | } |
93 | | |
94 | 214 | if (no_of_communities > no_of_nodes) { |
95 | 0 | IGRAPH_ERROR("Number of requested communities must not be greater than the number of nodes.", |
96 | 0 | IGRAPH_EINVAL); |
97 | 0 | } |
98 | | |
99 | 214 | IGRAPH_CHECK(igraph_is_connected(graph, &is_connected, IGRAPH_WEAK)); |
100 | 214 | if (!is_connected) { |
101 | 0 | IGRAPH_ERROR("Fluid community detection supports only connected graphs.", IGRAPH_EINVAL); |
102 | 0 | } |
103 | | |
104 | | /* Internal variables initialization */ |
105 | 214 | max_density = 1.0; |
106 | | |
107 | | /* Resize membership vector (number of nodes) */ |
108 | 214 | IGRAPH_CHECK(igraph_vector_int_resize(membership, no_of_nodes)); |
109 | | |
110 | | /* Initialize density and com_to_numvertices vectors */ |
111 | 214 | IGRAPH_CHECK(igraph_vector_init(&density, no_of_communities)); |
112 | 214 | IGRAPH_FINALLY(igraph_vector_destroy, &density); |
113 | 214 | IGRAPH_CHECK(igraph_vector_int_init(&com_to_numvertices, no_of_communities)); |
114 | 214 | IGRAPH_FINALLY(igraph_vector_int_destroy, &com_to_numvertices); |
115 | | |
116 | | /* Initialize node ordering vector */ |
117 | 214 | IGRAPH_CHECK(igraph_vector_int_init_range(&node_order, 0, no_of_nodes)); |
118 | 214 | IGRAPH_FINALLY(igraph_vector_int_destroy, &node_order); |
119 | | |
120 | | /* Initialize the membership vector with 0 values */ |
121 | 214 | igraph_vector_int_null(membership); |
122 | | /* Initialize densities to max_density */ |
123 | 214 | igraph_vector_fill(&density, max_density); |
124 | | |
125 | | /* Initialize com_to_numvertices and initialize communities into membership vector */ |
126 | 214 | igraph_vector_int_shuffle(&node_order); |
127 | 997 | for (i = 0; i < no_of_communities; i++) { |
128 | | /* Initialize membership at initial nodes for each community |
129 | | * where 0 refers to have no label*/ |
130 | 783 | VECTOR(*membership)[VECTOR(node_order)[i]] = i + 1; |
131 | | /* Initialize com_to_numvertices list: Number of vertices for each community */ |
132 | 783 | VECTOR(com_to_numvertices)[i] = 1; |
133 | 783 | } |
134 | | |
135 | | /* Create an adjacency list representation for efficiency. */ |
136 | 214 | IGRAPH_CHECK(igraph_adjlist_init(graph, &al, IGRAPH_ALL, IGRAPH_LOOPS_TWICE, IGRAPH_MULTIPLE)); |
137 | 214 | IGRAPH_FINALLY(igraph_adjlist_destroy, &al); |
138 | | |
139 | | /* Create storage space for counting distinct labels and dominant ones */ |
140 | 214 | IGRAPH_VECTOR_INT_INIT_FINALLY(&dominant_labels, no_of_communities); |
141 | | |
142 | 214 | IGRAPH_CHECK(igraph_vector_init(&label_counters, no_of_communities)); |
143 | 214 | IGRAPH_FINALLY(igraph_vector_destroy, &label_counters); |
144 | | |
145 | | /* running is the convergence boolean variable */ |
146 | 214 | running = true; |
147 | 876 | while (running) { |
148 | | /* Declarations of variables used inside main loop */ |
149 | 662 | igraph_int_t v1, size, rand_idx; |
150 | 662 | igraph_real_t max_count, label_counter_diff; |
151 | 662 | igraph_vector_int_t *neis; |
152 | 662 | igraph_bool_t same_label_in_dominant; |
153 | | |
154 | 662 | running = false; |
155 | | |
156 | | /* Shuffle the node ordering vector */ |
157 | 662 | igraph_vector_int_shuffle(&node_order); |
158 | | /* In the prescribed order, loop over the vertices and reassign labels */ |
159 | 19.0k | for (i = 0; i < no_of_nodes; i++) { |
160 | | /* Clear dominant_labels and nonzero_labels vectors */ |
161 | 18.4k | igraph_vector_int_clear(&dominant_labels); |
162 | 18.4k | igraph_vector_null(&label_counters); |
163 | | |
164 | | /* Obtain actual node index */ |
165 | 18.4k | v1 = VECTOR(node_order)[i]; |
166 | | /* Take into account same label in updating rule */ |
167 | 18.4k | kv1 = VECTOR(*membership)[v1]; |
168 | 18.4k | max_count = 0.0; |
169 | 18.4k | if (kv1 != 0) { |
170 | 13.1k | VECTOR(label_counters)[kv1 - 1] += VECTOR(density)[kv1 - 1]; |
171 | | /* Set up max_count */ |
172 | 13.1k | max_count = VECTOR(density)[kv1 - 1]; |
173 | | /* Initialize dominant_labels */ |
174 | 13.1k | IGRAPH_CHECK(igraph_vector_int_resize(&dominant_labels, 1)); |
175 | 13.1k | VECTOR(dominant_labels)[0] = kv1; |
176 | 13.1k | } |
177 | | |
178 | | /* Count the weights corresponding to different labels */ |
179 | 18.4k | neis = igraph_adjlist_get(&al, v1); |
180 | 18.4k | size = igraph_vector_int_size(neis); |
181 | 64.5k | for (j = 0; j < size; j++) { |
182 | 46.1k | k = VECTOR(*membership)[VECTOR(*neis)[j]]; |
183 | | /* skip if it has no label yet */ |
184 | 46.1k | if (k == 0) { |
185 | 9.00k | continue; |
186 | 9.00k | } |
187 | | /* Update label counter and evaluate diff against max_count*/ |
188 | 37.1k | VECTOR(label_counters)[k - 1] += VECTOR(density)[k - 1]; |
189 | 37.1k | label_counter_diff = VECTOR(label_counters)[k - 1] - max_count; |
190 | | /* Check if this label must be included in dominant_labels vector */ |
191 | 37.1k | if (label_counter_diff > 0.0001) { |
192 | 25.9k | max_count = VECTOR(label_counters)[k - 1]; |
193 | 25.9k | IGRAPH_CHECK(igraph_vector_int_resize(&dominant_labels, 1)); |
194 | 25.9k | VECTOR(dominant_labels)[0] = k; |
195 | 25.9k | } else if (-0.0001 < label_counter_diff && label_counter_diff < 0.0001) { |
196 | 2.35k | IGRAPH_CHECK(igraph_vector_int_push_back(&dominant_labels, k)); |
197 | 2.35k | } |
198 | 37.1k | } |
199 | | |
200 | 18.4k | if (!igraph_vector_int_empty(&dominant_labels)) { |
201 | | /* Maintain same label if it exists in dominant_labels */ |
202 | 16.0k | same_label_in_dominant = igraph_vector_int_contains(&dominant_labels, kv1); |
203 | | |
204 | 16.0k | if (!same_label_in_dominant) { |
205 | | /* We need at least one more iteration */ |
206 | 3.62k | running = true; |
207 | | |
208 | | /* Select randomly from the dominant labels */ |
209 | 3.62k | rand_idx = RNG_INTEGER(0, igraph_vector_int_size(&dominant_labels) - 1); |
210 | 3.62k | k = VECTOR(dominant_labels)[rand_idx]; |
211 | | |
212 | 3.62k | if (kv1 != 0) { |
213 | | /* Subtract 1 vertex from corresponding community in com_to_numvertices */ |
214 | 763 | VECTOR(com_to_numvertices)[kv1 - 1] -= 1; |
215 | | /* Re-calculate density for community kv1 */ |
216 | 763 | VECTOR(density)[kv1 - 1] = max_density / VECTOR(com_to_numvertices)[kv1 - 1]; |
217 | 763 | } |
218 | | |
219 | | /* Update vertex new label */ |
220 | 3.62k | VECTOR(*membership)[v1] = k; |
221 | | |
222 | | /* Add 1 vertex to corresponding new community in com_to_numvertices */ |
223 | 3.62k | VECTOR(com_to_numvertices)[k - 1] += 1; |
224 | | /* Re-calculate density for new community k */ |
225 | 3.62k | VECTOR(density)[k - 1] = max_density / VECTOR(com_to_numvertices)[k - 1]; |
226 | 3.62k | } |
227 | 16.0k | } |
228 | 18.4k | } |
229 | 662 | } |
230 | | |
231 | | /* Shift back the membership vector */ |
232 | | /* There must be no 0 labels in membership vector at this point */ |
233 | 3.85k | for (i = 0; i < no_of_nodes; i++) { |
234 | 3.64k | VECTOR(*membership)[i] -= 1; |
235 | 3.64k | IGRAPH_ASSERT(VECTOR(*membership)[i] >= 0); /* all vertices must have a community assigned */ |
236 | 3.64k | } |
237 | | |
238 | 214 | igraph_adjlist_destroy(&al); |
239 | 214 | igraph_vector_int_destroy(&node_order); |
240 | 214 | igraph_vector_destroy(&density); |
241 | 214 | igraph_vector_int_destroy(&com_to_numvertices); |
242 | 214 | igraph_vector_destroy(&label_counters); |
243 | 214 | igraph_vector_int_destroy(&dominant_labels); |
244 | 214 | IGRAPH_FINALLY_CLEAN(6); |
245 | | |
246 | 214 | return IGRAPH_SUCCESS; |
247 | 214 | } |