Line | Count | Source |
1 | | /* |
2 | | igraph library. |
3 | | Copyright (C) 2005-2022 The igraph development team |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; either version 2 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program; if not, write to the Free Software |
17 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
18 | | 02110-1301 USA |
19 | | */ |
20 | | |
21 | | #include "igraph_foreign.h" |
22 | | |
23 | | #include "igraph_attributes.h" |
24 | | #include "igraph_interface.h" |
25 | | #include "igraph_memory.h" |
26 | | #include "igraph_version.h" |
27 | | |
28 | | #include "core/trie.h" |
29 | | #include "graph/attributes.h" |
30 | | #include "internal/hacks.h" /* strdup, strncasecmp */ |
31 | | #include "math/safe_intop.h" |
32 | | |
33 | | #include "io/gml-header.h" |
34 | | #include "io/parsers/gml-parser.h" |
35 | | |
36 | | #include <ctype.h> |
37 | | #include <time.h> |
38 | | #include <string.h> |
39 | | |
40 | | int igraph_gml_yylex_init_extra(igraph_i_gml_parsedata_t *user_defined, void *scanner); |
41 | | int igraph_gml_yylex_destroy(void *scanner); |
42 | | int igraph_gml_yyparse(igraph_i_gml_parsedata_t *context); |
43 | | void igraph_gml_yyset_in(FILE *in_str, void *yyscanner); |
44 | | |
45 | | /* Checks if a null-terminated string needs encoding or decoding. |
46 | | * |
47 | | * Encoding is needed when an " or & character is present. |
48 | | * |
49 | | * Decoding is needed when an &xyz; style entity is present, so it's sufficient to look |
50 | | * for & characters. " characters are never present in the raw strings returned by the |
51 | | * GML parser, so we can use the same function to detect the need for either encoding |
52 | | * or decoding. |
53 | | */ |
54 | 13.8M | static igraph_bool_t needs_coding(const char *str) { |
55 | 143M | while (*str) { |
56 | 129M | if (*str == '&' || *str == '"') { |
57 | 28.5k | return true; |
58 | 28.5k | } |
59 | 129M | str++; |
60 | 129M | } |
61 | 13.7M | return false; |
62 | 13.8M | } |
63 | | |
64 | | /* Encode & and " character in 'src' to & and " |
65 | | * '*dest' must be deallocated by the caller. |
66 | | */ |
67 | 11.7k | static igraph_error_t entity_encode(const char *src, char **dest, igraph_bool_t only_quot) { |
68 | 11.7k | igraph_int_t destlen = 0; |
69 | 11.7k | const char *s; |
70 | 11.7k | char *d; |
71 | | |
72 | 89.0M | for (s = src; *s != '\0'; s++, destlen++) { |
73 | 89.0M | switch (*s) { |
74 | 5.42M | case '&': /* & */ |
75 | 5.42M | if (! only_quot) { |
76 | 5.42M | destlen += 4; |
77 | 5.42M | } |
78 | 5.42M | break; |
79 | 10.1M | case '"': /* " */ |
80 | 10.1M | destlen += 5; break; |
81 | 89.0M | } |
82 | 89.0M | } |
83 | 11.7k | *dest = IGRAPH_CALLOC(destlen + 1, char); |
84 | 11.7k | IGRAPH_CHECK_OOM(dest, "Not enough memory to encode string for GML export."); |
85 | 89.0M | for (s = src, d = *dest; *s != '\0'; s++, d++) { |
86 | 89.0M | switch (*s) { |
87 | 5.42M | case '&': |
88 | 5.42M | if (! only_quot) { |
89 | 5.42M | strcpy(d, "&"); |
90 | 5.42M | d += 4; |
91 | 5.42M | } else { |
92 | 0 | *d = *s; |
93 | 0 | } |
94 | 5.42M | break; |
95 | 10.1M | case '"': |
96 | 10.1M | strcpy(d, """); d += 5; break; |
97 | 73.5M | default: |
98 | 73.5M | *d = *s; |
99 | 89.0M | } |
100 | 89.0M | } |
101 | 11.7k | *d = '\0'; |
102 | 11.7k | return IGRAPH_SUCCESS; |
103 | 11.7k | } |
104 | | |
105 | | /* Decode the five standard predefined XML entities. Unknown entities or stray & characters |
106 | | * will be passed through unchanged. '*dest' must be deallocated by the caller. |
107 | | * If '*warned' is false, warnings will be issued for unsupported entities and |
108 | | * '*warned' will be set to true. This is to prevent a flood of warnings in some files. |
109 | | */ |
110 | 16.8k | static igraph_error_t entity_decode(const char *src, char **dest, igraph_bool_t *warned) { |
111 | 16.8k | const char *entity_names[] = { |
112 | 16.8k | """, "&", "'", "<", ">" |
113 | 16.8k | }; |
114 | | |
115 | 16.8k | const char entity_values[] = { |
116 | 16.8k | '"', '&', '\'', '<', '>' |
117 | 16.8k | }; |
118 | | |
119 | 16.8k | const int entity_count = sizeof entity_values / sizeof entity_values[0]; |
120 | | |
121 | 16.8k | const char *s; |
122 | 16.8k | char *d; |
123 | 16.8k | size_t len = strlen(src); |
124 | 16.8k | *dest = IGRAPH_CALLOC(len+1, char); /* at most as much storage needed as for 'src' */ |
125 | 16.8k | IGRAPH_CHECK_OOM(dest, "Not enough memory to decode string during GML import."); |
126 | | |
127 | 110M | for (s = src, d = *dest; *s != '\0';) { |
128 | 110M | if (*s == '&') { |
129 | 19.2M | int i; |
130 | 50.0M | for (i=0; i < entity_count; i++) { |
131 | 44.5M | size_t entity_len = strlen(entity_names[i]); |
132 | 44.5M | if (!strncasecmp(s, entity_names[i], entity_len)) { |
133 | 13.7M | *d++ = entity_values[i]; |
134 | 13.7M | s += entity_len; |
135 | 13.7M | break; |
136 | 13.7M | } |
137 | 44.5M | } |
138 | | /* None of the known entities match, report warning and pass through unchanged. */ |
139 | 19.2M | if (i == entity_count) { |
140 | 5.43M | if (! *warned) { |
141 | 930 | const int max_entity_name_length = 34; |
142 | 930 | int j = 0; |
143 | 18.4k | while (s[j] != '\0' && s[j] != ';' && j < max_entity_name_length) { |
144 | 17.5k | j++; |
145 | 17.5k | } |
146 | 930 | if (s[j] == '\0' || j == max_entity_name_length) { |
147 | 839 | IGRAPH_WARNING("Unterminated entity or stray & character found, will be returned verbatim."); |
148 | 839 | } else { |
149 | 91 | IGRAPH_WARNINGF("One or more unknown entities will be returned verbatim (%.*s).", j+1, s); |
150 | 91 | } |
151 | 930 | *warned = true; /* warn only once */ |
152 | 930 | } |
153 | 5.43M | *d++ = *s++; |
154 | 5.43M | } |
155 | 91.4M | } else { |
156 | 91.4M | *d++ = *s++; |
157 | 91.4M | } |
158 | 110M | } |
159 | 16.8k | *d = '\0'; |
160 | | |
161 | 16.8k | return IGRAPH_SUCCESS; |
162 | 16.8k | } |
163 | | |
164 | 5.76M | static igraph_real_t igraph_i_gml_toreal(igraph_gml_tree_t *node, igraph_int_t pos) { |
165 | 5.76M | igraph_i_gml_tree_type_t type = igraph_gml_tree_type(node, pos); |
166 | | |
167 | 5.76M | switch (type) { |
168 | 5.63M | case IGRAPH_I_GML_TREE_INTEGER: |
169 | 5.63M | return igraph_gml_tree_get_integer(node, pos); |
170 | 81.1k | case IGRAPH_I_GML_TREE_REAL: |
171 | 81.1k | return igraph_gml_tree_get_real(node, pos); |
172 | 46.9k | case IGRAPH_I_GML_TREE_TREE: |
173 | 46.9k | return IGRAPH_NAN; /* default value of NaN when composite is ignored */ |
174 | 0 | default: |
175 | | /* Must never reach here, regardless of the contents of the GML file. */ |
176 | 0 | IGRAPH_FATALF("Unexpected node type in GML tree, line %" IGRAPH_PRId ".", |
177 | 5.76M | igraph_gml_tree_line(node, pos)); /* LCOV_EXCL_LINE */ |
178 | 5.76M | } |
179 | 5.76M | } |
180 | | |
181 | 4.72M | static const char *igraph_i_gml_tostring(igraph_gml_tree_t *node, igraph_int_t pos) { |
182 | 4.72M | igraph_i_gml_tree_type_t type = igraph_gml_tree_type(node, pos); |
183 | 4.72M | static char tmp[100]; |
184 | 4.72M | const char *p = tmp; |
185 | 4.72M | igraph_int_t i; |
186 | 4.72M | igraph_real_t d; |
187 | | |
188 | 4.72M | switch (type) { |
189 | 1.18M | case IGRAPH_I_GML_TREE_INTEGER: |
190 | 1.18M | i = igraph_gml_tree_get_integer(node, pos); |
191 | 1.18M | snprintf(tmp, sizeof(tmp) / sizeof(char), "%" IGRAPH_PRId, i); |
192 | 1.18M | break; |
193 | 8.46k | case IGRAPH_I_GML_TREE_REAL: |
194 | 8.46k | d = igraph_gml_tree_get_real(node, pos); |
195 | 8.46k | igraph_real_snprintf_precise(tmp, sizeof(tmp) / sizeof(char), d); |
196 | 8.46k | break; |
197 | 3.43M | case IGRAPH_I_GML_TREE_STRING: |
198 | 3.43M | p = igraph_gml_tree_get_string(node, pos); |
199 | 3.43M | break; |
200 | 100k | case IGRAPH_I_GML_TREE_TREE: |
201 | 100k | tmp[0] = '\0'; /* default value of "" when composite is ignored */ |
202 | 100k | break; |
203 | 0 | default: |
204 | | /* Must never reach here, regardless of the contents of the GML file. */ |
205 | 0 | IGRAPH_FATALF("Unexpected node type in GML tree, line %" IGRAPH_PRId ".", |
206 | 4.72M | igraph_gml_tree_line(node, pos)); /* LCOV_EXCL_LINE */ |
207 | 4.72M | } |
208 | | |
209 | 4.72M | return p; |
210 | 4.72M | } |
211 | | |
212 | 18.7k | igraph_error_t igraph_i_gml_parsedata_init(igraph_i_gml_parsedata_t *context) { |
213 | 18.7k | context->depth = 0; |
214 | 18.7k | context->scanner = NULL; |
215 | 18.7k | context->tree = NULL; |
216 | 18.7k | context->errmsg[0] = '\0'; |
217 | 18.7k | context->igraph_errno = IGRAPH_SUCCESS; |
218 | | |
219 | 18.7k | return IGRAPH_SUCCESS; |
220 | 18.7k | } |
221 | | |
222 | 18.7k | void igraph_i_gml_parsedata_destroy(igraph_i_gml_parsedata_t *context) { |
223 | 18.7k | if (context->tree != NULL) { |
224 | 18.2k | igraph_gml_tree_destroy(context->tree); |
225 | 18.2k | context->tree = NULL; |
226 | 18.2k | } |
227 | | |
228 | 18.7k | if (context->scanner != NULL) { |
229 | 18.7k | (void) igraph_gml_yylex_destroy(context->scanner); |
230 | 18.7k | context->scanner = NULL; |
231 | 18.7k | } |
232 | 18.7k | } |
233 | | |
234 | | /* Takes a vector of attribute records and removes those elements |
235 | | * whose type is unspecified, i.e. IGRAPH_ATTRIBUTE_UNSPECIFIED. */ |
236 | 50.8k | static igraph_error_t prune_unknown_attributes(igraph_attribute_record_list_t *attrs) { |
237 | 50.8k | igraph_int_t i, n; |
238 | 50.8k | igraph_vector_int_t to_remove; |
239 | | |
240 | 50.8k | IGRAPH_VECTOR_INT_INIT_FINALLY(&to_remove, 0); |
241 | | |
242 | 50.8k | n = igraph_attribute_record_list_size(attrs); |
243 | 209k | for (i = 0; i < n; i++) { |
244 | 158k | igraph_attribute_record_t *atrec = igraph_attribute_record_list_get_ptr(attrs, i); |
245 | 158k | if (atrec->type == IGRAPH_ATTRIBUTE_UNSPECIFIED) { |
246 | 16.2k | IGRAPH_CHECK(igraph_vector_int_push_back(&to_remove, i)); |
247 | 16.2k | } |
248 | 158k | } |
249 | | |
250 | 50.8k | n = igraph_vector_int_size(&to_remove); |
251 | 67.0k | for (i = n - 1; i >= 0; i--) { |
252 | 16.2k | igraph_attribute_record_list_discard(attrs, VECTOR(to_remove)[i]); |
253 | 16.2k | } |
254 | | |
255 | 50.8k | igraph_vector_int_destroy(&to_remove); |
256 | 50.8k | IGRAPH_FINALLY_CLEAN(1); |
257 | | |
258 | 50.8k | return IGRAPH_SUCCESS; |
259 | 50.8k | } |
260 | | |
261 | | /* Converts an integer id to an optionally prefixed string id. */ |
262 | 21.4M | static const char *strid(igraph_int_t id, const char *prefix) { |
263 | 21.4M | static char name[100]; |
264 | 21.4M | snprintf(name, sizeof(name) / sizeof(char) - 1, "%s%" IGRAPH_PRId, prefix, id); |
265 | 21.4M | return name; |
266 | 21.4M | } |
267 | | |
268 | | /* Creates an empty attribute record or if it exists, updates its type as needed. |
269 | | * 'name' is the attribute name. 'type' is the current type in the GML tree, |
270 | | * which will determine the igraph attribute type to use. */ |
271 | | static igraph_error_t create_or_update_attribute(const char *name, |
272 | | igraph_i_gml_tree_type_t type, |
273 | | igraph_trie_t *attrnames, |
274 | 11.5M | igraph_attribute_record_list_t *attrs) { |
275 | | |
276 | 11.5M | igraph_int_t trieid, triesize = igraph_trie_size(attrnames); |
277 | 11.5M | igraph_attribute_type_t desired_type; |
278 | | |
279 | 11.5M | IGRAPH_CHECK(igraph_trie_get(attrnames, name, &trieid)); |
280 | 11.5M | if (trieid == triesize) { |
281 | | /* new attribute */ |
282 | 164k | igraph_attribute_record_t atrec; |
283 | | |
284 | 164k | if (type == IGRAPH_I_GML_TREE_INTEGER || type == IGRAPH_I_GML_TREE_REAL) { |
285 | 69.6k | desired_type = IGRAPH_ATTRIBUTE_NUMERIC; |
286 | 94.5k | } else if (type == IGRAPH_I_GML_TREE_STRING) { |
287 | 77.2k | desired_type = IGRAPH_ATTRIBUTE_STRING; |
288 | 77.2k | } else { |
289 | 17.2k | desired_type = IGRAPH_ATTRIBUTE_UNSPECIFIED; |
290 | 17.2k | } |
291 | | |
292 | 164k | IGRAPH_CHECK(igraph_attribute_record_init(&atrec, name, desired_type)); |
293 | 164k | IGRAPH_FINALLY(igraph_attribute_record_destroy, &atrec); |
294 | 164k | IGRAPH_CHECK(igraph_attribute_record_list_push_back(attrs, &atrec)); |
295 | 164k | IGRAPH_FINALLY_CLEAN(1); /* ownership of 'atrec' taken by 'attrs' */ |
296 | 11.4M | } else { |
297 | | /* already seen, should we update type? */ |
298 | 11.4M | igraph_attribute_record_t *atrec = igraph_attribute_record_list_get_ptr(attrs, trieid); |
299 | 11.4M | igraph_attribute_type_t existing_type = atrec->type; |
300 | 11.4M | if (type == IGRAPH_I_GML_TREE_STRING) { |
301 | 3.35M | if (existing_type != IGRAPH_ATTRIBUTE_STRING) { |
302 | 751 | IGRAPH_CHECK(igraph_attribute_record_set_type(atrec, IGRAPH_ATTRIBUTE_STRING)); |
303 | 751 | } |
304 | 8.05M | } else if (existing_type == IGRAPH_ATTRIBUTE_UNSPECIFIED) { |
305 | 1.04M | if (type == IGRAPH_I_GML_TREE_INTEGER || type == IGRAPH_I_GML_TREE_REAL) { |
306 | 286 | IGRAPH_CHECK(igraph_attribute_record_set_type(atrec, IGRAPH_ATTRIBUTE_NUMERIC)); |
307 | 286 | } |
308 | 1.04M | } |
309 | 11.4M | } |
310 | | |
311 | 11.5M | return IGRAPH_SUCCESS; |
312 | 11.5M | } |
313 | | |
314 | | /* Allocates the contents of attribute records stored in 'attrs'. |
315 | | * 'no_of_items' is the length of attribute vectors, i.e. no_of_nodes, |
316 | | * no_of_edges, or 1 for vertex, edge and graph attributes. |
317 | | * The 'kind' parameter can be "vertex", "edge" or "graph", and |
318 | | * is used solely for showing better warning messages. */ |
319 | | static igraph_error_t allocate_attributes( |
320 | | igraph_attribute_record_list_t *attrs, igraph_int_t no_of_items, |
321 | | const char *kind |
322 | 51.5k | ) { |
323 | 51.5k | igraph_int_t i, n = igraph_attribute_record_list_size(attrs); |
324 | 212k | for (i = 0; i < n; i++) { |
325 | 161k | igraph_attribute_record_t *atrec = igraph_attribute_record_list_get_ptr(attrs, i); |
326 | | |
327 | | /* We have unspecified attribute types in the attribute record list at |
328 | | * this point because we need to keep the same order in the attribute |
329 | | * record list as it was in the trie that we use to look up an |
330 | | * attribute record by name. However, we cannot resize unknown attributes |
331 | | * so we need to take care of this */ |
332 | 161k | if (atrec->type != IGRAPH_ATTRIBUTE_UNSPECIFIED) { |
333 | 144k | IGRAPH_CHECK(igraph_attribute_record_resize(atrec, no_of_items)); |
334 | 144k | } else { |
335 | 16.5k | IGRAPH_WARNINGF("Composite %s attribute '%s' ignored in GML file.", kind, atrec->name); |
336 | 16.5k | } |
337 | 161k | } |
338 | 51.5k | return IGRAPH_SUCCESS; |
339 | 51.5k | } |
340 | | |
341 | | /** |
342 | | * \function igraph_read_graph_gml |
343 | | * \brief Read a graph in GML format. |
344 | | * |
345 | | * GML is a simple textual format, see |
346 | | * https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297%26L=1 |
347 | | * for details. |
348 | | * |
349 | | * </para><para> |
350 | | * Although all syntactically correct GML can be parsed, |
351 | | * we implement only a subset of this format. Some attributes might be |
352 | | * ignored. Here is a list of all the differences: |
353 | | * \olist |
354 | | * \oli Only attributes with a simple type are used: integer, real or |
355 | | * string. If an attribute is composite, i.e. an array or a record, |
356 | | * then it is ignored. When some values of the attribute are simple and |
357 | | * some compound, the composite ones are replaced with a default value |
358 | | * (NaN for numeric, <code>""</code> for string). |
359 | | * \oli <code>comment</code> fields are not ignored. They are treated as any |
360 | | * other field and converted to attributes. |
361 | | * \oli Top level attributes except for <code>Version</code> and the |
362 | | * first <code>graph</code> attribute are completely ignored. |
363 | | * \oli There is no maximum line length or maximum keyword length. |
364 | | * \oli Only the \c quot, \c amp, \c apos, \c lt and \c gt character entities |
365 | | * are supported. Any other entity is passed through unchanged by the reader |
366 | | * after issuing a warning, and is expected to be decoded by the user. |
367 | | * \oli We allow <code>inf</code>, <code>-inf</code> and <code>nan</code> |
368 | | * (not a number) as a real number. This is case insensitive, so |
369 | | * <code>nan</code>, <code>NaN</code> and <code>NAN</code> are equivalent. |
370 | | * \endolist |
371 | | * |
372 | | * </para><para> Please contact us if you cannot live with these |
373 | | * limitations of the GML parser. |
374 | | * |
375 | | * \param graph Pointer to an uninitialized graph object. |
376 | | * \param instream The stream to read the GML file from. |
377 | | * \return Error code. |
378 | | * |
379 | | * Time complexity: should be proportional to the length of the file. |
380 | | * |
381 | | * \sa \ref igraph_read_graph_graphml() for a more modern format, |
382 | | * \ref igraph_write_graph_gml() for writing GML files. |
383 | | * |
384 | | * \example examples/simple/gml.c |
385 | | */ |
386 | 18.7k | igraph_error_t igraph_read_graph_gml(igraph_t *graph, FILE *instream) { |
387 | | |
388 | 18.7k | igraph_int_t i; |
389 | 18.7k | igraph_int_t no_of_nodes = 0, no_of_edges = 0; |
390 | 18.7k | igraph_int_t node_no; |
391 | 18.7k | igraph_trie_t trie; |
392 | 18.7k | igraph_vector_int_t edges; |
393 | 18.7k | igraph_bool_t directed = IGRAPH_UNDIRECTED; |
394 | 18.7k | igraph_bool_t has_directed = false; |
395 | 18.7k | igraph_gml_tree_t *gtree; |
396 | 18.7k | igraph_int_t gidx; |
397 | 18.7k | igraph_trie_t vattrnames; |
398 | 18.7k | igraph_trie_t eattrnames; |
399 | 18.7k | igraph_trie_t gattrnames; |
400 | 18.7k | igraph_attribute_record_list_t gattrs, vattrs, eattrs; |
401 | 18.7k | igraph_attribute_record_list_t *attrs[3]; |
402 | 18.7k | igraph_int_t edgeptr = 0; |
403 | 18.7k | igraph_i_gml_parsedata_t context; |
404 | 18.7k | igraph_bool_t entity_warned = false; /* used to warn at most once about unsupported entities */ |
405 | | |
406 | 18.7k | attrs[0] = &gattrs; attrs[1] = &vattrs; attrs[2] = &eattrs; |
407 | | |
408 | 18.7k | IGRAPH_CHECK(igraph_i_gml_parsedata_init(&context)); |
409 | 18.7k | IGRAPH_FINALLY(igraph_i_gml_parsedata_destroy, &context); |
410 | | |
411 | 18.7k | igraph_gml_yylex_init_extra(&context, &context.scanner); |
412 | | |
413 | 18.7k | igraph_gml_yyset_in(instream, context.scanner); |
414 | | |
415 | | /* Protect 'context' from being destroyed before returning from yyparse() */ |
416 | 18.7k | IGRAPH_FINALLY_ENTER(); |
417 | 18.7k | int err = igraph_gml_yyparse(&context); |
418 | 18.7k | IGRAPH_FINALLY_EXIT(); |
419 | 18.7k | switch (err) { |
420 | 18.0k | case 0: /* success */ |
421 | 18.0k | break; |
422 | 679 | case 1: /* parse error */ |
423 | 679 | if (context.errmsg[0] != '\0') { |
424 | 679 | IGRAPH_ERROR(context.errmsg, IGRAPH_PARSEERROR); |
425 | 679 | } else if (context.igraph_errno != IGRAPH_SUCCESS) { |
426 | 0 | IGRAPH_ERROR("", context.igraph_errno); |
427 | 0 | } else { |
428 | 0 | IGRAPH_ERROR("Cannot read GML file.", IGRAPH_PARSEERROR); |
429 | 0 | } |
430 | 0 | break; |
431 | 0 | case 2: /* out of memory */ |
432 | 0 | IGRAPH_ERROR("Cannot read GML file.", IGRAPH_ENOMEM); /* LCOV_EXCL_LINE */ |
433 | 0 | break; |
434 | 0 | default: /* must never reach here */ |
435 | | /* Hint: This will usually be triggered if an IGRAPH_CHECK() is used in a Bison |
436 | | * action instead of an IGRAPH_YY_CHECK(), resulting in an igraph errno being |
437 | | * returned in place of a Bison error code. |
438 | | * TODO: What if future Bison versions introduce error codes other than 0, 1 and 2? |
439 | | */ |
440 | 0 | IGRAPH_FATALF("Parser returned unexpected error code (%d) when reading GML file.", err); /* LCOV_EXCL_LINE */ |
441 | 18.7k | } |
442 | | |
443 | | /* Check version, if present, integer and not '1' then ignored */ |
444 | 18.0k | i = igraph_gml_tree_find(context.tree, "Version", 0); |
445 | 18.0k | if (i >= 0 && |
446 | 9.22k | igraph_gml_tree_type(context.tree, i) == IGRAPH_I_GML_TREE_INTEGER && |
447 | 9.20k | igraph_gml_tree_get_integer(context.tree, i) != 1) { |
448 | 195 | IGRAPH_WARNINGF("Unknown GML version: %" IGRAPH_PRId ". " |
449 | 195 | "Parsing will continue assuming GML version 1, but may fail.", |
450 | 195 | igraph_gml_tree_get_integer(context.tree, i)); |
451 | 195 | } |
452 | | |
453 | | /* Get the graph */ |
454 | 18.0k | gidx = igraph_gml_tree_find(context.tree, "graph", 0); |
455 | 18.0k | if (gidx == -1) { |
456 | 597 | IGRAPH_ERROR("No 'graph' object in GML file.", IGRAPH_PARSEERROR); |
457 | 597 | } |
458 | 17.4k | if (igraph_gml_tree_type(context.tree, gidx) != |
459 | 17.4k | IGRAPH_I_GML_TREE_TREE) { |
460 | 7 | IGRAPH_ERRORF("Invalid type for 'graph' object in GML file, line %" IGRAPH_PRId ".", IGRAPH_PARSEERROR, |
461 | 7 | igraph_gml_tree_line(context.tree, gidx)); |
462 | 7 | } |
463 | 17.4k | gtree = igraph_gml_tree_get_tree(context.tree, gidx); |
464 | | |
465 | 17.4k | IGRAPH_CHECK(igraph_attribute_record_list_init(&gattrs, 0)); |
466 | 17.4k | IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &gattrs); |
467 | 17.4k | IGRAPH_CHECK(igraph_attribute_record_list_init(&vattrs, 0)); |
468 | 17.4k | IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &vattrs); |
469 | 17.4k | IGRAPH_CHECK(igraph_attribute_record_list_init(&eattrs, 0)); |
470 | 17.4k | IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &eattrs); |
471 | | |
472 | 17.4k | IGRAPH_TRIE_INIT_FINALLY(&trie, 0); |
473 | 17.4k | IGRAPH_TRIE_INIT_FINALLY(&vattrnames, 0); |
474 | 17.4k | IGRAPH_TRIE_INIT_FINALLY(&eattrnames, 0); |
475 | 17.4k | IGRAPH_TRIE_INIT_FINALLY(&gattrnames, 0); |
476 | | |
477 | | /* Now we go over all objects in the graph to |
478 | | * - collect the attribute names and types |
479 | | * - collect node IDs |
480 | | * - set directedness |
481 | | * - do some checks which the following code relies on |
482 | | * |
483 | | * The 'id' fields of 'node' objects are converted into strings, so that they |
484 | | * can be inserted into a trie and re-encoded as consecutive integers starting |
485 | | * at 0. The GML spec allows isolated nodes with no 'id' field. These get a |
486 | | * generated string id of the form "n123" consisting of "n" and their count |
487 | | * (i.e. ordinal position) within the GML file. |
488 | | * |
489 | | * We use an attribute type value of IGRAPH_ATTRIBUTE_UNSPECIFIED to mark attribute |
490 | | * records which correspond to composite GML values and must therefore be removed |
491 | | * before creating the graph. |
492 | | */ |
493 | 17.4k | node_no = 0; |
494 | 11.6M | for (i = 0; i < igraph_gml_tree_length(gtree); i++) { |
495 | 11.6M | const char *name = igraph_gml_tree_name(gtree, i); |
496 | 11.6M | if (!strcmp(name, "node")) { |
497 | 10.0M | igraph_gml_tree_t *node; |
498 | 10.0M | igraph_bool_t hasid; |
499 | 10.0M | node_no++; |
500 | 10.0M | no_of_nodes++; |
501 | 10.0M | if (igraph_gml_tree_type(gtree, i) != IGRAPH_I_GML_TREE_TREE) { |
502 | 12 | IGRAPH_ERRORF("'node' is not a list in GML file, line %" IGRAPH_PRId ".", IGRAPH_PARSEERROR, |
503 | 12 | igraph_gml_tree_line(gtree, i)); |
504 | 12 | } |
505 | 10.0M | node = igraph_gml_tree_get_tree(gtree, i); |
506 | 10.0M | hasid = false; |
507 | 18.0M | for (igraph_int_t j = 0; j < igraph_gml_tree_length(node); j++) { |
508 | 8.03M | const char *name = igraph_gml_tree_name(node, j); |
509 | 8.03M | igraph_i_gml_tree_type_t type = igraph_gml_tree_type(node, j); |
510 | 8.03M | IGRAPH_CHECK(create_or_update_attribute(name, type, &vattrnames, &vattrs)); |
511 | | /* check id */ |
512 | 8.03M | if (!strcmp(name, "id")) { |
513 | 3.44M | igraph_int_t id, trie_id; |
514 | 3.44M | igraph_int_t trie_size = igraph_trie_size(&trie); |
515 | 3.44M | if (hasid) { |
516 | | /* A 'node' must not have more than one 'id' field. |
517 | | * This error cannot be relaxed into a warning because all ids we find are |
518 | | * added to the trie, and eventually converted to igraph vertex ids. */ |
519 | 3 | IGRAPH_ERRORF("Node has multiple 'id' fields in GML file, line %" IGRAPH_PRId ".", |
520 | 3 | IGRAPH_PARSEERROR, |
521 | 3 | igraph_gml_tree_line(node, j)); |
522 | 3 | } |
523 | 3.44M | if (type != IGRAPH_I_GML_TREE_INTEGER) { |
524 | 6 | IGRAPH_ERRORF("Non-integer node id in GML file, line %" IGRAPH_PRId ".", IGRAPH_PARSEERROR, |
525 | 6 | igraph_gml_tree_line(node, j)); |
526 | 6 | } |
527 | 3.44M | id = igraph_gml_tree_get_integer(node, j); |
528 | 3.44M | IGRAPH_CHECK(igraph_trie_get(&trie, strid(id, ""), &trie_id)); |
529 | 3.44M | if (trie_id != trie_size) { |
530 | | /* This id has already been seen in a previous node. */ |
531 | 18 | IGRAPH_ERRORF("Duplicate node id in GML file, line %" IGRAPH_PRId ".", IGRAPH_PARSEERROR, |
532 | 18 | igraph_gml_tree_line(node, j)); |
533 | 18 | } |
534 | 3.44M | hasid = true; |
535 | 3.44M | } |
536 | 8.03M | } |
537 | 10.0M | if (!hasid) { |
538 | | /* Isolated nodes are allowed not to have an id. |
539 | | * We generate an "n"-prefixed string id to be used in the trie. */ |
540 | 6.58M | igraph_int_t trie_id; |
541 | 6.58M | IGRAPH_CHECK(igraph_trie_get(&trie, strid(node_no, "n"), &trie_id)); |
542 | 6.58M | } |
543 | 10.0M | } else if (!strcmp(name, "edge")) { |
544 | 726k | igraph_gml_tree_t *edge; |
545 | 726k | igraph_bool_t has_source = false, has_target = false; |
546 | 726k | no_of_edges++; |
547 | 726k | if (igraph_gml_tree_type(gtree, i) != IGRAPH_I_GML_TREE_TREE) { |
548 | 7 | IGRAPH_ERRORF("'edge' is not a list in GML file, line %" IGRAPH_PRId ".", IGRAPH_PARSEERROR, |
549 | 7 | igraph_gml_tree_line(gtree, i)); |
550 | 7 | } |
551 | 726k | edge = igraph_gml_tree_get_tree(gtree, i); |
552 | 4.86M | for (igraph_int_t j = 0; j < igraph_gml_tree_length(edge); j++) { |
553 | 4.13M | const char *name = igraph_gml_tree_name(edge, j); |
554 | 4.13M | igraph_i_gml_tree_type_t type = igraph_gml_tree_type(edge, j); |
555 | 4.13M | if (!strcmp(name, "source")) { |
556 | 726k | if (has_source) { |
557 | | /* An edge must not have more than one 'source' field. |
558 | | * This could be relaxed to a warning, but we keep it as an error |
559 | | * for consistency with the handling of duplicate node 'id' field, |
560 | | * and because it indicates a serious corruption in the GML file. */ |
561 | 2 | IGRAPH_ERRORF("Duplicate 'source' in an edge in GML file, line %" IGRAPH_PRId ".", |
562 | 2 | IGRAPH_PARSEERROR, |
563 | 2 | igraph_gml_tree_line(edge, j)); |
564 | 2 | } |
565 | 726k | has_source = true; |
566 | 726k | if (type != IGRAPH_I_GML_TREE_INTEGER) { |
567 | 4 | IGRAPH_ERRORF("Non-integer 'source' for an edge in GML file, line %" IGRAPH_PRId ".", |
568 | 4 | IGRAPH_PARSEERROR, |
569 | 4 | igraph_gml_tree_line(edge, j)); |
570 | 4 | } |
571 | 3.40M | } else if (!strcmp(name, "target")) { |
572 | 726k | if (has_target) { |
573 | | /* An edge must not have more than one 'target' field. */ |
574 | 2 | IGRAPH_ERRORF("Duplicate 'target' in an edge in GML file, line %" IGRAPH_PRId ".", |
575 | 2 | IGRAPH_PARSEERROR, |
576 | 2 | igraph_gml_tree_line(edge, j)); |
577 | 2 | } |
578 | 726k | has_target = true; |
579 | 726k | if (type != IGRAPH_I_GML_TREE_INTEGER) { |
580 | 3 | IGRAPH_ERRORF("Non-integer 'target' for an edge in GML file, line %" IGRAPH_PRId ".", |
581 | 3 | IGRAPH_PARSEERROR, |
582 | 3 | igraph_gml_tree_line(edge, j)); |
583 | 3 | } |
584 | 2.68M | } else { |
585 | 2.68M | IGRAPH_CHECK(create_or_update_attribute(name, type, &eattrnames, &eattrs)); |
586 | 2.68M | } |
587 | 4.13M | } /* for */ |
588 | 726k | if (!has_source) { |
589 | 210 | IGRAPH_ERRORF("No 'source' for edge in GML file, line %" IGRAPH_PRId ".", IGRAPH_PARSEERROR, |
590 | 210 | igraph_gml_tree_line(gtree, i)); |
591 | 210 | } |
592 | 726k | if (!has_target) { |
593 | 14 | IGRAPH_ERRORF("No 'target' for edge in GML file, line %" IGRAPH_PRId ".", IGRAPH_PARSEERROR, |
594 | 14 | igraph_gml_tree_line(gtree, i)); |
595 | 14 | } |
596 | 888k | } else if (! strcmp(name, "directed")) { |
597 | | /* Set directedness of graph. */ |
598 | 20.1k | if (has_directed) { |
599 | | /* Be tolerant of duplicate entries, but do show a warning. */ |
600 | 9.89k | IGRAPH_WARNINGF("Duplicate 'directed' field in 'graph', line %" IGRAPH_PRId ". " |
601 | 9.89k | "Ignoring previous 'directed' fields.", |
602 | 9.89k | igraph_gml_tree_line(gtree, i)); |
603 | 9.89k | } |
604 | 20.1k | if (igraph_gml_tree_type(gtree, i) == IGRAPH_I_GML_TREE_INTEGER) { |
605 | 19.5k | igraph_int_t dir = igraph_gml_tree_get_integer(gtree, i); |
606 | 19.5k | if (dir != 0 && dir != 1) { |
607 | 5.71k | IGRAPH_WARNINGF( |
608 | 5.71k | "Invalid value %" IGRAPH_PRId " for 'directed' attribute on line %" IGRAPH_PRId ", should be 0 or 1.", |
609 | 5.71k | dir, igraph_gml_tree_line(gtree, i)); |
610 | 5.71k | } |
611 | 19.5k | if (dir) { |
612 | 10.7k | directed = IGRAPH_DIRECTED; |
613 | 10.7k | } |
614 | 19.5k | has_directed = true; |
615 | 19.5k | } else { |
616 | 644 | IGRAPH_WARNINGF("Invalid type for 'directed' attribute on line %" IGRAPH_PRId ", assuming undirected.", |
617 | 644 | igraph_gml_tree_line(gtree, i)); |
618 | 644 | } |
619 | 867k | } else { |
620 | | /* Add the rest of items as graph attributes. */ |
621 | 867k | igraph_i_gml_tree_type_t type = igraph_gml_tree_type(gtree, i); |
622 | 867k | IGRAPH_CHECK(create_or_update_attribute(name, type, &gattrnames, &gattrs)); |
623 | 867k | } |
624 | 11.6M | } |
625 | | |
626 | | /* At this point, all nodes must have an id (from the file or generated) stored |
627 | | * in the trie. Any condition that violates this should have been caught during |
628 | | * the preceding checks. */ |
629 | 17.1k | IGRAPH_ASSERT(igraph_trie_size(&trie) == no_of_nodes); |
630 | | |
631 | | /* Now we allocate the vectors and strvectors for the attributes */ |
632 | 17.1k | IGRAPH_CHECK(allocate_attributes(&vattrs, no_of_nodes, "vertex")); |
633 | 17.1k | IGRAPH_CHECK(allocate_attributes(&eattrs, no_of_edges, "edge")); |
634 | 17.1k | IGRAPH_CHECK(allocate_attributes(&gattrs, 1, "graph")); |
635 | | |
636 | | /* Add edges, edge attributes and vertex attributes */ |
637 | 17.1k | IGRAPH_VECTOR_INT_INIT_FINALLY(&edges, no_of_edges * 2); |
638 | 17.1k | node_no = 0; |
639 | 11.5M | for (i = 0; i < igraph_gml_tree_length(gtree); i++) { |
640 | 11.5M | const char *name; |
641 | 11.5M | name = igraph_gml_tree_name(gtree, i); |
642 | 11.5M | if (!strcmp(name, "node")) { |
643 | 9.91M | igraph_gml_tree_t *node = igraph_gml_tree_get_tree(gtree, i); |
644 | 9.91M | igraph_int_t iidx = igraph_gml_tree_find(node, "id", 0); |
645 | 9.91M | igraph_int_t trie_id; |
646 | 9.91M | const char *sid; |
647 | 9.91M | node_no++; |
648 | 9.91M | if (iidx < 0) { |
649 | | /* Isolated node with no id field, use n-prefixed generated id */ |
650 | 6.46M | sid = strid(node_no, "n"); |
651 | 6.46M | } else { |
652 | 3.44M | sid = strid(igraph_gml_tree_get_integer(node, iidx), ""); |
653 | 3.44M | } |
654 | 9.91M | IGRAPH_CHECK(igraph_trie_get(&trie, sid, &trie_id)); |
655 | 17.9M | for (igraph_int_t j = 0; j < igraph_gml_tree_length(node); j++) { |
656 | 8.02M | const char *aname = igraph_gml_tree_name(node, j); |
657 | 8.02M | igraph_attribute_record_t *atrec; |
658 | 8.02M | igraph_attribute_type_t type; |
659 | 8.02M | igraph_int_t ai; |
660 | 8.02M | IGRAPH_CHECK(igraph_trie_get(&vattrnames, aname, &ai)); |
661 | 8.02M | atrec = igraph_attribute_record_list_get_ptr(&vattrs, ai); |
662 | 8.02M | type = atrec->type; |
663 | 8.02M | if (type == IGRAPH_ATTRIBUTE_NUMERIC) { |
664 | 4.56M | VECTOR(*atrec->value.as_vector)[trie_id] = igraph_i_gml_toreal(node, j); |
665 | 4.56M | } else if (type == IGRAPH_ATTRIBUTE_STRING) { |
666 | 3.13M | igraph_strvector_t *v = atrec->value.as_strvector; |
667 | 3.13M | const char *value = igraph_i_gml_tostring(node, j); |
668 | 3.13M | if (needs_coding(value)) { |
669 | 10.5k | char *value_decoded; |
670 | 10.5k | IGRAPH_CHECK(entity_decode(value, &value_decoded, &entity_warned)); |
671 | 10.5k | IGRAPH_FINALLY(igraph_free, value_decoded); |
672 | 10.5k | IGRAPH_CHECK(igraph_strvector_set(v, trie_id, value_decoded)); |
673 | 10.5k | IGRAPH_FREE(value_decoded); |
674 | 10.5k | IGRAPH_FINALLY_CLEAN(1); |
675 | 3.12M | } else { |
676 | 3.12M | IGRAPH_CHECK(igraph_strvector_set(v, trie_id, value)); |
677 | 3.12M | } |
678 | 3.13M | } else { |
679 | | /* Ignored composite attribute */ |
680 | 338k | } |
681 | 8.02M | } |
682 | 9.91M | } else if (!strcmp(name, "edge")) { |
683 | 725k | igraph_gml_tree_t *edge; |
684 | 725k | igraph_int_t from, to, fromidx = 0, toidx = 0; |
685 | 725k | edge = igraph_gml_tree_get_tree(gtree, i); |
686 | 4.77M | for (igraph_int_t j = 0; j < igraph_gml_tree_length(edge); j++) { |
687 | 4.04M | const char *aname = igraph_gml_tree_name(edge, j); |
688 | 4.04M | if (!strcmp(aname, "source")) { |
689 | 725k | fromidx = igraph_gml_tree_find(edge, "source", 0); |
690 | 3.32M | } else if (!strcmp(aname, "target")) { |
691 | 725k | toidx = igraph_gml_tree_find(edge, "target", 0); |
692 | 2.59M | } else { |
693 | 2.59M | igraph_int_t edgeid = edgeptr / 2; |
694 | 2.59M | igraph_int_t ai; |
695 | 2.59M | igraph_attribute_record_t *atrec; |
696 | 2.59M | igraph_attribute_type_t type; |
697 | 2.59M | IGRAPH_CHECK(igraph_trie_get(&eattrnames, aname, &ai)); |
698 | 2.59M | atrec = igraph_attribute_record_list_get_ptr(&eattrs, ai); |
699 | 2.59M | type = atrec->type; |
700 | 2.59M | if (type == IGRAPH_ATTRIBUTE_NUMERIC) { |
701 | 1.00M | VECTOR(*atrec->value.as_vector)[edgeid] = igraph_i_gml_toreal(edge, j); |
702 | 1.59M | } else if (type == IGRAPH_ATTRIBUTE_STRING) { |
703 | 1.56M | igraph_strvector_t *v = atrec->value.as_strvector; |
704 | 1.56M | const char *value = igraph_i_gml_tostring(edge, j); |
705 | 1.56M | if (needs_coding(value)) { |
706 | 4.88k | char *value_decoded; |
707 | 4.88k | IGRAPH_CHECK(entity_decode(value, &value_decoded, &entity_warned)); |
708 | 4.88k | IGRAPH_FINALLY(igraph_free, value_decoded); |
709 | 4.88k | IGRAPH_CHECK(igraph_strvector_set(v, edgeid, value_decoded)); |
710 | 4.88k | IGRAPH_FREE(value_decoded); |
711 | 4.88k | IGRAPH_FINALLY_CLEAN(1); |
712 | 1.56M | } else { |
713 | 1.56M | IGRAPH_CHECK(igraph_strvector_set(v, edgeid, value)); |
714 | 1.56M | } |
715 | 1.56M | } else { |
716 | | /* Ignored composite attribute */ |
717 | 28.0k | } |
718 | 2.59M | } |
719 | 4.04M | } |
720 | 725k | from = igraph_gml_tree_get_integer(edge, fromidx); |
721 | 725k | to = igraph_gml_tree_get_integer(edge, toidx); |
722 | 725k | IGRAPH_CHECK(igraph_trie_check(&trie, strid(from, ""), &from)); |
723 | 725k | if (from < 0) { |
724 | 229 | IGRAPH_ERRORF("Unknown source node id found in an edge in GML file, line %" IGRAPH_PRId ".", |
725 | 229 | IGRAPH_PARSEERROR, igraph_gml_tree_line(edge, fromidx)); |
726 | 229 | } |
727 | 725k | IGRAPH_CHECK(igraph_trie_check(&trie, strid(to, ""), &to)); |
728 | 725k | if (to < 0) { |
729 | 11 | IGRAPH_ERRORF("Unknown target node id found in an edge in GML file, line %" IGRAPH_PRId ".", |
730 | 11 | IGRAPH_PARSEERROR, igraph_gml_tree_line(edge, toidx)); |
731 | 11 | } |
732 | 725k | VECTOR(edges)[edgeptr++] = from; |
733 | 725k | VECTOR(edges)[edgeptr++] = to; |
734 | 880k | } else if (! strcmp(name, "directed")) { |
735 | | /* Nothing to do for 'directed' field, already handled earlier. */ |
736 | 860k | } else { |
737 | | /* Set the rest as graph attributes */ |
738 | 860k | igraph_int_t ai; |
739 | 860k | igraph_attribute_record_t *atrec; |
740 | 860k | igraph_attribute_type_t type; |
741 | 860k | IGRAPH_CHECK(igraph_trie_get(&gattrnames, name, &ai)); |
742 | 860k | atrec = igraph_attribute_record_list_get_ptr(&gattrs, ai); |
743 | 860k | type = atrec->type; |
744 | 860k | if (type == IGRAPH_ATTRIBUTE_NUMERIC) { |
745 | 201k | VECTOR(*atrec->value.as_vector)[0] = igraph_i_gml_toreal(gtree, i); |
746 | 659k | } else if (type == IGRAPH_ATTRIBUTE_STRING) { |
747 | 30.3k | igraph_strvector_t *v = atrec->value.as_strvector; |
748 | 30.3k | const char *value = igraph_i_gml_tostring(gtree, i); |
749 | 30.3k | if (needs_coding(value)) { |
750 | 1.41k | char *value_decoded; |
751 | 1.41k | IGRAPH_CHECK(entity_decode(value, &value_decoded, &entity_warned)); |
752 | 1.41k | IGRAPH_FINALLY(igraph_free, value_decoded); |
753 | 1.41k | IGRAPH_CHECK(igraph_strvector_set(v, 0, value_decoded)); |
754 | 1.41k | IGRAPH_FREE(value_decoded); |
755 | 1.41k | IGRAPH_FINALLY_CLEAN(1); |
756 | 28.9k | } else { |
757 | 28.9k | IGRAPH_CHECK(igraph_strvector_set(v, 0, value)); |
758 | 28.9k | } |
759 | 628k | } else { |
760 | | /* Ignored composite attribute */ |
761 | 628k | } |
762 | 860k | } |
763 | 11.5M | } |
764 | | |
765 | | /* Remove composite attributes that we cannot represent in igraph */ |
766 | 16.9k | IGRAPH_CHECK(prune_unknown_attributes(&vattrs)); |
767 | 16.9k | IGRAPH_CHECK(prune_unknown_attributes(&eattrs)); |
768 | 16.9k | IGRAPH_CHECK(prune_unknown_attributes(&gattrs)); |
769 | | |
770 | 16.9k | igraph_trie_destroy(&trie); |
771 | 16.9k | igraph_trie_destroy(&gattrnames); |
772 | 16.9k | igraph_trie_destroy(&vattrnames); |
773 | 16.9k | igraph_trie_destroy(&eattrnames); |
774 | 16.9k | IGRAPH_FINALLY_CLEAN(4); |
775 | | |
776 | 16.9k | IGRAPH_CHECK(igraph_empty_attrs(graph, 0, directed, &gattrs)); |
777 | 16.9k | IGRAPH_FINALLY(igraph_destroy, graph); |
778 | 16.9k | IGRAPH_CHECK(igraph_add_vertices(graph, no_of_nodes, &vattrs)); |
779 | 16.9k | IGRAPH_CHECK(igraph_add_edges(graph, &edges, &eattrs)); |
780 | 16.9k | IGRAPH_FINALLY_CLEAN(1); /* do not destroy 'graph', just pop it from the stack */ |
781 | | |
782 | 16.9k | igraph_vector_int_destroy(&edges); |
783 | 16.9k | igraph_attribute_record_list_destroy(&eattrs); |
784 | 16.9k | igraph_attribute_record_list_destroy(&vattrs); |
785 | 16.9k | igraph_attribute_record_list_destroy(&gattrs); |
786 | 16.9k | igraph_i_gml_parsedata_destroy(&context); |
787 | 16.9k | IGRAPH_FINALLY_CLEAN(5); |
788 | | |
789 | 16.9k | return IGRAPH_SUCCESS; |
790 | 16.9k | } |
791 | | |
792 | 21.2M | static igraph_error_t igraph_i_gml_convert_to_key(const char *orig, char **key) { |
793 | 21.2M | char strno[50]; |
794 | 21.2M | size_t i, len = strlen(orig), newlen = 0, plen = 0; |
795 | | |
796 | | /* do we need a prefix? */ |
797 | 21.2M | if (len == 0 || !isalpha(orig[0])) { |
798 | 49.3k | snprintf(strno, sizeof(strno) - 1, "igraph"); |
799 | 49.3k | plen = newlen = strlen(strno); |
800 | 49.3k | } |
801 | 508M | for (i = 0; i < len; i++) { |
802 | 487M | if (isalnum(orig[i])) { |
803 | 350M | newlen++; |
804 | 350M | } |
805 | 487M | } |
806 | 21.2M | *key = IGRAPH_CALLOC(newlen + 1, char); |
807 | 21.2M | IGRAPH_CHECK_OOM(*key, "Writing GML format failed."); |
808 | 21.2M | memcpy(*key, strno, plen * sizeof(char)); |
809 | 508M | for (i = 0; i < len; i++) { |
810 | 487M | if (isalnum(orig[i])) { |
811 | 350M | (*key)[plen++] = orig[i]; |
812 | 350M | } |
813 | 487M | } |
814 | 21.2M | (*key)[newlen] = '\0'; |
815 | | |
816 | 21.2M | return IGRAPH_SUCCESS; |
817 | 21.2M | } |
818 | | |
819 | | /* Checks if a vector is free of duplicates. Since NaN == NaN is false, duplicate NaN values |
820 | | * will not be detected. */ |
821 | 2.24k | static igraph_error_t igraph_i_vector_is_duplicate_free(const igraph_vector_t *v, igraph_bool_t *res) { |
822 | 2.24k | igraph_vector_t u; |
823 | 2.24k | igraph_int_t n = igraph_vector_size(v); |
824 | | |
825 | 2.24k | if (n < 2) { |
826 | 1.31k | *res = true; |
827 | 1.31k | return IGRAPH_SUCCESS; |
828 | 1.31k | } |
829 | | |
830 | 930 | IGRAPH_CHECK(igraph_vector_init_copy(&u, v)); |
831 | 930 | IGRAPH_FINALLY(igraph_vector_destroy, &u); |
832 | 930 | igraph_vector_sort(&u); |
833 | | |
834 | 930 | *res = true; |
835 | 40.8k | for (igraph_int_t i=1; i < n; i++) { |
836 | 39.9k | if (VECTOR(u)[i-1] == VECTOR(u)[i]) { |
837 | 27 | *res = false; |
838 | 27 | break; |
839 | 27 | } |
840 | 39.9k | } |
841 | | |
842 | 930 | igraph_vector_destroy(&u); |
843 | 930 | IGRAPH_FINALLY_CLEAN(1); |
844 | | |
845 | 930 | return IGRAPH_SUCCESS; |
846 | 930 | } |
847 | | |
848 | 32.4M | #define CHECK(cmd) do { int ret=cmd; if (ret<0) IGRAPH_ERROR("Writing GML format failed.", IGRAPH_EFILE); } while (0) |
849 | | |
850 | | /** |
851 | | * \function igraph_write_graph_gml |
852 | | * \brief Write the graph to a stream in GML format. |
853 | | * |
854 | | * GML is a quite general textual format, see |
855 | | * https://web.archive.org/web/20190207140002/http://www.fim.uni-passau.de/index.php?id=17297%26L=1 |
856 | | * for details. |
857 | | * |
858 | | * </para><para> |
859 | | * The graph, vertex and edges attributes are written to the |
860 | | * file as well, if they are numeric or string. Boolean attributes are converted |
861 | | * to numeric, with 0 and 1 used for false and true, respectively. |
862 | | * NaN values of numeric attributes are skipped, as NaN is not part of the GML |
863 | | * specification and other software may not be able to read files containing them. |
864 | | * This is consistent with \ref igraph_read_graph_gml(), which produces NaN |
865 | | * when an attribute value is missing. In contrast with NaN, infinite values |
866 | | * are retained. Ensure that none of the numeric attributes values are infinite |
867 | | * to produce a conformant GML file that can be read by other software. |
868 | | * |
869 | | * </para><para> |
870 | | * As igraph is more forgiving about attribute names, it might |
871 | | * be necessary to simplify the them before writing to the GML file. |
872 | | * This way we'll have a syntactically correct GML file. The following |
873 | | * simple procedure is performed on each attribute name: first the alphanumeric |
874 | | * characters are extracted, the others are ignored. Then if the first character |
875 | | * is not a letter then the attribute name is prefixed with <quote>igraph</quote>. |
876 | | * Note that this might result identical names for two attributes, igraph |
877 | | * does not check this. |
878 | | * |
879 | | * </para><para> |
880 | | * The <quote>id</quote> vertex attribute is treated specially. |
881 | | * If the <parameter>id</parameter> argument is not \c NULL then it should be a numeric |
882 | | * vector with the vertex IDs and the <quote>id</quote> vertex attribute is |
883 | | * ignored (if there is one). If <parameter>id</parameter> is \c NULL and there is a |
884 | | * numeric <quote>id</quote> vertex attribute, it will be used instead. If ids |
885 | | * are not specified in either way then the regular igraph vertex IDs are used. |
886 | | * If some of the supplied id values are invalid (non-integer or NaN), all supplied |
887 | | * id are ignored and igraph vertex IDs are used instead. |
888 | | * |
889 | | * </para><para> |
890 | | * Note that whichever way vertex IDs are specified, their uniqueness is not checked. |
891 | | * |
892 | | * </para><para> |
893 | | * If the graph has edge attributes that become <quote>source</quote> |
894 | | * or <quote>target</quote> after encoding, or the graph has an attribute that becomes |
895 | | * <quote>directed</quote>, they will be ignored with a warning. GML uses these attributes |
896 | | * to specify the edge endpoints, and the graph directedness, so we cannot write them |
897 | | * to the file. Rename them before calling this function if you want to preserve them. |
898 | | * |
899 | | * \param graph The graph to write to the stream. |
900 | | * \param outstream The stream to write the file to. |
901 | | * \param options Set of <code>|</code>-combinable boolean flags for writing the GML file. |
902 | | * \clist |
903 | | * \cli 0 |
904 | | * All options turned off. |
905 | | * \cli IGRAPH_WRITE_GML_DEFAULT_SW |
906 | | * Default options, currently equivalent to 0. May change in future versions. |
907 | | * \cli IGRAPH_WRITE_GML_ENCODE_ONLY_QUOT_SW |
908 | | * Do not encode any other characters than " as entities. Specifically, this |
909 | | * option prevents the encoding of &. Useful when re-exporting a graph |
910 | | * that was read from a GML file in which igraph could not interpret all entities, |
911 | | * and thus passed them through without decoding. |
912 | | * \endclist |
913 | | * \param id Either <code>NULL</code> or a numeric vector with the vertex IDs. |
914 | | * See details above. |
915 | | * \param creator An optional string to write to the stream in the creator line. |
916 | | * If \c NULL, the igraph version with the current date and time is added. |
917 | | * If <code>""</code>, the creator line is omitted. Otherwise, the |
918 | | * supplied string is used verbatim. |
919 | | * \return Error code. |
920 | | * |
921 | | * Time complexity: should be proportional to the number of characters written |
922 | | * to the file. |
923 | | * |
924 | | * \sa \ref igraph_read_graph_gml() for reading GML files, |
925 | | * \ref igraph_read_graph_graphml() for a more modern format. |
926 | | * |
927 | | * \example examples/simple/gml.c |
928 | | */ |
929 | | |
930 | | igraph_error_t igraph_write_graph_gml(const igraph_t *graph, FILE *outstream, |
931 | | igraph_write_gml_sw_t options, |
932 | 11.3k | const igraph_vector_t *id, const char *creator) { |
933 | 11.3k | igraph_strvector_t gnames, vnames, enames; /* attribute names */ |
934 | 11.3k | igraph_vector_int_t gtypes, vtypes, etypes; /* attribute types */ |
935 | 11.3k | igraph_int_t gattr_no, vattr_no, eattr_no; /* attribute counts */ |
936 | 11.3k | igraph_vector_t numv; |
937 | 11.3k | igraph_strvector_t strv; |
938 | 11.3k | igraph_vector_bool_t boolv; |
939 | 11.3k | igraph_int_t i; |
940 | 11.3k | igraph_int_t no_of_nodes = igraph_vcount(graph); |
941 | 11.3k | igraph_int_t no_of_edges = igraph_ecount(graph); |
942 | | |
943 | | /* Each element is a bit field used to prevent showing more |
944 | | * than one warning for each vertex or edge attribute. */ |
945 | 11.3k | igraph_vector_int_t warning_shown; |
946 | | |
947 | 11.3k | igraph_vector_t v_myid; |
948 | 11.3k | const igraph_vector_t *myid = id; |
949 | | |
950 | | /* Creator line */ |
951 | 11.3k | if (creator == NULL) { |
952 | 2.35k | time_t curtime = time(0); |
953 | 2.35k | char *timestr = ctime(&curtime); |
954 | 2.35k | timestr[strlen(timestr) - 1] = '\0'; /* nicely remove \n */ |
955 | | |
956 | 2.35k | CHECK(fprintf(outstream, |
957 | 2.35k | "Creator \"igraph version %s %s\"\n", |
958 | 2.35k | IGRAPH_VERSION, timestr)); |
959 | 8.99k | } else if (creator[0] == '\0') { |
960 | | /* creator == "", omit Creator line */ |
961 | 8.99k | } else { |
962 | 8.99k | if (needs_coding(creator)) { |
963 | 0 | char *d; |
964 | 0 | IGRAPH_CHECK(entity_encode(creator, &d, IGRAPH_WRITE_GML_ENCODE_ONLY_QUOT_SW & options)); |
965 | 0 | IGRAPH_FINALLY(igraph_free, d); |
966 | 0 | CHECK(fprintf(outstream, |
967 | 0 | "Creator \"%s\"\n", |
968 | 0 | creator)); |
969 | 0 | IGRAPH_FREE(d); |
970 | 0 | IGRAPH_FINALLY_CLEAN(1); |
971 | 8.99k | } else { |
972 | 8.99k | CHECK(fprintf(outstream, |
973 | 8.99k | "Creator \"%s\"\n", |
974 | 8.99k | creator)); |
975 | 8.99k | } |
976 | 8.99k | } |
977 | | |
978 | | /* Version line */ |
979 | 11.3k | CHECK(fprintf(outstream, "Version 1\n")); |
980 | | |
981 | | /* The graph */ |
982 | 11.3k | CHECK(fprintf(outstream, "graph\n[\n")); |
983 | | |
984 | 11.3k | IGRAPH_STRVECTOR_INIT_FINALLY(&gnames, 0); |
985 | 11.3k | IGRAPH_STRVECTOR_INIT_FINALLY(&vnames, 0); |
986 | 11.3k | IGRAPH_STRVECTOR_INIT_FINALLY(&enames, 0); |
987 | 11.3k | IGRAPH_VECTOR_INT_INIT_FINALLY(>ypes, 0); |
988 | 11.3k | IGRAPH_VECTOR_INT_INIT_FINALLY(&vtypes, 0); |
989 | 11.3k | IGRAPH_VECTOR_INT_INIT_FINALLY(&etypes, 0); |
990 | 11.3k | IGRAPH_CHECK(igraph_i_attribute_get_info(graph, |
991 | 11.3k | &gnames, >ypes, |
992 | 11.3k | &vnames, &vtypes, |
993 | 11.3k | &enames, &etypes)); |
994 | 11.3k | gattr_no = igraph_vector_int_size(>ypes); |
995 | 11.3k | vattr_no = igraph_vector_int_size(&vtypes); |
996 | 11.3k | eattr_no = igraph_vector_int_size(&etypes); |
997 | | |
998 | 11.3k | IGRAPH_VECTOR_INIT_FINALLY(&numv, 1); |
999 | 11.3k | IGRAPH_STRVECTOR_INIT_FINALLY(&strv, 1); |
1000 | 11.3k | IGRAPH_VECTOR_BOOL_INIT_FINALLY(&boolv, 1); |
1001 | | |
1002 | | /* Check whether there is an 'id' node attribute if the supplied is 0 */ |
1003 | 11.3k | if (!id) { |
1004 | 11.3k | igraph_bool_t found = false; |
1005 | 51.6k | for (i = 0; i < igraph_vector_int_size(&vtypes); i++) { |
1006 | 43.2k | const char *n = igraph_strvector_get(&vnames, i); |
1007 | 43.2k | if (!strcmp(n, "id") && VECTOR(vtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) { |
1008 | 2.96k | found = true; break; |
1009 | 2.96k | } |
1010 | 43.2k | } |
1011 | 11.3k | if (found) { |
1012 | 2.96k | IGRAPH_VECTOR_INIT_FINALLY(&v_myid, no_of_nodes); |
1013 | 2.96k | IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr(graph, "id", |
1014 | 2.96k | igraph_vss_all(), |
1015 | 2.96k | &v_myid)); |
1016 | 2.96k | myid = &v_myid; |
1017 | 2.96k | } |
1018 | 11.3k | } |
1019 | | |
1020 | | /* Scan id vector for invalid values. If any are found, all ids are ignored. |
1021 | | * Invalid values may occur as a result of reading a GML file in which some |
1022 | | * nodes did not have an id, or by adding new vertices to a graph with an "id" |
1023 | | * attribute. In this case, the "id" attribute will contain NaN values. |
1024 | | */ |
1025 | 11.3k | if (myid) { |
1026 | 2.96k | if (igraph_vector_size(myid) != no_of_nodes) { |
1027 | 0 | IGRAPH_ERROR("Size of id vector must match vertex count.", IGRAPH_EINVAL); |
1028 | 0 | } |
1029 | 51.0k | for (i = 0; i < no_of_nodes; ++i) { |
1030 | 48.8k | igraph_real_t val = VECTOR(*myid)[i]; |
1031 | 48.8k | igraph_real_t trunc_val = trunc(val); |
1032 | 48.8k | if (! (val == trunc_val && igraph_i_is_real_representable_as_integer(trunc_val))) { |
1033 | 717 | IGRAPH_WARNINGF("%g is not a valid integer id for GML files, ignoring all supplied ids.", val); |
1034 | 717 | if (myid == &v_myid) { |
1035 | 717 | igraph_vector_destroy(&v_myid); |
1036 | 717 | IGRAPH_FINALLY_CLEAN(1); |
1037 | 717 | } |
1038 | 717 | myid = NULL; |
1039 | 717 | break; |
1040 | 717 | } |
1041 | 48.8k | } |
1042 | 2.96k | } |
1043 | | |
1044 | 11.3k | if (myid) { |
1045 | 2.24k | igraph_bool_t duplicate_free; |
1046 | 2.24k | IGRAPH_CHECK(igraph_i_vector_is_duplicate_free(myid, &duplicate_free)); |
1047 | 2.24k | if (! duplicate_free) { |
1048 | 27 | IGRAPH_WARNING("Duplicate id values found, ignoring supplies ids."); |
1049 | 27 | if (myid == &v_myid) { |
1050 | 27 | igraph_vector_destroy(&v_myid); |
1051 | 27 | IGRAPH_FINALLY_CLEAN(1); |
1052 | 27 | } |
1053 | 27 | myid = NULL; |
1054 | 27 | } |
1055 | 2.24k | } |
1056 | | |
1057 | | /* directedness */ |
1058 | 11.3k | CHECK(fprintf(outstream, " directed %i\n", igraph_is_directed(graph) ? 1 : 0)); |
1059 | | |
1060 | | /* Graph attributes first */ |
1061 | 27.8k | for (i = 0; i < gattr_no; i++) { |
1062 | 16.5k | const char *name; |
1063 | 16.5k | char *newname; |
1064 | 16.5k | name = igraph_strvector_get(&gnames, i); |
1065 | 16.5k | IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname)); |
1066 | 16.5k | IGRAPH_FINALLY(igraph_free, newname); |
1067 | 16.5k | if (!strcmp(newname, "directed")|| !strcmp(newname, "edge") || !strcmp(newname, "node")) { |
1068 | 130 | IGRAPH_WARNINGF("The graph attribute '%s' was ignored while writing GML format.", name); |
1069 | 16.3k | } else { |
1070 | 16.3k | if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_NUMERIC) { |
1071 | 9.82k | IGRAPH_CHECK(igraph_i_attribute_get_numeric_graph_attr(graph, name, &numv)); |
1072 | | /* Treat NaN as missing, skip writing it. GML does not officially support NaN. */ |
1073 | 9.82k | if (! isnan(VECTOR(numv)[0])) { |
1074 | 9.59k | if (! isfinite(VECTOR(numv)[0])) { |
1075 | 356 | IGRAPH_WARNINGF("Infinite value in numeric graph attribute '%s'. " |
1076 | 356 | "Produced GML file will not be conformant.", name); |
1077 | 356 | } |
1078 | 9.59k | CHECK(fprintf(outstream, " %s ", newname)); |
1079 | 9.59k | CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0])); |
1080 | 9.59k | CHECK(fputc('\n', outstream)); |
1081 | 9.59k | } |
1082 | 9.82k | } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_STRING) { |
1083 | 6.41k | const char *s; |
1084 | 6.41k | IGRAPH_CHECK(igraph_i_attribute_get_string_graph_attr(graph, name, &strv)); |
1085 | 6.41k | s = igraph_strvector_get(&strv, 0); |
1086 | 6.41k | if (needs_coding(s)) { |
1087 | 730 | char *d; |
1088 | 730 | IGRAPH_CHECK(entity_encode(s, &d, IGRAPH_WRITE_GML_ENCODE_ONLY_QUOT_SW & options)); |
1089 | 730 | IGRAPH_FINALLY(igraph_free, d); |
1090 | 730 | CHECK(fprintf(outstream, " %s \"%s\"\n", newname, d)); |
1091 | 730 | IGRAPH_FREE(d); |
1092 | 730 | IGRAPH_FINALLY_CLEAN(1); |
1093 | 5.68k | } else { |
1094 | 5.68k | CHECK(fprintf(outstream, " %s \"%s\"\n", newname, s)); |
1095 | 5.68k | } |
1096 | 6.41k | } else if (VECTOR(gtypes)[i] == IGRAPH_ATTRIBUTE_BOOLEAN) { |
1097 | 154 | IGRAPH_CHECK(igraph_i_attribute_get_bool_graph_attr(graph, name, &boolv)); |
1098 | 154 | CHECK(fprintf(outstream, " %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0)); |
1099 | 154 | IGRAPH_WARNING("A boolean graph attribute was converted to numeric."); |
1100 | 154 | } else { |
1101 | 0 | IGRAPH_WARNING("A non-numeric, non-string, non-boolean graph attribute ignored."); |
1102 | 0 | } |
1103 | 16.3k | } |
1104 | 16.5k | IGRAPH_FREE(newname); |
1105 | 16.5k | IGRAPH_FINALLY_CLEAN(1); |
1106 | 16.5k | } |
1107 | | |
1108 | | /* Macros used to work with the bit fiels in 'warning_shown', |
1109 | | * and avoid showing warnings more than once for each attribute. */ |
1110 | 31.2k | #define GETBIT(k, i) ((k) & (1 << i)) |
1111 | 11.3k | #define SETBIT(k, i) ((k) |= (1 << i)) |
1112 | 11.3k | #define WARN_ONCE(attrno, bit, warn) \ |
1113 | 31.2k | do { \ |
1114 | 31.2k | igraph_int_t *p = &VECTOR(warning_shown)[attrno]; \ |
1115 | 31.2k | if (! GETBIT(*p, bit)) { \ |
1116 | 5.10k | warn; \ |
1117 | 2.55k | SETBIT(*p, bit); \ |
1118 | 2.55k | } \ |
1119 | 31.2k | } while (0) |
1120 | | |
1121 | | |
1122 | | /* Now come the vertices */ |
1123 | 11.3k | IGRAPH_VECTOR_INT_INIT_FINALLY(&warning_shown, vattr_no); |
1124 | 6.45M | for (i = 0; i < no_of_nodes; i++) { |
1125 | 6.44M | igraph_int_t j; |
1126 | 6.44M | CHECK(fprintf(outstream, " node\n [\n")); |
1127 | | /* id */ |
1128 | 6.44M | CHECK(fprintf(outstream, " id %" IGRAPH_PRId "\n", myid ? (igraph_int_t)VECTOR(*myid)[i] : i)); |
1129 | | /* other attributes */ |
1130 | 28.1M | for (j = 0; j < vattr_no; j++) { |
1131 | 21.6M | igraph_attribute_type_t type = (igraph_attribute_type_t) VECTOR(vtypes)[j]; |
1132 | 21.6M | const char *name; |
1133 | 21.6M | char *newname; |
1134 | 21.6M | name = igraph_strvector_get(&vnames, j); |
1135 | 21.6M | if (!strcmp(name, "id")) { |
1136 | | /* No warning, the presence of this attribute is expected, and is handled specially. */ |
1137 | 4.40M | continue; |
1138 | 4.40M | } |
1139 | 17.2M | IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname)); |
1140 | 17.2M | IGRAPH_FINALLY(igraph_free, newname); |
1141 | 17.2M | if (!strcmp(newname, "id")) { |
1142 | | /* In case an attribute name would conflict with 'id' only after encoding. */ |
1143 | 596 | WARN_ONCE(j, 0, |
1144 | 596 | IGRAPH_WARNINGF("The vertex attribute '%s' was ignored while writing GML format.", name)); |
1145 | 17.2M | } else { |
1146 | 17.2M | if (type == IGRAPH_ATTRIBUTE_NUMERIC) { |
1147 | 8.86M | IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr(graph, name, |
1148 | 8.86M | igraph_vss_1(i), &numv)); |
1149 | | /* Treat NaN as missing, skip writing it. GML does not officially support NaN. */ |
1150 | 8.86M | if (! isnan(VECTOR(numv)[0])) { |
1151 | 265k | if (! isfinite(VECTOR(numv)[0])) { |
1152 | 1.62k | WARN_ONCE(j, 3, |
1153 | 1.62k | IGRAPH_WARNINGF("Infinite value in numeric vertex attribute '%s'. " |
1154 | 1.62k | "Produced GML file will not be conformant.", name)); |
1155 | 1.62k | } |
1156 | 265k | CHECK(fprintf(outstream, " %s ", newname)); |
1157 | 265k | CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0])); |
1158 | 265k | CHECK(fputc('\n', outstream)); |
1159 | 265k | } |
1160 | 8.86M | } else if (type == IGRAPH_ATTRIBUTE_STRING) { |
1161 | 8.42M | const char *s; |
1162 | 8.42M | IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr(graph, name, |
1163 | 8.42M | igraph_vss_1(i), &strv)); |
1164 | 8.42M | s = igraph_strvector_get(&strv, 0); |
1165 | 8.42M | if (needs_coding(s)) { |
1166 | 7.37k | char *d; |
1167 | 7.37k | IGRAPH_CHECK(entity_encode(s, &d, IGRAPH_WRITE_GML_ENCODE_ONLY_QUOT_SW & options)); |
1168 | 7.37k | IGRAPH_FINALLY(igraph_free, d); |
1169 | 7.37k | CHECK(fprintf(outstream, " %s \"%s\"\n", newname, d)); |
1170 | 7.37k | IGRAPH_FREE(d); |
1171 | 7.37k | IGRAPH_FINALLY_CLEAN(1); |
1172 | 8.41M | } else { |
1173 | 8.41M | CHECK(fprintf(outstream, " %s \"%s\"\n", newname, s)); |
1174 | 8.41M | } |
1175 | 8.42M | } else if (type == IGRAPH_ATTRIBUTE_BOOLEAN) { |
1176 | 3.56k | IGRAPH_CHECK(igraph_i_attribute_get_bool_vertex_attr(graph, name, |
1177 | 3.56k | igraph_vss_1(i), &boolv)); |
1178 | 3.56k | CHECK(fprintf(outstream, " %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0)); |
1179 | 3.56k | WARN_ONCE(j, 1, |
1180 | 3.56k | IGRAPH_WARNINGF("The boolean vertex attribute '%s' was converted to numeric.", name)); |
1181 | 3.56k | } else { |
1182 | 0 | WARN_ONCE(j, 2, |
1183 | 0 | IGRAPH_WARNINGF("The non-numeric, non-string, non-boolean vertex attribute '%s' was ignored.", name)); |
1184 | 0 | } |
1185 | 17.2M | } |
1186 | 17.2M | IGRAPH_FREE(newname); |
1187 | 17.2M | IGRAPH_FINALLY_CLEAN(1); |
1188 | 17.2M | } |
1189 | 6.44M | CHECK(fprintf(outstream, " ]\n")); |
1190 | 6.44M | } |
1191 | | |
1192 | | /* The edges too */ |
1193 | 11.3k | IGRAPH_CHECK(igraph_vector_int_resize(&warning_shown, eattr_no)); |
1194 | 11.3k | igraph_vector_int_null(&warning_shown); |
1195 | 609k | for (i = 0; i < no_of_edges; i++) { |
1196 | 598k | igraph_int_t from = IGRAPH_FROM(graph, i); |
1197 | 598k | igraph_int_t to = IGRAPH_TO(graph, i); |
1198 | 598k | igraph_int_t j; |
1199 | 598k | CHECK(fprintf(outstream, " edge\n [\n")); |
1200 | | /* source and target */ |
1201 | 598k | CHECK(fprintf(outstream, " source %" IGRAPH_PRId "\n", |
1202 | 598k | myid ? (igraph_int_t)VECTOR(*myid)[from] : from)); |
1203 | 598k | CHECK(fprintf(outstream, " target %" IGRAPH_PRId "\n", |
1204 | 598k | myid ? (igraph_int_t)VECTOR(*myid)[to] : to)); |
1205 | | |
1206 | | /* other attributes */ |
1207 | 4.53M | for (j = 0; j < eattr_no; j++) { |
1208 | 3.93M | igraph_attribute_type_t type = (igraph_attribute_type_t) VECTOR(etypes)[j]; |
1209 | 3.93M | const char *name; |
1210 | 3.93M | char *newname; |
1211 | 3.93M | name = igraph_strvector_get(&enames, j); |
1212 | 3.93M | IGRAPH_CHECK(igraph_i_gml_convert_to_key(name, &newname)); |
1213 | 3.93M | IGRAPH_FINALLY(igraph_free, newname); |
1214 | 3.93M | if (!strcmp(newname, "source") || !strcmp(newname, "target")) { |
1215 | 3.56k | WARN_ONCE(j, 0, |
1216 | 3.56k | IGRAPH_WARNINGF("The edge attribute '%s' was ignored while writing GML format.", name)); |
1217 | 3.93M | } else { |
1218 | 3.93M | if (type == IGRAPH_ATTRIBUTE_NUMERIC) { |
1219 | 3.28M | IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr(graph, name, |
1220 | 3.28M | igraph_ess_1(i), &numv)); |
1221 | | /* Treat NaN as missing, skip writing it. GML does not officially support NaN. */ |
1222 | 3.28M | if (! isnan(VECTOR(numv)[0])) { |
1223 | 249k | if (! isfinite(VECTOR(numv)[0])) { |
1224 | 15.0k | WARN_ONCE(j, 3, |
1225 | 15.0k | IGRAPH_WARNINGF("Infinite value in numeric edge attribute '%s'. " |
1226 | 15.0k | "Produced GML file will not be conformant.", name)); |
1227 | 15.0k | } |
1228 | 249k | CHECK(fprintf(outstream, " %s ", newname)); |
1229 | 249k | CHECK(igraph_real_fprintf_precise(outstream, VECTOR(numv)[0])); |
1230 | 249k | CHECK(fputc('\n', outstream)); |
1231 | 249k | } |
1232 | 3.28M | } else if (type == IGRAPH_ATTRIBUTE_STRING) { |
1233 | 639k | const char *s; |
1234 | 639k | IGRAPH_CHECK(igraph_i_attribute_get_string_edge_attr(graph, name, |
1235 | 639k | igraph_ess_1(i), &strv)); |
1236 | 639k | s = igraph_strvector_get(&strv, 0); |
1237 | 639k | if (needs_coding(s)) { |
1238 | 3.61k | char *d; |
1239 | 3.61k | IGRAPH_CHECK(entity_encode(s, &d, IGRAPH_WRITE_GML_ENCODE_ONLY_QUOT_SW & options)); |
1240 | 3.61k | IGRAPH_FINALLY(igraph_free, d); |
1241 | 3.61k | CHECK(fprintf(outstream, " %s \"%s\"\n", newname, d)); |
1242 | 3.61k | IGRAPH_FREE(d); |
1243 | 3.61k | IGRAPH_FINALLY_CLEAN(1); |
1244 | 636k | } else { |
1245 | 636k | CHECK(fprintf(outstream, " %s \"%s\"\n", newname, s)); |
1246 | 636k | } |
1247 | 639k | } else if (type == IGRAPH_ATTRIBUTE_BOOLEAN) { |
1248 | 6.93k | IGRAPH_CHECK(igraph_i_attribute_get_bool_edge_attr(graph, name, |
1249 | 6.93k | igraph_ess_1(i), &boolv)); |
1250 | 6.93k | CHECK(fprintf(outstream, " %s %d\n", newname, VECTOR(boolv)[0] ? 1 : 0)); |
1251 | 6.93k | WARN_ONCE(j, 1, |
1252 | 6.93k | IGRAPH_WARNINGF("The boolean edge attribute '%s' was converted to numeric.", name)); |
1253 | 6.93k | } else { |
1254 | 0 | WARN_ONCE(j, 2, |
1255 | 0 | IGRAPH_WARNINGF("The non-numeric, non-string, non-boolean edge attribute '%s' was ignored.", name)); |
1256 | 0 | } |
1257 | 3.93M | } |
1258 | 3.93M | IGRAPH_FREE(newname); |
1259 | 3.93M | IGRAPH_FINALLY_CLEAN(1); |
1260 | 3.93M | } |
1261 | 598k | CHECK(fprintf(outstream, " ]\n")); |
1262 | 598k | } |
1263 | | |
1264 | 11.3k | CHECK(fprintf(outstream, "]\n")); |
1265 | | |
1266 | 11.3k | #undef GETBIT |
1267 | 11.3k | #undef SETBIT |
1268 | 11.3k | #undef WARN_ONCE |
1269 | | |
1270 | 11.3k | if (&v_myid == myid) { |
1271 | 2.21k | igraph_vector_destroy(&v_myid); |
1272 | 2.21k | IGRAPH_FINALLY_CLEAN(1); |
1273 | 2.21k | } |
1274 | | |
1275 | 11.3k | igraph_vector_int_destroy(&warning_shown); |
1276 | 11.3k | igraph_vector_bool_destroy(&boolv); |
1277 | 11.3k | igraph_strvector_destroy(&strv); |
1278 | 11.3k | igraph_vector_destroy(&numv); |
1279 | 11.3k | igraph_vector_int_destroy(&etypes); |
1280 | 11.3k | igraph_vector_int_destroy(&vtypes); |
1281 | 11.3k | igraph_vector_int_destroy(>ypes); |
1282 | 11.3k | igraph_strvector_destroy(&enames); |
1283 | 11.3k | igraph_strvector_destroy(&vnames); |
1284 | 11.3k | igraph_strvector_destroy(&gnames); |
1285 | 11.3k | IGRAPH_FINALLY_CLEAN(10); |
1286 | | |
1287 | 11.3k | return IGRAPH_SUCCESS; |
1288 | 11.3k | } |
1289 | | |
1290 | | #undef CHECK |