/src/igraph/src/io/pajek.c
Line | Count | Source |
1 | | /* |
2 | | igraph library. |
3 | | Copyright (C) 2005-2020 The igraph development team |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; either version 2 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program; if not, write to the Free Software |
17 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
18 | | 02110-1301 USA |
19 | | |
20 | | */ |
21 | | |
22 | | #include "igraph_foreign.h" |
23 | | |
24 | | #include "igraph_attributes.h" |
25 | | #include "igraph_error.h" |
26 | | #include "igraph_interface.h" |
27 | | #include "igraph_memory.h" |
28 | | |
29 | | #include "graph/attributes.h" |
30 | | |
31 | | #include "internal/hacks.h" /* IGRAPH_STATIC_ASSERT */ |
32 | | |
33 | | #include "io/pajek-header.h" |
34 | | #include "io/parsers/pajek-parser.h" |
35 | | |
36 | | #include <ctype.h> |
37 | | #include <string.h> |
38 | | |
39 | | int igraph_pajek_yylex_init_extra(igraph_i_pajek_parsedata_t *user_defined, |
40 | | void *scanner); |
41 | | int igraph_pajek_yylex_destroy(void *scanner); |
42 | | int igraph_pajek_yyparse(igraph_i_pajek_parsedata_t *context); |
43 | | void igraph_pajek_yyset_in(FILE *in_str, void *yyscanner); |
44 | | |
45 | | /* for IGRAPH_FINALLY, which assumes that destructor functions return void */ |
46 | 0 | void igraph_pajek_yylex_destroy_wrapper (void *scanner ) { |
47 | 0 | (void) igraph_pajek_yylex_destroy(scanner); |
48 | 0 | } |
49 | | |
50 | | /** |
51 | | * \function igraph_read_graph_pajek |
52 | | * \brief Reads a file in Pajek format. |
53 | | * |
54 | | * Only a subset of the Pajek format is implemented. This is partially |
55 | | * because there is no formal specification for this format, but also because |
56 | | * <command>igraph</command> does not support some Pajek features, like |
57 | | * mixed graphs. |
58 | | * |
59 | | * </para><para> |
60 | | * Starting from version 0.6.1 igraph reads bipartite (two-mode) |
61 | | * graphs from Pajek files and adds the \c type Boolean vertex attribute for |
62 | | * them. Warnings are given for invalid edges, i.e. edges connecting |
63 | | * vertices of the same type. |
64 | | * |
65 | | * </para><para> |
66 | | * The list of the current limitations: |
67 | | * \olist |
68 | | * \oli Only <filename>.net</filename> files are supported, Pajek |
69 | | * project files (<filename>.paj</filename>) are not. |
70 | | * \oli Temporal networks (i.e. with time events) are not supported. |
71 | | * \oli Graphs with both directed and non-directed edges are not |
72 | | * supported, as they cannot be represented in <command>igraph</command>. |
73 | | * \oli Only Pajek networks are supported; permutations, hierarchies, |
74 | | * clusters and vectors are not. |
75 | | * \oli Multi-relational networks (i.e. networks with multiple edge |
76 | | * types) are not supported. |
77 | | * \oli Unicode characters encoded as <code>&#dddd;</code>, or newlines |
78 | | * encoded as <code>\n</code> will not be decoded. |
79 | | * \endolist |
80 | | * |
81 | | * </para><para> |
82 | | * If an attribute handler is installed, |
83 | | * <command>igraph</command> also reads the vertex and edge attributes |
84 | | * from the file. Most attributes are renamed to be more informative: |
85 | | * \c color instead of \c c, \c xfact instead of \c x_fact, |
86 | | * \c yfact instead of y_fact, \c labeldist instead of \c lr, |
87 | | * \c labeldegree2 instead of \c lphi, \c framewidth instead of \c bw, |
88 | | * \c fontsize instead of \c fos, \c rotation instead of \c phi, |
89 | | * \c radius instead of \c r, \c diamondratio instead of \c q, |
90 | | * \c labeldegree instead of \c la, |
91 | | * \c color instead of \c ic, \c framecolor instead of \c bc, |
92 | | * \c labelcolor instead of \c lc; these belong to vertices. |
93 | | * |
94 | | * </para><para> |
95 | | * Edge attributes are also renamed, \c s to \c arrowsize, |
96 | | * \c w to \c edgewidth, \c h1 to \c hook1, \c h2 to \c hook2, |
97 | | * \c a1 to \c angle1, \c a2 to \c angle2, \c k1 to |
98 | | * \c velocity1, \c k2 to \c velocity2, \c ap to \c arrowpos, |
99 | | * \c lp to \c labelpos, \c lr to \c labelangle, |
100 | | * \c lphi to \c labelangle2, \c la to \c labeldegree, |
101 | | * \c fos to \c fontsize, \c a to \c arrowtype, \c p to \c linepattern, |
102 | | * \c l to \c label, \c lc to \c labelcolor, \c c to \c color. |
103 | | * |
104 | | * </para><para> |
105 | | * Unknown vertex or edge parameters are read as string vertex |
106 | | * or edge attributes. If the parameter name conflicts with one |
107 | | * the standard attribute names mentioned above, a <code>_</code> |
108 | | * character is appended to it to avoid conflict. |
109 | | * |
110 | | * </para><para> |
111 | | * In addition the following vertex attributes might be added: \c name is added |
112 | | * (with the same value) if there are vertex IDs in the file. |
113 | | * \c x and \c y, and potentially \c z are also added if there are vertex |
114 | | * coordinates in the file. |
115 | | * |
116 | | * </para><para> |
117 | | * The \c weight edge attribute will be added if there are edge weights present. |
118 | | * |
119 | | * </para><para> |
120 | | * See the Pajek homepage: |
121 | | * http://vlado.fmf.uni-lj.si/pub/networks/pajek/ for more info on |
122 | | * Pajek. The Pajek manual, |
123 | | * http://vlado.fmf.uni-lj.si/pub/networks/pajek/doc/pajekman.pdf, |
124 | | * and http://mrvar.fdv.uni-lj.si/pajek/DrawEPS.htm |
125 | | * have information on the Pajek file format. There is additional |
126 | | * useful information and sample files at |
127 | | * http://mrvar.fdv.uni-lj.si/pajek/history.htm |
128 | | * |
129 | | * \param graph Pointer to an uninitialized graph object. |
130 | | * \param instream An already opened file handler. |
131 | | * \return Error code. |
132 | | * |
133 | | * Time complexity: O(|V|+|E|+|A|), |V| is the number of vertices, |E| |
134 | | * the number of edges, |A| the number of attributes (vertex + edge) |
135 | | * in the graph if there are attribute handlers installed. |
136 | | * |
137 | | * \sa \ref igraph_write_graph_pajek() for writing Pajek files, \ref |
138 | | * igraph_read_graph_graphml() for reading GraphML files. |
139 | | * |
140 | | * \example examples/simple/foreign.c |
141 | | */ |
142 | | |
143 | 0 | igraph_error_t igraph_read_graph_pajek(igraph_t *graph, FILE *instream) { |
144 | |
|
145 | 0 | igraph_vector_int_t edges; |
146 | 0 | igraph_trie_t vattrnames; |
147 | 0 | igraph_attribute_record_list_t vattrs; |
148 | 0 | igraph_trie_t eattrnames; |
149 | 0 | igraph_attribute_record_list_t eattrs; |
150 | 0 | igraph_int_t i; |
151 | 0 | igraph_i_pajek_parsedata_t context; |
152 | 0 | igraph_bitset_t seen; /* used to mark already seen vertex IDs */ |
153 | |
|
154 | 0 | IGRAPH_VECTOR_INT_INIT_FINALLY(&edges, 0); |
155 | | |
156 | 0 | IGRAPH_TRIE_INIT_FINALLY(&vattrnames, 1); |
157 | 0 | IGRAPH_CHECK(igraph_attribute_record_list_init(&vattrs, 0)); |
158 | 0 | IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &vattrs); |
159 | |
|
160 | 0 | IGRAPH_TRIE_INIT_FINALLY(&eattrnames, 1); |
161 | 0 | IGRAPH_CHECK(igraph_attribute_record_list_init(&eattrs, 0)); |
162 | 0 | IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &eattrs); |
163 | |
|
164 | 0 | IGRAPH_BITSET_INIT_FINALLY(&seen, 0); |
165 | | |
166 | 0 | context.directed = false; /* assume undirected until an element implying directedness is encountered */ |
167 | 0 | context.vector = &edges; |
168 | 0 | context.seen = &seen; |
169 | 0 | context.vcount = -1; |
170 | 0 | context.vertexid = 0; |
171 | 0 | context.vertex_attribute_names = &vattrnames; |
172 | 0 | context.vertex_attributes = &vattrs; |
173 | 0 | context.edge_attribute_names = &eattrnames; |
174 | 0 | context.edge_attributes = &eattrs; |
175 | 0 | context.actedge = 0; |
176 | 0 | context.eof = false; |
177 | 0 | context.errmsg[0] = '\0'; |
178 | 0 | context.igraph_errno = IGRAPH_SUCCESS; |
179 | |
|
180 | 0 | igraph_pajek_yylex_init_extra(&context, &context.scanner); |
181 | 0 | IGRAPH_FINALLY(igraph_pajek_yylex_destroy_wrapper, context.scanner); |
182 | |
|
183 | 0 | igraph_pajek_yyset_in(instream, context.scanner); |
184 | | |
185 | | /* Use ENTER/EXIT to avoid destroying context.scanner before this function returns */ |
186 | 0 | IGRAPH_FINALLY_ENTER(); |
187 | 0 | int err = igraph_pajek_yyparse(&context); |
188 | 0 | IGRAPH_FINALLY_EXIT(); |
189 | 0 | switch (err) { |
190 | 0 | case 0: /* success */ |
191 | 0 | break; |
192 | 0 | case 1: /* parse error */ |
193 | 0 | if (context.errmsg[0] != 0) { |
194 | 0 | IGRAPH_ERROR(context.errmsg, IGRAPH_PARSEERROR); |
195 | 0 | } else if (context.igraph_errno != IGRAPH_SUCCESS) { |
196 | 0 | IGRAPH_ERROR("", context.igraph_errno); |
197 | 0 | } else { |
198 | 0 | IGRAPH_ERROR("Cannot read Pajek file.", IGRAPH_PARSEERROR); |
199 | 0 | } |
200 | 0 | break; |
201 | 0 | case 2: /* out of memory */ |
202 | 0 | IGRAPH_ERROR("Cannot read Pajek file.", IGRAPH_ENOMEM); /* LCOV_EXCL_LINE */ |
203 | 0 | break; |
204 | 0 | default: /* must never reach here */ |
205 | | /* Hint: This will usually be triggered if an IGRAPH_CHECK() is used in a Bison |
206 | | * action instead of an IGRAPH_YY_CHECK(), resulting in an igraph errno being |
207 | | * returned in place of a Bison error code. |
208 | | * TODO: What if future Bison versions introduce error codes other than 0, 1 and 2? |
209 | | */ |
210 | 0 | IGRAPH_FATALF("Parser returned unexpected error code (%d) when reading Pajek file.", err); |
211 | 0 | } |
212 | | |
213 | 0 | igraph_pajek_yylex_destroy(context.scanner); |
214 | 0 | igraph_bitset_destroy(&seen); |
215 | 0 | IGRAPH_FINALLY_CLEAN(2); |
216 | | |
217 | | /* Prepare attributes */ |
218 | 0 | const igraph_int_t eattr_count = igraph_attribute_record_list_size(&eattrs); |
219 | 0 | for (i = 0; i < eattr_count; i++) { |
220 | 0 | igraph_attribute_record_t *rec = igraph_attribute_record_list_get_ptr(&eattrs, i); |
221 | 0 | IGRAPH_CHECK(igraph_attribute_record_resize(rec, context.actedge)); |
222 | 0 | } |
223 | | |
224 | | /* Create graph */ |
225 | 0 | IGRAPH_CHECK(igraph_empty(graph, 0, context.directed)); |
226 | 0 | IGRAPH_FINALLY(igraph_destroy, graph); |
227 | 0 | IGRAPH_CHECK(igraph_add_vertices(graph, context.vcount, &vattrs)); |
228 | 0 | IGRAPH_CHECK(igraph_add_edges(graph, &edges, &eattrs)); |
229 | | |
230 | 0 | igraph_vector_int_destroy(&edges); |
231 | 0 | igraph_attribute_record_list_destroy(&eattrs); |
232 | 0 | igraph_trie_destroy(&eattrnames); |
233 | 0 | igraph_attribute_record_list_destroy(&vattrs); |
234 | 0 | igraph_trie_destroy(&vattrnames); |
235 | 0 | IGRAPH_FINALLY_CLEAN(6); /* +1 for 'graph' */ |
236 | |
|
237 | 0 | return IGRAPH_SUCCESS; |
238 | 0 | } |
239 | | |
240 | | /***** Writing Pajek files *****/ |
241 | | |
242 | | /* Order matters here! */ |
243 | 1.09M | #define V_ID 0 |
244 | 547k | #define V_X 1 |
245 | 848 | #define V_Y 2 |
246 | 634 | #define V_Z 3 |
247 | 547k | #define V_SHAPE 4 |
248 | | #define V_XFACT 5 |
249 | | #define V_YFACT 6 |
250 | | #define V_LABELDIST 7 |
251 | | #define V_LABELDEGREE2 8 |
252 | | #define V_FRAMEWIDTH 9 |
253 | | #define V_FONTSIZE 10 |
254 | | #define V_ROTATION 11 |
255 | | #define V_RADIUS 12 |
256 | | #define V_DIAMONDRATIO 13 |
257 | | #define V_LABELDEGREE 14 |
258 | | #define V_FONT 15 |
259 | | #define V_URL 16 |
260 | | #define V_COLOR 17 |
261 | | #define V_FRAMECOLOR 18 |
262 | | #define V_LABELCOLOR 19 |
263 | 120k | #define V_LAST 20 |
264 | | |
265 | 129k | #define E_WEIGHT 0 |
266 | 10.9k | #define E_LAST 1 |
267 | | |
268 | | /* Pajek encodes newlines as \n, and any unicode character can be encoded |
269 | | * in the form &#hhhh;. Therefore we encode quotation marks as " */ |
270 | 3.05k | static igraph_error_t pajek_escape(const char* src, char** dest) { |
271 | 3.05k | igraph_int_t destlen = 0; |
272 | 3.05k | igraph_bool_t need_escape = false; |
273 | | |
274 | | /* Determine whether the string contains characters to be escaped */ |
275 | 3.05k | const char *s; |
276 | 3.05k | char *d; |
277 | 8.88M | for (s = src; *s; s++, destlen++) { |
278 | 8.88M | if (*s == '\n' || *s == '\r') { |
279 | 4.03M | need_escape = true; |
280 | 4.03M | destlen++; |
281 | 4.85M | } else if (*s == '"') { |
282 | 236 | need_escape = true; |
283 | 236 | destlen += 4; |
284 | 4.85M | } else if (!isalnum(*s)) { |
285 | 423k | need_escape = true; |
286 | 423k | } |
287 | 8.88M | } |
288 | | |
289 | 3.05k | if (!need_escape) { |
290 | | /* At this point, we know that the string does not contain any chars |
291 | | * that would warrant encoding. Therefore, we simply quote it and |
292 | | * return the quoted string. This is necessary because Pajek uses some |
293 | | * reserved words in its format (like 'c' standing for color) and they |
294 | | * have to be quoted as well. |
295 | | */ |
296 | 2.67k | *dest = IGRAPH_CALLOC(destlen + 3, char); |
297 | 2.67k | CHECK_OOM_WP(*dest); |
298 | | |
299 | 2.67k | d = *dest; |
300 | 2.67k | strcpy(d + 1, src); |
301 | 2.67k | d[0] = d[destlen + 1] = '"'; |
302 | 2.67k | d[destlen + 2] = 0; |
303 | 2.67k | return IGRAPH_SUCCESS; |
304 | 2.67k | } |
305 | | |
306 | 378 | *dest = IGRAPH_CALLOC(destlen + 3, char); |
307 | 378 | CHECK_OOM_WP(*dest); |
308 | | |
309 | 378 | d = *dest; |
310 | 378 | *d = '"'; d++; |
311 | | |
312 | 8.22M | for (s = src; *s; s++, d++) { |
313 | 8.22M | switch (*s) { |
314 | | /* Encode quotation marks as ", as they would otherwise signify |
315 | | the end/beginning of a string. */ |
316 | 236 | case '"': |
317 | 236 | strcpy(d, """); d += 4; break; |
318 | 0 | break; |
319 | | /* Encode both CR and LF as \n, as neither should apear in a quoted string. |
320 | | \n is the _only_ escape sequence Pajek understands. */ |
321 | 4.02M | case '\n': |
322 | 4.03M | case '\r': |
323 | 4.03M | *d = '\\'; d++; |
324 | 4.03M | *d = 'n'; |
325 | 4.03M | break; |
326 | 4.19M | default: |
327 | 4.19M | *d = *s; |
328 | 8.22M | } |
329 | 8.22M | } |
330 | 378 | *d = '"'; d++; *d = 0; |
331 | | |
332 | 378 | return IGRAPH_SUCCESS; |
333 | 378 | } |
334 | | |
335 | | /** |
336 | | * \function igraph_write_graph_pajek |
337 | | * \brief Writes a graph to a file in Pajek format. |
338 | | * |
339 | | * Writes files in the native format of the Pajek software. This format |
340 | | * is not recommended for data exchange or archival. It is meant solely |
341 | | * for interoperability with Pajek. |
342 | | * |
343 | | * </para><para> |
344 | | * The Pajek vertex and edge parameters (like color) are determined by |
345 | | * the attributes of the vertices and edges. Of course this requires |
346 | | * an attribute handler to be installed. The names of the |
347 | | * corresponding vertex and edge attributes are listed at \ref |
348 | | * igraph_read_graph_pajek(), e.g. the \c color vertex attributes |
349 | | * determines the color (\c c in Pajek) parameter. |
350 | | * |
351 | | * </para><para> |
352 | | * Vertex and edge attributes that do not correspond to any documented |
353 | | * Pajek parameter are discarded. |
354 | | * |
355 | | * </para><para> |
356 | | * As of version 0.6.1 igraph writes bipartite graphs into Pajek files |
357 | | * correctly, i.e. they will be also bipartite when read into Pajek. |
358 | | * As Pajek is less flexible for bipartite graphs (the numeric IDs of |
359 | | * the vertices must be sorted according to vertex type), igraph might |
360 | | * need to reorder the vertices when writing a bipartite Pajek file. |
361 | | * This effectively means that numeric vertex IDs usually change when |
362 | | * a bipartite graph is written to a Pajek file, and then read back |
363 | | * into igraph. |
364 | | * |
365 | | * </para><para> |
366 | | * Early versions of Pajek supported only Windows-style line endings |
367 | | * in Pajek files, but recent versions support both Windows and Unix |
368 | | * line endings. igraph therefore uses the platform-native line endings |
369 | | * when the input file is opened in text mode, and uses Unix-style |
370 | | * line endings when the input file is opened in binary mode. If you |
371 | | * are using an old version of Pajek, you are on Unix and you are having |
372 | | * problems reading files written by igraph on a Windows machine, convert the |
373 | | * line endings manually with a text editor or with \c unix2dos or \c iconv |
374 | | * from the command line). |
375 | | * |
376 | | * </para><para> |
377 | | * Pajek will only interpret UTF-8 encoded files if they contain a byte-order |
378 | | * mark (BOM) at the beginning. igraph is agnostic of string attribute encodings |
379 | | * and therefore it will never write a BOM. You need to add this manually |
380 | | * if/when necessary. |
381 | | * |
382 | | * \param graph The graph object to write. |
383 | | * \param outstream The file to write to. It should be opened and writable. |
384 | | * \return Error code. |
385 | | * |
386 | | * Time complexity: O(|V|+|E|+|A|), |V| is the number of vertices, |E| |
387 | | * is the number of edges, |A| the number of attributes (vertex + |
388 | | * edge) in the graph if there are attribute handlers installed. |
389 | | * |
390 | | * \sa \ref igraph_read_graph_pajek() for reading Pajek graphs, \ref |
391 | | * igraph_write_graph_graphml() for writing a graph in GraphML format, |
392 | | * this suites <command>igraph</command> graphs better. |
393 | | * |
394 | | * \example examples/simple/igraph_write_graph_pajek.c |
395 | | */ |
396 | | |
397 | 5.46k | igraph_error_t igraph_write_graph_pajek(const igraph_t *graph, FILE *outstream) { |
398 | 5.46k | igraph_int_t no_of_nodes = igraph_vcount(graph); |
399 | | |
400 | 5.46k | igraph_attribute_type_t vtypes[V_LAST], etypes[E_LAST]; |
401 | 5.46k | igraph_bool_t write_vertex_attrs = false; |
402 | | |
403 | | /* Same order as the #define's */ |
404 | 5.46k | const char *vnames[] = { "name", "x", "y", "z", "shape", "xfact", "yfact", |
405 | 5.46k | "labeldist", "labeldegree2", "framewidth", |
406 | 5.46k | "fontsize", "rotation", "radius", |
407 | 5.46k | "diamondratio", "labeldegree", |
408 | 5.46k | "font", "url", "color", "framecolor", |
409 | 5.46k | "labelcolor" |
410 | 5.46k | }; |
411 | 5.46k | IGRAPH_STATIC_ASSERT(sizeof(vnames) / sizeof(vnames[0]) == V_LAST); |
412 | | |
413 | | /* Arrays called xxx[] are igraph attribute names, |
414 | | * xxx2[] are the corresponding Pajek names. */ |
415 | 5.46k | const char *vnumnames[] = { "xfact", "yfact", "labeldist", |
416 | 5.46k | "labeldegree2", "framewidth", "fontsize", |
417 | 5.46k | "rotation", "radius", "diamondratio", |
418 | 5.46k | "labeldegree" |
419 | 5.46k | }; |
420 | 5.46k | const char *vnumnames2[] = { "x_fact", "y_fact", "lr", "lphi", "bw", |
421 | 5.46k | "fos", "phi", "r", "q", "la" |
422 | 5.46k | }; |
423 | 5.46k | IGRAPH_STATIC_ASSERT(sizeof(vnumnames) == sizeof(vnumnames2)); |
424 | | |
425 | 5.46k | const char *vstrnames[] = { "font", "url", "color", "framecolor", |
426 | 5.46k | "labelcolor" |
427 | 5.46k | }; |
428 | 5.46k | const char *vstrnames2[] = { "font", "url", "ic", "bc", "lc" }; |
429 | 5.46k | IGRAPH_STATIC_ASSERT(sizeof(vstrnames) == sizeof(vstrnames2)); |
430 | | |
431 | | /* Same order as the #define's */ |
432 | 5.46k | const char *enames[] = { "weight" }; |
433 | 5.46k | IGRAPH_STATIC_ASSERT(sizeof(enames) / sizeof(enames[0]) == E_LAST); |
434 | | |
435 | 5.46k | const char *enumnames[] = { "arrowsize", "edgewidth", "hook1", "hook2", |
436 | 5.46k | "angle1", "angle2", "velocity1", "velocity2", |
437 | 5.46k | "arrowpos", "labelpos", "labelangle", |
438 | 5.46k | "labelangle2", "labeldegree", "fontsize" |
439 | 5.46k | }; |
440 | 5.46k | const char *enumnames2[] = { "s", "w", "h1", "h2", "a1", "a2", "k1", "k2", |
441 | 5.46k | "ap", "lp", "lr", "lphi", "la", "fos" |
442 | 5.46k | }; |
443 | 5.46k | IGRAPH_STATIC_ASSERT(sizeof(enumnames) == sizeof(enumnames2)); |
444 | | |
445 | 5.46k | const char *estrnames[] = { "arrowtype", "linepattern", "label", |
446 | 5.46k | "labelcolor", "color", "font" |
447 | 5.46k | }; |
448 | 5.46k | const char *estrnames2[] = { "a", "p", "l", "lc", "c", "font" }; |
449 | 5.46k | IGRAPH_STATIC_ASSERT(sizeof(estrnames) == sizeof(estrnames2)); |
450 | | |
451 | | /* Newer Pajek versions support both Unix and Windows-style line endings, |
452 | | * so we just use Unix style. This will get converted to CRLF on Windows |
453 | | * when the file is opened in text mode */ |
454 | 5.46k | const char *newline = "\n"; |
455 | | |
456 | 5.46k | igraph_es_t es; |
457 | 5.46k | igraph_eit_t eit; |
458 | | |
459 | 5.46k | igraph_vector_t numv; |
460 | 5.46k | igraph_strvector_t strv; |
461 | | |
462 | 5.46k | igraph_vector_int_t ex_numa; |
463 | 5.46k | igraph_vector_int_t ex_stra; |
464 | 5.46k | igraph_vector_int_t vx_numa; |
465 | 5.46k | igraph_vector_int_t vx_stra; |
466 | | |
467 | 5.46k | const char *s; |
468 | 5.46k | char *escaped; |
469 | | |
470 | 5.46k | igraph_bool_t bipartite = false; |
471 | 5.46k | igraph_vector_int_t bip_index, bip_index2; |
472 | 5.46k | igraph_vector_bool_t bvec; |
473 | 5.46k | igraph_int_t notop = 0, nobottom = 0; |
474 | | |
475 | 5.46k | IGRAPH_VECTOR_INIT_FINALLY(&numv, 1); |
476 | 5.46k | IGRAPH_STRVECTOR_INIT_FINALLY(&strv, 1); |
477 | | |
478 | 5.46k | IGRAPH_VECTOR_INT_INIT_FINALLY(&ex_numa, 0); |
479 | 5.46k | IGRAPH_VECTOR_INT_INIT_FINALLY(&ex_stra, 0); |
480 | 5.46k | IGRAPH_VECTOR_INT_INIT_FINALLY(&vx_numa, 0); |
481 | 5.46k | IGRAPH_VECTOR_INT_INIT_FINALLY(&vx_stra, 0); |
482 | | |
483 | | /* Check if graph is bipartite, i.e. whether it has a Boolean 'type' vertex attribute. */ |
484 | 5.46k | if (igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_VERTEX, "type")) { |
485 | 2 | igraph_attribute_type_t type_type; |
486 | 2 | IGRAPH_CHECK(igraph_i_attribute_get_type(graph, &type_type, IGRAPH_ATTRIBUTE_VERTEX, "type")); |
487 | 2 | if (type_type == IGRAPH_ATTRIBUTE_BOOLEAN) { |
488 | 0 | bipartite = true; write_vertex_attrs = true; |
489 | | /* Count top and bottom vertices, we go over them twice, |
490 | | because we want to keep their original order */ |
491 | 0 | IGRAPH_VECTOR_INT_INIT_FINALLY(&bip_index, no_of_nodes); |
492 | 0 | IGRAPH_VECTOR_INT_INIT_FINALLY(&bip_index2, no_of_nodes); |
493 | 0 | IGRAPH_VECTOR_BOOL_INIT_FINALLY(&bvec, 1); |
494 | 0 | for (igraph_int_t i = 0; i < no_of_nodes; i++) { |
495 | 0 | IGRAPH_CHECK(igraph_i_attribute_get_bool_vertex_attr(graph, |
496 | 0 | "type", igraph_vss_1(i), &bvec)); |
497 | 0 | if (VECTOR(bvec)[0]) { |
498 | 0 | notop++; |
499 | 0 | } else { |
500 | 0 | nobottom++; |
501 | 0 | } |
502 | 0 | } |
503 | 0 | for (igraph_int_t i = 0, bptr = 0, tptr = nobottom; i < no_of_nodes; i++) { |
504 | 0 | IGRAPH_CHECK(igraph_i_attribute_get_bool_vertex_attr(graph, |
505 | 0 | "type", igraph_vss_1(i), &bvec)); |
506 | 0 | if (VECTOR(bvec)[0]) { |
507 | 0 | VECTOR(bip_index)[tptr] = i; |
508 | 0 | VECTOR(bip_index2)[i] = tptr; |
509 | 0 | tptr++; |
510 | 0 | } else { |
511 | 0 | VECTOR(bip_index)[bptr] = i; |
512 | 0 | VECTOR(bip_index2)[i] = bptr; |
513 | 0 | bptr++; |
514 | 0 | } |
515 | 0 | } |
516 | 0 | igraph_vector_bool_destroy(&bvec); |
517 | 0 | IGRAPH_FINALLY_CLEAN(1); |
518 | 0 | } |
519 | 2 | } |
520 | | |
521 | | /* Write header */ |
522 | 5.46k | if (bipartite) { |
523 | 0 | if (fprintf(outstream, "*Vertices %" IGRAPH_PRId " %" IGRAPH_PRId "%s", no_of_nodes, nobottom, |
524 | 0 | newline) < 0) { |
525 | 0 | IGRAPH_ERROR("Cannot write pajek file.", IGRAPH_EFILE); |
526 | 0 | } |
527 | 5.46k | } else { |
528 | 5.46k | if (fprintf(outstream, "*Vertices %" IGRAPH_PRId "%s", no_of_nodes, newline) < 0) { |
529 | 0 | IGRAPH_ERROR("Cannot write pajek file.", IGRAPH_EFILE); |
530 | 0 | } |
531 | 5.46k | } |
532 | | |
533 | | /* Check the vertex attributes, and determine if we need to write them. */ |
534 | 5.46k | memset(vtypes, 0, sizeof(vtypes[0])*V_LAST); |
535 | 114k | for (igraph_int_t i = 0; i < V_LAST; i++) { |
536 | 109k | if (igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_VERTEX, vnames[i])) { |
537 | 584 | IGRAPH_CHECK(igraph_i_attribute_get_type( |
538 | 584 | graph, &vtypes[i], IGRAPH_ATTRIBUTE_VERTEX, vnames[i])); |
539 | 584 | write_vertex_attrs = true; |
540 | 108k | } else { |
541 | 108k | vtypes[i] = (igraph_attribute_type_t) -1; |
542 | 108k | } |
543 | 109k | } |
544 | 60.1k | for (igraph_int_t i = 0; i < (igraph_int_t) (sizeof(vnumnames) / sizeof(vnumnames[0])); i++) { |
545 | 54.6k | igraph_attribute_type_t type; |
546 | 54.6k | if (igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_VERTEX, vnumnames[i])) { |
547 | 88 | IGRAPH_CHECK(igraph_i_attribute_get_type( |
548 | 88 | graph, &type, IGRAPH_ATTRIBUTE_VERTEX, vnumnames[i])); |
549 | 88 | if (type == IGRAPH_ATTRIBUTE_NUMERIC) { |
550 | 80 | IGRAPH_CHECK(igraph_vector_int_push_back(&vx_numa, i)); |
551 | 80 | } |
552 | 88 | } |
553 | 54.6k | } |
554 | 32.7k | for (igraph_int_t i = 0; i < (igraph_int_t) (sizeof(vstrnames) / sizeof(vstrnames[0])); i++) { |
555 | 27.3k | igraph_attribute_type_t type; |
556 | 27.3k | if (igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_VERTEX, vstrnames[i])) { |
557 | 248 | IGRAPH_CHECK(igraph_i_attribute_get_type( |
558 | 248 | graph, &type, IGRAPH_ATTRIBUTE_VERTEX, vstrnames[i])); |
559 | 248 | if (type == IGRAPH_ATTRIBUTE_STRING) { |
560 | 235 | IGRAPH_CHECK(igraph_vector_int_push_back(&vx_stra, i)); |
561 | 235 | } |
562 | 248 | } |
563 | 27.3k | } |
564 | | |
565 | | /* Write vertices */ |
566 | 5.46k | if (write_vertex_attrs) { |
567 | 548k | for (igraph_int_t i = 0; i < no_of_nodes; i++) { |
568 | 547k | igraph_int_t id = bipartite ? VECTOR(bip_index)[i] : i; |
569 | | |
570 | | /* vertex ID */ |
571 | 547k | fprintf(outstream, "%" IGRAPH_PRId, i + 1); |
572 | 547k | if (vtypes[V_ID] == IGRAPH_ATTRIBUTE_NUMERIC) { |
573 | 194 | IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr( |
574 | 194 | graph, vnames[V_ID], igraph_vss_1(id), &numv)); |
575 | 194 | fputs(" \"", outstream); |
576 | 194 | igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]); |
577 | 194 | fputc('"', outstream); |
578 | 547k | } else if (vtypes[V_ID] == IGRAPH_ATTRIBUTE_STRING) { |
579 | 258 | IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr( |
580 | 258 | graph, vnames[V_ID], igraph_vss_1(id), &strv)); |
581 | 258 | s = igraph_strvector_get(&strv, 0); |
582 | 258 | IGRAPH_CHECK(pajek_escape(s, &escaped)); |
583 | 258 | fprintf(outstream, " %s", escaped); |
584 | 258 | IGRAPH_FREE(escaped); |
585 | 547k | } else { |
586 | 547k | fprintf(outstream, " \"%" IGRAPH_PRId "\"", id + 1); |
587 | 547k | } |
588 | | |
589 | | /* coordinates */ |
590 | 547k | if (vtypes[V_X] == IGRAPH_ATTRIBUTE_NUMERIC && |
591 | 848 | vtypes[V_Y] == IGRAPH_ATTRIBUTE_NUMERIC) { |
592 | 634 | IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr( |
593 | 634 | graph, vnames[V_X], igraph_vss_1(id), &numv)); |
594 | 634 | fputc(' ', outstream); |
595 | 634 | igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]); |
596 | 634 | IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr( |
597 | 634 | graph, vnames[V_Y], igraph_vss_1(id), &numv)); |
598 | 634 | fputc(' ', outstream); |
599 | 634 | igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]); |
600 | 634 | if (vtypes[V_Z] == IGRAPH_ATTRIBUTE_NUMERIC) { |
601 | 354 | IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr(graph, vnames[V_Z], |
602 | 354 | igraph_vss_1(id), &numv)); |
603 | 354 | fputc(' ', outstream); |
604 | 354 | igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]); |
605 | 354 | } |
606 | 634 | } |
607 | | |
608 | | /* shape */ |
609 | 547k | if (vtypes[V_SHAPE] == IGRAPH_ATTRIBUTE_STRING) { |
610 | 195 | IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr( |
611 | 195 | graph, vnames[V_SHAPE], igraph_vss_1(id), &strv)); |
612 | 195 | s = igraph_strvector_get(&strv, 0); |
613 | 195 | IGRAPH_CHECK(pajek_escape(s, &escaped)); |
614 | 195 | fprintf(outstream, " %s", escaped); |
615 | 195 | IGRAPH_FREE(escaped); |
616 | 195 | } |
617 | | |
618 | | /* numeric parameters */ |
619 | 1.19M | for (igraph_int_t j = 0; j < igraph_vector_int_size(&vx_numa); j++) { |
620 | 645k | igraph_int_t idx = VECTOR(vx_numa)[j]; |
621 | 645k | IGRAPH_CHECK(igraph_i_attribute_get_numeric_vertex_attr( |
622 | 645k | graph, vnumnames[idx], igraph_vss_1(id), &numv)); |
623 | 645k | fprintf(outstream, " %s ", vnumnames2[idx]); |
624 | 645k | igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]); |
625 | 645k | } |
626 | | |
627 | | /* string parameters */ |
628 | 548k | for (igraph_int_t j = 0; j < igraph_vector_int_size(&vx_stra); j++) { |
629 | 1.20k | igraph_int_t idx = VECTOR(vx_stra)[j]; |
630 | 1.20k | IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr( |
631 | 1.20k | graph, vstrnames[idx], igraph_vss_1(id), &strv)); |
632 | 1.20k | s = igraph_strvector_get(&strv, 0); |
633 | 1.20k | IGRAPH_CHECK(pajek_escape(s, &escaped)); |
634 | 1.20k | fprintf(outstream, " %s %s", vstrnames2[idx], escaped); |
635 | 1.20k | IGRAPH_FREE(escaped); |
636 | 1.20k | } |
637 | | |
638 | | /* trailing newline */ |
639 | 547k | fprintf(outstream, "%s", newline); |
640 | 547k | } |
641 | 430 | } |
642 | | |
643 | | /* edges header */ |
644 | 5.46k | if (igraph_is_directed(graph)) { |
645 | 463 | fprintf(outstream, "*Arcs%s", newline); |
646 | 5.00k | } else { |
647 | 5.00k | fprintf(outstream, "*Edges%s", newline); |
648 | 5.00k | } |
649 | | |
650 | 5.46k | IGRAPH_CHECK(igraph_es_all(&es, IGRAPH_EDGEORDER_ID)); |
651 | 5.46k | IGRAPH_FINALLY(igraph_es_destroy, &es); |
652 | 5.46k | IGRAPH_CHECK(igraph_eit_create(graph, es, &eit)); |
653 | 5.46k | IGRAPH_FINALLY(igraph_eit_destroy, &eit); |
654 | | |
655 | | /* Check edge attributes */ |
656 | | /* TODO: refactor and simplify since only "weight" is relevant */ |
657 | 10.9k | for (igraph_int_t i = 0; i < E_LAST; i++) { |
658 | 5.46k | if (igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_EDGE, enames[i])) { |
659 | 395 | IGRAPH_CHECK(igraph_i_attribute_get_type( |
660 | 395 | graph, &etypes[i], IGRAPH_ATTRIBUTE_EDGE, enames[i])); |
661 | 5.07k | } else { |
662 | 5.07k | etypes[i] = (igraph_attribute_type_t) -1; |
663 | 5.07k | } |
664 | 5.46k | } |
665 | 81.9k | for (igraph_int_t i = 0; i < (igraph_int_t) (sizeof(enumnames) / sizeof(enumnames[0])); i++) { |
666 | 76.5k | igraph_attribute_type_t type; |
667 | 76.5k | if (igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_EDGE, enumnames[i])) { |
668 | 65 | IGRAPH_CHECK(igraph_i_attribute_get_type( |
669 | 65 | graph, &type, IGRAPH_ATTRIBUTE_EDGE, enumnames[i])); |
670 | 65 | if (type == IGRAPH_ATTRIBUTE_NUMERIC) { |
671 | 62 | IGRAPH_CHECK(igraph_vector_int_push_back(&ex_numa, i)); |
672 | 62 | } |
673 | 65 | } |
674 | 76.5k | } |
675 | 38.2k | for (igraph_int_t i = 0; i < (igraph_int_t) (sizeof(estrnames) / sizeof(estrnames[0])); i++) { |
676 | 32.7k | igraph_attribute_type_t type; |
677 | 32.7k | if (igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_EDGE, estrnames[i])) { |
678 | 78 | IGRAPH_CHECK(igraph_i_attribute_get_type( |
679 | 78 | graph, &type, IGRAPH_ATTRIBUTE_EDGE, estrnames[i])); |
680 | 78 | if (type == IGRAPH_ATTRIBUTE_STRING) { |
681 | 64 | IGRAPH_CHECK(igraph_vector_int_push_back(&ex_stra, i)); |
682 | 64 | } |
683 | 78 | } |
684 | 32.7k | } |
685 | | |
686 | 134k | for (igraph_int_t i = 0; !IGRAPH_EIT_END(eit); IGRAPH_EIT_NEXT(eit), i++) { |
687 | 129k | igraph_int_t edge = IGRAPH_EIT_GET(eit); |
688 | 129k | igraph_int_t from, to; |
689 | 129k | igraph_edge(graph, edge, &from, &to); |
690 | 129k | if (bipartite) { |
691 | 0 | from = VECTOR(bip_index2)[from]; |
692 | 0 | to = VECTOR(bip_index2)[to]; |
693 | 0 | } |
694 | 129k | fprintf(outstream, "%" IGRAPH_PRId " %" IGRAPH_PRId , from + 1, to + 1); |
695 | | |
696 | | /* Weights */ |
697 | 129k | if (etypes[E_WEIGHT] == IGRAPH_ATTRIBUTE_NUMERIC) { |
698 | 29.0k | IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr( |
699 | 29.0k | graph, enames[E_WEIGHT], igraph_ess_1(edge), &numv)); |
700 | 29.0k | fputc(' ', outstream); |
701 | 29.0k | igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]); |
702 | 29.0k | } |
703 | | |
704 | | /* numeric parameters */ |
705 | 129k | for (igraph_int_t j = 0; j < igraph_vector_int_size(&ex_numa); j++) { |
706 | 673 | igraph_int_t idx = VECTOR(ex_numa)[j]; |
707 | 673 | IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr( |
708 | 673 | graph, enumnames[idx], igraph_ess_1(edge), &numv)); |
709 | 673 | fprintf(outstream, " %s ", enumnames2[idx]); |
710 | 673 | igraph_real_fprintf_precise(outstream, VECTOR(numv)[0]); |
711 | 673 | } |
712 | | |
713 | | /* string parameters */ |
714 | 130k | for (igraph_int_t j = 0; j < igraph_vector_int_size(&ex_stra); j++) { |
715 | 1.39k | igraph_int_t idx = VECTOR(ex_stra)[j]; |
716 | 1.39k | IGRAPH_CHECK(igraph_i_attribute_get_string_edge_attr( |
717 | 1.39k | graph, estrnames[idx], igraph_ess_1(edge), &strv)); |
718 | 1.39k | s = igraph_strvector_get(&strv, 0); |
719 | 1.39k | IGRAPH_CHECK(pajek_escape(s, &escaped)); |
720 | 1.39k | fprintf(outstream, " %s %s", estrnames2[idx], escaped); |
721 | 1.39k | IGRAPH_FREE(escaped); |
722 | 1.39k | } |
723 | | |
724 | | /* trailing newline */ |
725 | 129k | fprintf(outstream, "%s", newline); |
726 | 129k | } |
727 | | |
728 | 5.46k | igraph_eit_destroy(&eit); |
729 | 5.46k | igraph_es_destroy(&es); |
730 | 5.46k | IGRAPH_FINALLY_CLEAN(2); |
731 | | |
732 | 5.46k | if (bipartite) { |
733 | 0 | igraph_vector_int_destroy(&bip_index2); |
734 | 0 | igraph_vector_int_destroy(&bip_index); |
735 | 0 | IGRAPH_FINALLY_CLEAN(2); |
736 | 0 | } |
737 | | |
738 | 5.46k | igraph_vector_int_destroy(&ex_numa); |
739 | 5.46k | igraph_vector_int_destroy(&ex_stra); |
740 | 5.46k | igraph_vector_int_destroy(&vx_numa); |
741 | 5.46k | igraph_vector_int_destroy(&vx_stra); |
742 | 5.46k | igraph_strvector_destroy(&strv); |
743 | 5.46k | igraph_vector_destroy(&numv); |
744 | 5.46k | IGRAPH_FINALLY_CLEAN(6); |
745 | 5.46k | return IGRAPH_SUCCESS; |
746 | 5.46k | } |