Coverage Report

Created: 2025-04-11 06:16

/src/igraph/src/io/ncol.c
Line
Count
Source (jump to first uncovered line)
1
/* -*- mode: C -*-  */
2
/*
3
   IGraph library.
4
   Copyright (C) 2005-2020  The igraph development team
5
6
   This program is free software; you can redistribute it and/or modify
7
   it under the terms of the GNU General Public License as published by
8
   the Free Software Foundation; either version 2 of the License, or
9
   (at your option) any later version.
10
11
   This program is distributed in the hope that it will be useful,
12
   but WITHOUT ANY WARRANTY; without even the implied warranty of
13
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
   GNU General Public License for more details.
15
16
   You should have received a copy of the GNU General Public License
17
   along with this program; if not, write to the Free Software
18
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
   02110-1301 USA
20
21
*/
22
23
#include "igraph_foreign.h"
24
25
#include "igraph_attributes.h"
26
#include "igraph_interface.h"
27
28
#include "graph/attributes.h"
29
30
#include "io/ncol-header.h"
31
#include "io/parsers/ncol-parser.h"
32
33
int igraph_ncol_yylex_init_extra (igraph_i_ncol_parsedata_t *user_defined,
34
                                  void *scanner);
35
int igraph_ncol_yylex_destroy(void *scanner);
36
int igraph_ncol_yyparse(igraph_i_ncol_parsedata_t *context);
37
void igraph_ncol_yyset_in(FILE *in_str, void *yyscanner);
38
39
/* for IGRAPH_FINALLY, which assumes that destructor functions return void */
40
332
void igraph_ncol_yylex_destroy_wrapper (void *scanner ) {
41
332
    (void) igraph_ncol_yylex_destroy(scanner);
42
332
}
43
44
/**
45
 * \ingroup loadsave
46
 * \function igraph_read_graph_ncol
47
 * \brief Reads an <code>.ncol</code> file used by LGL.
48
 *
49
 * Also useful for creating graphs from \quote named\endquote (and
50
 * optionally weighted) edge lists.
51
 *
52
 * </para><para>
53
 * This format is used by the Large Graph Layout program
54
 * (https://lgl.sourceforge.net), and it is simply a
55
 * symbolic weighted edge list. It is a simple text file with one edge
56
 * per line. An edge is defined by two symbolic vertex names separated
57
 * by whitespace. The vertex names themselves cannot contain
58
 * whitespace. They may be followed by an optional number,
59
 * the weight of the edge; the number can be negative and can be in
60
 * scientific notation. If there is no weight specified to an edge it
61
 * is assumed to be zero.
62
 *
63
 * </para><para>
64
 * The resulting graph is always undirected.
65
 * LGL cannot deal with files which contain multiple or loop edges,
66
 * this is however not checked here, as \a igraph is happy with
67
 * these.
68
 *
69
 * \param graph Pointer to an uninitialized graph object.
70
 * \param instream Pointer to a stream, it should be readable.
71
 * \param predefnames Pointer to the symbolic names of the vertices in
72
 *        the file. If \c NULL is given here then vertex IDs will be
73
 *        assigned to vertex names in the order of their appearance in
74
 *        the <code>.ncol</code> file. If it is not \c NULL and some unknown
75
 *        vertex names are found in the <code>.ncol</code> file then new vertex
76
 *        ids will be assigned to them.
77
 * \param names Logical value, if \c true the symbolic names of the
78
 *        vertices will be added to the graph as a vertex attribute
79
 *        called \quote name\endquote.
80
 * \param weights Whether to add the weights of the edges to the
81
 *        graph as an edge attribute called \quote weight\endquote.
82
 *        \c IGRAPH_ADD_WEIGHTS_YES adds the weights (even if they
83
 *        are not present in the file, in this case they are assumed
84
 *        to be 1). \c IGRAPH_ADD_WEIGHTS_NO does not add any
85
 *        edge attribute. \c IGRAPH_ADD_WEIGHTS_IF_PRESENT adds the
86
 *        attribute if and only if there is at least one explicit
87
 *        edge weight in the input file, and edges without an explicit
88
 *        weight are assumed to have a weight of 1.
89
 * \param directed Whether to create a directed graph. As this format
90
 *        was originally used only for undirected graphs there is no
91
 *        information in the file about the directedness of the graph.
92
 *        Set this parameter to \c IGRAPH_DIRECTED or \c
93
 *        IGRAPH_UNDIRECTED to create a directed or undirected graph.
94
 * \return Error code:
95
 *         \c IGRAPH_PARSEERROR: if there is a
96
 *          problem reading
97
 *         the file, or the file is syntactically incorrect.
98
 *
99
 * Time complexity:
100
 * O(|V|+|E|log(|V|)) if we neglect
101
 * the time required by the parsing. As usual
102
 * |V| is the number of vertices,
103
 * while |E| is the number of edges.
104
 *
105
 * \sa \ref igraph_read_graph_lgl(), \ref igraph_write_graph_ncol()
106
 */
107
igraph_error_t igraph_read_graph_ncol(igraph_t *graph, FILE *instream,
108
                           const igraph_strvector_t *predefnames,
109
                           igraph_bool_t names,
110
                           igraph_add_weights_t weights,
111
968
                           igraph_bool_t directed) {
112
113
968
    igraph_vector_int_t edges;
114
968
    igraph_vector_t ws;
115
968
    igraph_trie_t trie = IGRAPH_TRIE_NULL;
116
968
    igraph_integer_t no_of_nodes;
117
968
    igraph_integer_t no_predefined = 0;
118
968
    igraph_attribute_record_list_t name, weight;
119
968
    igraph_attribute_record_list_t *pname = NULL, *pweight = NULL;
120
968
    igraph_attribute_record_t *namerec, *weightrec;
121
968
    const char *namestr = "name", *weightstr = "weight";
122
968
    igraph_i_ncol_parsedata_t context;
123
124
968
    IGRAPH_VECTOR_INT_INIT_FINALLY(&edges, 0);
125
126
968
    IGRAPH_TRIE_INIT_FINALLY(&trie, names);
127
968
    IGRAPH_VECTOR_INIT_FINALLY(&ws, 0);
128
129
    /* Add the predefined names, if any */
130
968
    if (predefnames != 0) {
131
0
        igraph_integer_t i, id, n;
132
0
        const char *key;
133
0
        n = no_predefined = igraph_strvector_size(predefnames);
134
0
        for (i = 0; i < n; i++) {
135
0
            key = igraph_strvector_get(predefnames, i);
136
0
            IGRAPH_CHECK(igraph_trie_get(&trie, key, &id));
137
0
            if (id != i) {
138
0
                IGRAPH_WARNING("Reading NCOL file, duplicate entry in predefined names.");
139
0
                no_predefined--;
140
0
            }
141
0
        }
142
0
    }
143
144
968
    context.has_weights = false;
145
968
    context.vector = &edges;
146
968
    context.weights = &ws;
147
968
    context.trie = &trie;
148
968
    context.errmsg[0] = '\0';
149
968
    context.igraph_errno = IGRAPH_SUCCESS;
150
151
968
    igraph_ncol_yylex_init_extra(&context, &context.scanner);
152
968
    IGRAPH_FINALLY(igraph_ncol_yylex_destroy_wrapper, context.scanner);
153
154
968
    igraph_ncol_yyset_in(instream, context.scanner);
155
156
    /* Use ENTER/EXIT to avoid destroying context.scanner before this function returns */
157
968
    IGRAPH_FINALLY_ENTER();
158
968
    int err = igraph_ncol_yyparse(&context);
159
968
    IGRAPH_FINALLY_EXIT();
160
968
    switch (err) {
161
636
    case 0: /* success */
162
636
        break;
163
332
    case 1: /* parse error */
164
332
        if (context.errmsg[0] != '\0') {
165
332
            IGRAPH_ERROR(context.errmsg, IGRAPH_PARSEERROR);
166
332
        } else if (context.igraph_errno != IGRAPH_SUCCESS) {
167
0
            IGRAPH_ERROR("", context.igraph_errno);
168
0
        } else {
169
0
            IGRAPH_ERROR("Cannot read NCOL file.", IGRAPH_PARSEERROR);
170
0
        }
171
0
        break;
172
0
    case 2: /* out of memory */
173
0
        IGRAPH_ERROR("Cannot read NCOL file.", IGRAPH_ENOMEM); /* LCOV_EXCL_LINE */
174
0
        break;
175
0
    default: /* must never reach here */
176
        /* Hint: This will usually be triggered if an IGRAPH_CHECK() is used in a Bison
177
         * action instead of an IGRAPH_YY_CHECK(), resulting in an igraph errno being
178
         * returned in place of a Bison error code.
179
         * TODO: What if future Bison versions introduce error codes other than 0, 1 and 2?
180
         */
181
0
        IGRAPH_FATALF("Parser returned unexpected error code (%d) when reading NCOL file.", err);
182
968
    }
183
184
636
    if (predefnames != 0 &&
185
636
        igraph_trie_size(&trie) != no_predefined) {
186
0
        IGRAPH_WARNING("Unknown vertex/vertices found in NCOL file, predefined names extended.");
187
0
    }
188
189
    /* Prepare attributes if needed */
190
636
    if (names) {
191
636
        IGRAPH_CHECK(igraph_attribute_record_list_init(&name, 1));
192
636
        IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &name);
193
194
636
        namerec = igraph_attribute_record_list_get_ptr(&name, 0);
195
636
        IGRAPH_CHECK(igraph_attribute_record_set_name(namerec, namestr));
196
636
        IGRAPH_CHECK(igraph_attribute_record_set_type(namerec, IGRAPH_ATTRIBUTE_STRING));
197
636
        IGRAPH_CHECK(igraph_strvector_update(
198
636
            namerec->value.as_strvector, igraph_i_trie_borrow_keys(context.trie))
199
636
        );
200
201
636
        pname = &name;
202
636
    }
203
204
636
    if (weights == IGRAPH_ADD_WEIGHTS_YES ||
205
636
        (weights == IGRAPH_ADD_WEIGHTS_IF_PRESENT && context.has_weights)) {
206
242
        IGRAPH_CHECK(igraph_attribute_record_list_init(&weight, 1));
207
242
        IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &weight);
208
209
242
        weightrec = igraph_attribute_record_list_get_ptr(&weight, 0);
210
242
        IGRAPH_CHECK(igraph_attribute_record_set_name(weightrec, weightstr));
211
242
        IGRAPH_CHECK(igraph_attribute_record_set_type(weightrec, IGRAPH_ATTRIBUTE_NUMERIC));
212
242
        igraph_vector_swap(weightrec->value.as_vector, context.weights);
213
214
242
        pweight = &weight;
215
242
    }
216
217
636
    if (igraph_vector_int_empty(&edges)) {
218
35
        no_of_nodes = 0;
219
601
    } else {
220
601
        no_of_nodes = igraph_vector_int_max(&edges) + 1;
221
601
    }
222
223
    /* Create graph */
224
636
    IGRAPH_CHECK(igraph_empty(graph, 0, directed));
225
636
    IGRAPH_FINALLY(igraph_destroy, graph);
226
636
    IGRAPH_CHECK(igraph_add_vertices(graph, no_of_nodes, pname));
227
636
    IGRAPH_CHECK(igraph_add_edges(graph, &edges, pweight));
228
229
636
    if (pname) {
230
636
        igraph_attribute_record_list_destroy(pname);
231
636
        IGRAPH_FINALLY_CLEAN(1);
232
636
    }
233
636
    if (pweight) {
234
242
        igraph_attribute_record_list_destroy(pweight);
235
242
        IGRAPH_FINALLY_CLEAN(1);
236
242
    }
237
636
    igraph_vector_destroy(&ws);
238
636
    igraph_trie_destroy(&trie);
239
636
    igraph_vector_int_destroy(&edges);
240
636
    igraph_ncol_yylex_destroy(context.scanner);
241
636
    IGRAPH_FINALLY_CLEAN(5); /* +1 for 'graph' */
242
243
636
    return IGRAPH_SUCCESS;
244
636
}
245
246
247
2.75M
static igraph_error_t check_name(const char *name) {
248
2.75M
    size_t len = 0;
249
250
14.2M
    for (; *name != '\0'; name++, len++) {
251
11.4M
        if (   *name <= 0x020 /* space or non-printable */
252
11.4M
            || *name == 0x7f /* del */) {
253
284
            IGRAPH_ERRORF("The NCOL format does not allow non-printable characters or spaces in vertex names. "
254
284
                          "Character code 0x%02X found.", IGRAPH_EINVAL,
255
284
                          *name);
256
284
        }
257
11.4M
    }
258
2.75M
    if (len == 0) {
259
0
        IGRAPH_ERROR("The NCOL format does not support empty vertex names.", IGRAPH_EINVAL);
260
0
    }
261
2.75M
    return IGRAPH_SUCCESS;
262
2.75M
}
263
264
265
/**
266
 * \ingroup loadsave
267
 * \function igraph_write_graph_ncol
268
 * \brief Writes the graph to a file in <code>.ncol</code> format.
269
 *
270
 * </para><para>
271
 * <code>.ncol</code> is a format used by LGL, see \ref
272
 * igraph_read_graph_ncol() for details.
273
 *
274
 * </para><para>
275
 * Note that having multiple or loop edges in an
276
 * <code>.ncol</code> file breaks the  LGL software but
277
 * \a igraph does not check for this condition.
278
 *
279
 * </para><para>
280
 * This format cannot represent zero-degree vertices.
281
 *
282
 * \param graph The graph to write.
283
 * \param outstream The stream object to write to, it should be
284
 *        writable.
285
 * \param names The name of a string vertex attribute, if symbolic names
286
 *        are to be written to the file. Supply \c NULL to write vertex
287
 *        ids instead.
288
 * \param weights The name of a numerical edge attribute, which will be
289
 *        written as weights to the file. Supply \c NULL to skip writing
290
 *        edge weights.
291
 * \return Error code:
292
 *         \c IGRAPH_EFILE if there is an error writing the
293
 *         file.
294
 *
295
 * Time complexity: O(|E|), the
296
 * number of edges. All file operations are expected to have time
297
 * complexity O(1).
298
 *
299
 * \sa \ref igraph_read_graph_ncol(), \ref igraph_write_graph_lgl()
300
 */
301
igraph_error_t igraph_write_graph_ncol(const igraph_t *graph, FILE *outstream,
302
636
                            const char *names, const char *weights) {
303
636
    igraph_eit_t it;
304
636
    igraph_attribute_type_t nametype, weighttype;
305
306
636
    IGRAPH_CHECK(igraph_eit_create(graph, igraph_ess_all(IGRAPH_EDGEORDER_ID),
307
636
                                   &it));
308
636
    IGRAPH_FINALLY(igraph_eit_destroy, &it);
309
310
    /* Check if we have the names attribute */
311
636
    if (names && !igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_VERTEX,
312
636
            names)) {
313
0
        IGRAPH_WARNINGF("Names attribute '%s' does not exist.", names);
314
0
        names = NULL;
315
0
    }
316
636
    if (names) {
317
636
        IGRAPH_CHECK(igraph_i_attribute_get_type(graph, &nametype,
318
636
                                                IGRAPH_ATTRIBUTE_VERTEX, names));
319
636
        if (nametype != IGRAPH_ATTRIBUTE_STRING) {
320
0
            IGRAPH_WARNINGF("Ignoring names attribute '%s', "
321
0
                    "attribute type is not a string.", names);
322
0
            names = NULL;
323
0
        }
324
636
    }
325
326
    /* Check the weights as well */
327
636
    if (weights && !igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_EDGE, weights)) {
328
394
        IGRAPH_WARNINGF("Weights attribute '%s' does not exist.", weights);
329
394
        weights = NULL;
330
394
    }
331
636
    if (weights) {
332
242
        IGRAPH_CHECK(igraph_i_attribute_get_type(graph, &weighttype,
333
242
                                                IGRAPH_ATTRIBUTE_EDGE, weights));
334
242
        if (weighttype != IGRAPH_ATTRIBUTE_NUMERIC) {
335
0
            IGRAPH_WARNINGF("Ignoring weights attribute '%s', "
336
0
                    "attribute type is not numeric.", weights);
337
0
            weights = NULL;
338
0
        }
339
242
    }
340
341
636
    if (names == NULL && weights == NULL) {
342
        /* No names, no weights */
343
0
        while (!IGRAPH_EIT_END(it)) {
344
0
            igraph_integer_t from, to;
345
0
            int ret;
346
0
            igraph_edge(graph, IGRAPH_EIT_GET(it), &from, &to);
347
0
            ret = fprintf(outstream, "%" IGRAPH_PRId " %" IGRAPH_PRId "\n", from, to);
348
0
            if (ret < 0) {
349
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
350
0
            }
351
0
            IGRAPH_EIT_NEXT(it);
352
0
        }
353
636
    } else if (weights == NULL) {
354
        /* No weights, but use names */
355
394
        igraph_strvector_t nvec;
356
394
        IGRAPH_CHECK(igraph_strvector_init(&nvec, igraph_vcount(graph)));
357
394
        IGRAPH_FINALLY(igraph_strvector_destroy, &nvec);
358
394
        IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr(graph, names,
359
394
                     igraph_vss_all(),
360
394
                     &nvec));
361
641k
        while (!IGRAPH_EIT_END(it)) {
362
640k
            igraph_integer_t edge = IGRAPH_EIT_GET(it);
363
640k
            igraph_integer_t from, to;
364
640k
            int ret = 0;
365
640k
            const char *str1, *str2;
366
640k
            igraph_edge(graph, edge, &from, &to);
367
640k
            str1 = igraph_strvector_get(&nvec, from);
368
640k
            IGRAPH_CHECK(check_name(str1));
369
640k
            str2 = igraph_strvector_get(&nvec, to);
370
640k
            IGRAPH_CHECK(check_name(str2));
371
640k
            ret = fprintf(outstream, "%s %s\n", str1, str2);
372
640k
            if (ret < 0) {
373
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
374
0
            }
375
640k
            IGRAPH_EIT_NEXT(it);
376
640k
        }
377
213
        igraph_strvector_destroy(&nvec);
378
213
        IGRAPH_FINALLY_CLEAN(1);
379
242
    } else if (names == NULL) {
380
        /* No names but weights */
381
0
        igraph_vector_t wvec;
382
0
        IGRAPH_VECTOR_INIT_FINALLY(&wvec, igraph_ecount(graph));
383
0
        IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr(graph, weights,
384
0
                     igraph_ess_all(IGRAPH_EDGEORDER_ID),
385
0
                     &wvec));
386
0
        while (!IGRAPH_EIT_END(it)) {
387
0
            igraph_integer_t edge = IGRAPH_EIT_GET(it);
388
0
            igraph_integer_t from, to;
389
0
            int ret1, ret2, ret3;
390
0
            igraph_edge(graph, edge, &from, &to);
391
0
            ret1 = fprintf(outstream, "%" IGRAPH_PRId " %" IGRAPH_PRId " ", from, to);
392
0
            ret2 = igraph_real_fprintf_precise(outstream, VECTOR(wvec)[edge]);
393
0
            ret3 = fputc('\n', outstream);
394
0
            if (ret1 < 0 || ret2 < 0 || ret3 == EOF) {
395
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
396
0
            }
397
0
            IGRAPH_EIT_NEXT(it);
398
0
        }
399
0
        igraph_vector_destroy(&wvec);
400
0
        IGRAPH_FINALLY_CLEAN(1);
401
242
    } else {
402
        /* Both names and weights */
403
242
        igraph_strvector_t nvec;
404
242
        igraph_vector_t wvec;
405
242
        IGRAPH_VECTOR_INIT_FINALLY(&wvec, igraph_ecount(graph));
406
242
        IGRAPH_CHECK(igraph_strvector_init(&nvec, igraph_vcount(graph)));
407
242
        IGRAPH_FINALLY(igraph_strvector_destroy, &nvec);
408
242
        IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr(graph, weights,
409
242
                     igraph_ess_all(IGRAPH_EDGEORDER_ID),
410
242
                     &wvec));
411
242
        IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr(graph, names,
412
242
                     igraph_vss_all(),
413
242
                     &nvec));
414
736k
        while (!IGRAPH_EIT_END(it)) {
415
736k
            igraph_integer_t edge = IGRAPH_EIT_GET(it);
416
736k
            igraph_integer_t from, to;
417
736k
            int ret = 0, ret2 = 0;
418
736k
            const char *str1, *str2;
419
736k
            igraph_edge(graph, edge, &from, &to);
420
736k
            str1 = igraph_strvector_get(&nvec, from);
421
736k
            IGRAPH_CHECK(check_name(str1));
422
736k
            str2 = igraph_strvector_get(&nvec, to);
423
736k
            IGRAPH_CHECK(check_name(str2));
424
736k
            ret = fprintf(outstream, "%s %s ", str1, str2);
425
736k
            if (ret < 0) {
426
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
427
0
            }
428
736k
            ret = igraph_real_fprintf_precise(outstream, VECTOR(wvec)[edge]);
429
736k
            ret2 = fputc('\n', outstream);
430
736k
            if (ret < 0 || ret2 == EOF) {
431
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
432
0
            }
433
736k
            IGRAPH_EIT_NEXT(it);
434
736k
        }
435
139
        igraph_strvector_destroy(&nvec);
436
139
        igraph_vector_destroy(&wvec);
437
139
        IGRAPH_FINALLY_CLEAN(2);
438
139
    }
439
440
352
    igraph_eit_destroy(&it);
441
352
    IGRAPH_FINALLY_CLEAN(1);
442
352
    return IGRAPH_SUCCESS;
443
636
}