Coverage Report

Created: 2025-11-14 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/igraph/src/io/ncol.c
Line
Count
Source
1
/*
2
   igraph library.
3
   Copyright (C) 2005-2020  The igraph development team
4
5
   This program is free software; you can redistribute it and/or modify
6
   it under the terms of the GNU General Public License as published by
7
   the Free Software Foundation; either version 2 of the License, or
8
   (at your option) any later version.
9
10
   This program is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU General Public License for more details.
14
15
   You should have received a copy of the GNU General Public License
16
   along with this program; if not, write to the Free Software
17
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18
   02110-1301 USA
19
20
*/
21
22
#include "igraph_foreign.h"
23
24
#include "igraph_attributes.h"
25
#include "igraph_interface.h"
26
27
#include "graph/attributes.h"
28
29
#include "io/ncol-header.h"
30
#include "io/parsers/ncol-parser.h"
31
32
int igraph_ncol_yylex_init_extra (igraph_i_ncol_parsedata_t *user_defined,
33
                                  void *scanner);
34
int igraph_ncol_yylex_destroy(void *scanner);
35
int igraph_ncol_yyparse(igraph_i_ncol_parsedata_t *context);
36
void igraph_ncol_yyset_in(FILE *in_str, void *yyscanner);
37
38
/* for IGRAPH_FINALLY, which assumes that destructor functions return void */
39
330
void igraph_ncol_yylex_destroy_wrapper (void *scanner ) {
40
330
    (void) igraph_ncol_yylex_destroy(scanner);
41
330
}
42
43
/**
44
 * \ingroup loadsave
45
 * \function igraph_read_graph_ncol
46
 * \brief Reads an <code>.ncol</code> file used by LGL.
47
 *
48
 * Also useful for creating graphs from \quote named\endquote (and
49
 * optionally weighted) edge lists.
50
 *
51
 * </para><para>
52
 * This format is used by the Large Graph Layout program
53
 * (https://lgl.sourceforge.net), and it is simply a
54
 * symbolic weighted edge list. It is a simple text file with one edge
55
 * per line. An edge is defined by two symbolic vertex names separated
56
 * by whitespace. The vertex names themselves cannot contain
57
 * whitespace. They may be followed by an optional number,
58
 * the weight of the edge; the number can be negative and can be in
59
 * scientific notation. If there is no weight specified to an edge it
60
 * is assumed to be zero.
61
 *
62
 * </para><para>
63
 * The resulting graph is always undirected.
64
 * LGL cannot deal with files which contain multiple or loop edges,
65
 * this is however not checked here, as \a igraph is happy with
66
 * these.
67
 *
68
 * \param graph Pointer to an uninitialized graph object.
69
 * \param instream Pointer to a stream, it should be readable.
70
 * \param predefnames Pointer to the symbolic names of the vertices in
71
 *        the file. If \c NULL is given here then vertex IDs will be
72
 *        assigned to vertex names in the order of their appearance in
73
 *        the <code>.ncol</code> file. If it is not \c NULL and some unknown
74
 *        vertex names are found in the <code>.ncol</code> file then new vertex
75
 *        ids will be assigned to them.
76
 * \param names Boolean value. If \c true, the symbolic names of the
77
 *        vertices will be added to the graph as a vertex attribute
78
 *        called \quote name\endquote.
79
 * \param weights Whether to add the weights of the edges to the
80
 *        graph as an edge attribute called \quote weight\endquote.
81
 *        \c IGRAPH_ADD_WEIGHTS_YES adds the weights (even if they
82
 *        are not present in the file, in this case they are assumed
83
 *        to be 1). \c IGRAPH_ADD_WEIGHTS_NO does not add any
84
 *        edge attribute. \c IGRAPH_ADD_WEIGHTS_IF_PRESENT adds the
85
 *        attribute if and only if there is at least one explicit
86
 *        edge weight in the input file, and edges without an explicit
87
 *        weight are assumed to have a weight of 1.
88
 * \param directed Whether to create a directed graph. As this format
89
 *        was originally used only for undirected graphs there is no
90
 *        information in the file about the directedness of the graph.
91
 *        Set this parameter to \c IGRAPH_DIRECTED or \c
92
 *        IGRAPH_UNDIRECTED to create a directed or undirected graph.
93
 * \return Error code:
94
 *         \c IGRAPH_PARSEERROR: if there is a
95
 *          problem reading
96
 *         the file, or the file is syntactically incorrect.
97
 *
98
 * Time complexity:
99
 * O(|V|+|E|log(|V|)) if we neglect
100
 * the time required by the parsing. As usual
101
 * |V| is the number of vertices,
102
 * while |E| is the number of edges.
103
 *
104
 * \sa \ref igraph_read_graph_lgl(), \ref igraph_write_graph_ncol()
105
 */
106
igraph_error_t igraph_read_graph_ncol(igraph_t *graph, FILE *instream,
107
                           const igraph_strvector_t *predefnames,
108
                           igraph_bool_t names,
109
                           igraph_add_weights_t weights,
110
940
                           igraph_bool_t directed) {
111
112
940
    igraph_vector_int_t edges;
113
940
    igraph_vector_t ws;
114
940
    igraph_trie_t trie = IGRAPH_TRIE_NULL;
115
940
    igraph_int_t no_of_nodes;
116
940
    igraph_int_t no_predefined = 0;
117
940
    igraph_attribute_record_list_t name, weight;
118
940
    igraph_attribute_record_list_t *pname = NULL, *pweight = NULL;
119
940
    igraph_attribute_record_t *namerec, *weightrec;
120
940
    const char *namestr = "name", *weightstr = "weight";
121
940
    igraph_i_ncol_parsedata_t context;
122
123
940
    IGRAPH_VECTOR_INT_INIT_FINALLY(&edges, 0);
124
125
940
    IGRAPH_TRIE_INIT_FINALLY(&trie, names);
126
940
    IGRAPH_VECTOR_INIT_FINALLY(&ws, 0);
127
128
    /* Add the predefined names, if any */
129
940
    if (predefnames != 0) {
130
0
        igraph_int_t i, id, n;
131
0
        const char *key;
132
0
        n = no_predefined = igraph_strvector_size(predefnames);
133
0
        for (i = 0; i < n; i++) {
134
0
            key = igraph_strvector_get(predefnames, i);
135
0
            IGRAPH_CHECK(igraph_trie_get(&trie, key, &id));
136
0
            if (id != i) {
137
0
                IGRAPH_WARNING("Reading NCOL file, duplicate entry in predefined names.");
138
0
                no_predefined--;
139
0
            }
140
0
        }
141
0
    }
142
143
940
    context.has_weights = false;
144
940
    context.vector = &edges;
145
940
    context.weights = &ws;
146
940
    context.trie = &trie;
147
940
    context.errmsg[0] = '\0';
148
940
    context.igraph_errno = IGRAPH_SUCCESS;
149
150
940
    igraph_ncol_yylex_init_extra(&context, &context.scanner);
151
940
    IGRAPH_FINALLY(igraph_ncol_yylex_destroy_wrapper, context.scanner);
152
153
940
    igraph_ncol_yyset_in(instream, context.scanner);
154
155
    /* Use ENTER/EXIT to avoid destroying context.scanner before this function returns */
156
940
    IGRAPH_FINALLY_ENTER();
157
940
    int err = igraph_ncol_yyparse(&context);
158
940
    IGRAPH_FINALLY_EXIT();
159
940
    switch (err) {
160
610
    case 0: /* success */
161
610
        break;
162
330
    case 1: /* parse error */
163
330
        if (context.errmsg[0] != '\0') {
164
330
            IGRAPH_ERROR(context.errmsg, IGRAPH_PARSEERROR);
165
330
        } else if (context.igraph_errno != IGRAPH_SUCCESS) {
166
0
            IGRAPH_ERROR("", context.igraph_errno);
167
0
        } else {
168
0
            IGRAPH_ERROR("Cannot read NCOL file.", IGRAPH_PARSEERROR);
169
0
        }
170
0
        break;
171
0
    case 2: /* out of memory */
172
0
        IGRAPH_ERROR("Cannot read NCOL file.", IGRAPH_ENOMEM); /* LCOV_EXCL_LINE */
173
0
        break;
174
0
    default: /* must never reach here */
175
        /* Hint: This will usually be triggered if an IGRAPH_CHECK() is used in a Bison
176
         * action instead of an IGRAPH_YY_CHECK(), resulting in an igraph errno being
177
         * returned in place of a Bison error code.
178
         * TODO: What if future Bison versions introduce error codes other than 0, 1 and 2?
179
         */
180
0
        IGRAPH_FATALF("Parser returned unexpected error code (%d) when reading NCOL file.", err);
181
940
    }
182
183
610
    if (predefnames != 0 &&
184
0
        igraph_trie_size(&trie) != no_predefined) {
185
0
        IGRAPH_WARNING("Unknown vertex/vertices found in NCOL file, predefined names extended.");
186
0
    }
187
188
    /* Prepare attributes if needed */
189
610
    if (names) {
190
610
        IGRAPH_CHECK(igraph_attribute_record_list_init(&name, 1));
191
610
        IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &name);
192
193
610
        namerec = igraph_attribute_record_list_get_ptr(&name, 0);
194
610
        IGRAPH_CHECK(igraph_attribute_record_set_name(namerec, namestr));
195
610
        IGRAPH_CHECK(igraph_attribute_record_set_type(namerec, IGRAPH_ATTRIBUTE_STRING));
196
610
        IGRAPH_CHECK(igraph_strvector_update(
197
610
            namerec->value.as_strvector, igraph_i_trie_borrow_keys(context.trie))
198
610
        );
199
200
610
        pname = &name;
201
610
    }
202
203
610
    if (weights == IGRAPH_ADD_WEIGHTS_YES ||
204
610
        (weights == IGRAPH_ADD_WEIGHTS_IF_PRESENT && context.has_weights)) {
205
221
        IGRAPH_CHECK(igraph_attribute_record_list_init(&weight, 1));
206
221
        IGRAPH_FINALLY(igraph_attribute_record_list_destroy, &weight);
207
208
221
        weightrec = igraph_attribute_record_list_get_ptr(&weight, 0);
209
221
        IGRAPH_CHECK(igraph_attribute_record_set_name(weightrec, weightstr));
210
221
        IGRAPH_CHECK(igraph_attribute_record_set_type(weightrec, IGRAPH_ATTRIBUTE_NUMERIC));
211
221
        igraph_vector_swap(weightrec->value.as_vector, context.weights);
212
213
221
        pweight = &weight;
214
221
    }
215
216
610
    if (igraph_vector_int_empty(&edges)) {
217
35
        no_of_nodes = 0;
218
575
    } else {
219
575
        no_of_nodes = igraph_vector_int_max(&edges) + 1;
220
575
    }
221
222
    /* Create graph */
223
610
    IGRAPH_CHECK(igraph_empty(graph, 0, directed));
224
610
    IGRAPH_FINALLY(igraph_destroy, graph);
225
610
    IGRAPH_CHECK(igraph_add_vertices(graph, no_of_nodes, pname));
226
610
    IGRAPH_CHECK(igraph_add_edges(graph, &edges, pweight));
227
228
610
    if (pname) {
229
610
        igraph_attribute_record_list_destroy(pname);
230
610
        IGRAPH_FINALLY_CLEAN(1);
231
610
    }
232
610
    if (pweight) {
233
221
        igraph_attribute_record_list_destroy(pweight);
234
221
        IGRAPH_FINALLY_CLEAN(1);
235
221
    }
236
610
    igraph_vector_destroy(&ws);
237
610
    igraph_trie_destroy(&trie);
238
610
    igraph_vector_int_destroy(&edges);
239
610
    igraph_ncol_yylex_destroy(context.scanner);
240
610
    IGRAPH_FINALLY_CLEAN(5); /* +1 for 'graph' */
241
242
610
    return IGRAPH_SUCCESS;
243
610
}
244
245
246
2.44M
static igraph_error_t check_name(const char *name) {
247
2.44M
    size_t len = 0;
248
249
14.7M
    for (; *name != '\0'; name++, len++) {
250
12.2M
        if (   *name <= 0x020 /* space or non-printable */
251
12.2M
            || *name == 0x7f /* del */) {
252
269
            IGRAPH_ERRORF("The NCOL format does not allow non-printable characters or spaces in vertex names. "
253
269
                          "Character code 0x%02X found.", IGRAPH_EINVAL,
254
269
                          *name);
255
269
        }
256
12.2M
    }
257
2.44M
    if (len == 0) {
258
0
        IGRAPH_ERROR("The NCOL format does not support empty vertex names.", IGRAPH_EINVAL);
259
0
    }
260
2.44M
    return IGRAPH_SUCCESS;
261
2.44M
}
262
263
264
/**
265
 * \ingroup loadsave
266
 * \function igraph_write_graph_ncol
267
 * \brief Writes the graph to a file in <code>.ncol</code> format.
268
 *
269
 * </para><para>
270
 * <code>.ncol</code> is a format used by LGL, see \ref
271
 * igraph_read_graph_ncol() for details.
272
 *
273
 * </para><para>
274
 * Note that having multiple or loop edges in an
275
 * <code>.ncol</code> file breaks the  LGL software but
276
 * \a igraph does not check for this condition.
277
 *
278
 * </para><para>
279
 * This format cannot represent zero-degree vertices.
280
 *
281
 * \param graph The graph to write.
282
 * \param outstream The stream object to write to, it should be
283
 *        writable.
284
 * \param names The name of a string vertex attribute, if symbolic names
285
 *        are to be written to the file. Supply \c NULL to write vertex
286
 *        ids instead.
287
 * \param weights The name of a numerical edge attribute, which will be
288
 *        written as weights to the file. Supply \c NULL to skip writing
289
 *        edge weights.
290
 * \return Error code:
291
 *         \c IGRAPH_EFILE if there is an error writing the
292
 *         file.
293
 *
294
 * Time complexity: O(|E|), the
295
 * number of edges. All file operations are expected to have time
296
 * complexity O(1).
297
 *
298
 * \sa \ref igraph_read_graph_ncol(), \ref igraph_write_graph_lgl()
299
 */
300
igraph_error_t igraph_write_graph_ncol(const igraph_t *graph, FILE *outstream,
301
610
                            const char *names, const char *weights) {
302
610
    igraph_eit_t it;
303
610
    igraph_attribute_type_t nametype, weighttype;
304
305
610
    IGRAPH_CHECK(igraph_eit_create(graph, igraph_ess_all(IGRAPH_EDGEORDER_ID),
306
610
                                   &it));
307
610
    IGRAPH_FINALLY(igraph_eit_destroy, &it);
308
309
    /* Check if we have the names attribute */
310
610
    if (names && !igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_VERTEX,
311
610
            names)) {
312
0
        IGRAPH_WARNINGF("Names attribute '%s' does not exist.", names);
313
0
        names = NULL;
314
0
    }
315
610
    if (names) {
316
610
        IGRAPH_CHECK(igraph_i_attribute_get_type(graph, &nametype,
317
610
                                                IGRAPH_ATTRIBUTE_VERTEX, names));
318
610
        if (nametype != IGRAPH_ATTRIBUTE_STRING) {
319
0
            IGRAPH_WARNINGF("Ignoring names attribute '%s', "
320
0
                    "attribute type is not a string.", names);
321
0
            names = NULL;
322
0
        }
323
610
    }
324
325
    /* Check the weights as well */
326
610
    if (weights && !igraph_i_attribute_has_attr(graph, IGRAPH_ATTRIBUTE_EDGE, weights)) {
327
389
        IGRAPH_WARNINGF("Weights attribute '%s' does not exist.", weights);
328
389
        weights = NULL;
329
389
    }
330
610
    if (weights) {
331
221
        IGRAPH_CHECK(igraph_i_attribute_get_type(graph, &weighttype,
332
221
                                                IGRAPH_ATTRIBUTE_EDGE, weights));
333
221
        if (weighttype != IGRAPH_ATTRIBUTE_NUMERIC) {
334
0
            IGRAPH_WARNINGF("Ignoring weights attribute '%s', "
335
0
                    "attribute type is not numeric.", weights);
336
0
            weights = NULL;
337
0
        }
338
221
    }
339
340
610
    if (names == NULL && weights == NULL) {
341
        /* No names, no weights */
342
0
        while (!IGRAPH_EIT_END(it)) {
343
0
            igraph_int_t from, to;
344
0
            int ret;
345
0
            igraph_edge(graph, IGRAPH_EIT_GET(it), &from, &to);
346
0
            ret = fprintf(outstream, "%" IGRAPH_PRId " %" IGRAPH_PRId "\n", from, to);
347
0
            if (ret < 0) {
348
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
349
0
            }
350
0
            IGRAPH_EIT_NEXT(it);
351
0
        }
352
610
    } else if (weights == NULL) {
353
        /* No weights, but use names */
354
389
        igraph_strvector_t nvec;
355
389
        IGRAPH_CHECK(igraph_strvector_init(&nvec, igraph_vcount(graph)));
356
389
        IGRAPH_FINALLY(igraph_strvector_destroy, &nvec);
357
389
        IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr(graph, names,
358
389
                     igraph_vss_all(),
359
389
                     &nvec));
360
651k
        while (!IGRAPH_EIT_END(it)) {
361
651k
            igraph_int_t edge = IGRAPH_EIT_GET(it);
362
651k
            igraph_int_t from, to;
363
651k
            int ret = 0;
364
651k
            const char *str1, *str2;
365
651k
            igraph_edge(graph, edge, &from, &to);
366
651k
            str1 = igraph_strvector_get(&nvec, from);
367
651k
            IGRAPH_CHECK(check_name(str1));
368
651k
            str2 = igraph_strvector_get(&nvec, to);
369
651k
            IGRAPH_CHECK(check_name(str2));
370
651k
            ret = fprintf(outstream, "%s %s\n", str1, str2);
371
651k
            if (ret < 0) {
372
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
373
0
            }
374
651k
            IGRAPH_EIT_NEXT(it);
375
651k
        }
376
218
        igraph_strvector_destroy(&nvec);
377
218
        IGRAPH_FINALLY_CLEAN(1);
378
221
    } else if (names == NULL) {
379
        /* No names but weights */
380
0
        igraph_vector_t wvec;
381
0
        IGRAPH_VECTOR_INIT_FINALLY(&wvec, igraph_ecount(graph));
382
0
        IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr(graph, weights,
383
0
                     igraph_ess_all(IGRAPH_EDGEORDER_ID),
384
0
                     &wvec));
385
0
        while (!IGRAPH_EIT_END(it)) {
386
0
            igraph_int_t edge = IGRAPH_EIT_GET(it);
387
0
            igraph_int_t from, to;
388
0
            int ret1, ret2, ret3;
389
0
            igraph_edge(graph, edge, &from, &to);
390
0
            ret1 = fprintf(outstream, "%" IGRAPH_PRId " %" IGRAPH_PRId " ", from, to);
391
0
            ret2 = igraph_real_fprintf_precise(outstream, VECTOR(wvec)[edge]);
392
0
            ret3 = fputc('\n', outstream);
393
0
            if (ret1 < 0 || ret2 < 0 || ret3 == EOF) {
394
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
395
0
            }
396
0
            IGRAPH_EIT_NEXT(it);
397
0
        }
398
0
        igraph_vector_destroy(&wvec);
399
0
        IGRAPH_FINALLY_CLEAN(1);
400
221
    } else {
401
        /* Both names and weights */
402
221
        igraph_strvector_t nvec;
403
221
        igraph_vector_t wvec;
404
221
        IGRAPH_VECTOR_INIT_FINALLY(&wvec, igraph_ecount(graph));
405
221
        IGRAPH_CHECK(igraph_strvector_init(&nvec, igraph_vcount(graph)));
406
221
        IGRAPH_FINALLY(igraph_strvector_destroy, &nvec);
407
221
        IGRAPH_CHECK(igraph_i_attribute_get_numeric_edge_attr(graph, weights,
408
221
                     igraph_ess_all(IGRAPH_EDGEORDER_ID),
409
221
                     &wvec));
410
221
        IGRAPH_CHECK(igraph_i_attribute_get_string_vertex_attr(graph, names,
411
221
                     igraph_vss_all(),
412
221
                     &nvec));
413
572k
        while (!IGRAPH_EIT_END(it)) {
414
572k
            igraph_int_t edge = IGRAPH_EIT_GET(it);
415
572k
            igraph_int_t from, to;
416
572k
            int ret = 0, ret2 = 0;
417
572k
            const char *str1, *str2;
418
572k
            igraph_edge(graph, edge, &from, &to);
419
572k
            str1 = igraph_strvector_get(&nvec, from);
420
572k
            IGRAPH_CHECK(check_name(str1));
421
572k
            str2 = igraph_strvector_get(&nvec, to);
422
572k
            IGRAPH_CHECK(check_name(str2));
423
572k
            ret = fprintf(outstream, "%s %s ", str1, str2);
424
572k
            if (ret < 0) {
425
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
426
0
            }
427
572k
            ret = igraph_real_fprintf_precise(outstream, VECTOR(wvec)[edge]);
428
572k
            ret2 = fputc('\n', outstream);
429
572k
            if (ret < 0 || ret2 == EOF) {
430
0
                IGRAPH_ERROR("Writing NCOL file failed.", IGRAPH_EFILE);
431
0
            }
432
572k
            IGRAPH_EIT_NEXT(it);
433
572k
        }
434
123
        igraph_strvector_destroy(&nvec);
435
123
        igraph_vector_destroy(&wvec);
436
123
        IGRAPH_FINALLY_CLEAN(2);
437
123
    }
438
439
341
    igraph_eit_destroy(&it);
440
341
    IGRAPH_FINALLY_CLEAN(1);
441
341
    return IGRAPH_SUCCESS;
442
610
}