/src/igraph/src/io/graphdb.c
Line | Count | Source |
1 | | /* |
2 | | igraph library. |
3 | | Copyright (C) 2005-2020 The igraph development team |
4 | | |
5 | | This program is free software; you can redistribute it and/or modify |
6 | | it under the terms of the GNU General Public License as published by |
7 | | the Free Software Foundation; either version 2 of the License, or |
8 | | (at your option) any later version. |
9 | | |
10 | | This program is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU General Public License |
16 | | along with this program; if not, write to the Free Software |
17 | | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
18 | | 02110-1301 USA |
19 | | |
20 | | */ |
21 | | |
22 | | #include "igraph_foreign.h" |
23 | | |
24 | | #include "igraph_constructors.h" |
25 | | #include "core/interruption.h" |
26 | | |
27 | | /* Read a little-endian encoded 16-bit unsigned word. |
28 | | * Returns negative value on failure. */ |
29 | 3.14M | static int igraph_i_read_graph_graphdb_getword(FILE *instream) { |
30 | 3.14M | int b1, b2; |
31 | 3.14M | unsigned char c1, c2; |
32 | 3.14M | b1 = fgetc(instream); |
33 | 3.14M | b2 = fgetc(instream); |
34 | 3.14M | if (b1 != EOF && b2 != EOF) { |
35 | 3.14M | c1 = (unsigned char) b1; c2 = (unsigned char) b2; |
36 | 3.14M | return c1 | (c2 << 8); |
37 | 3.14M | } else { |
38 | 115 | return -1; |
39 | 115 | } |
40 | 3.14M | } |
41 | | |
42 | | /* Determine whether the read failed due to an input error or end-of-file condition. |
43 | | * Must only be called after a read failure, always returns a non-success error code. */ |
44 | 115 | static igraph_error_t handle_input_error(FILE *instream) { |
45 | 115 | if (feof(instream)) { |
46 | 115 | IGRAPH_ERROR("Unexpected end of file, truncated graphdb file.", IGRAPH_PARSEERROR); |
47 | 115 | } else { |
48 | 0 | IGRAPH_ERROR("Cannot read from file.", IGRAPH_EFILE); |
49 | 0 | } |
50 | 115 | } |
51 | | |
52 | | /** |
53 | | * \function igraph_read_graph_graphdb |
54 | | * \brief Read a graph in the binary graph database format. |
55 | | * |
56 | | * This is a binary format, used in the ARG Graph Database |
57 | | * for isomorphism testing. For more information, see |
58 | | * https://mivia.unisa.it/datasets/graph-database/arg-database/ |
59 | | * |
60 | | * </para><para> |
61 | | * From the graph database homepage: |
62 | | * </para> |
63 | | * |
64 | | * \blockquote <para> |
65 | | * The graphs are stored in a compact binary format, one graph per |
66 | | * file. The file is composed of 16 bit words, which are represented |
67 | | * using the so-called little-endian convention, i.e. the least |
68 | | * significant byte of the word is stored first.</para> |
69 | | * |
70 | | * <para> |
71 | | * Then, for each node, the file contains the list of edges coming |
72 | | * out of the node itself. The list is represented by a word encoding |
73 | | * its length, followed by a word for each edge, representing the |
74 | | * destination node of the edge. Node numeration is 0-based, so the |
75 | | * first node of the graph has index 0.</para> \endblockquote |
76 | | * |
77 | | * <para> |
78 | | * As of igraph 0.10, only unlabelled graphs are implemented. |
79 | | * </para> |
80 | | * |
81 | | * <para> |
82 | | * References: |
83 | | * </para> |
84 | | * |
85 | | * <para> |
86 | | * M. De Santo, P. Foggia, C. Sansone, and M. Vento: |
87 | | * A large database of graphs and its use for benchmarking graph isomorphism algorithms. |
88 | | * Pattern Recognition Letters, 24(8), 1067-1079 (2003). |
89 | | * https://doi.org/10.1016/S0167-8655(02)00253-2 |
90 | | * </para> |
91 | | * |
92 | | * <para> |
93 | | * MIVIA ARG Dataset, |
94 | | * https://zenodo.org/records/11204020, |
95 | | * https://mivia.unisa.it/datasets/graph-database/arg-database/ |
96 | | * |
97 | | * \param graph Pointer to an uninitialized graph object. |
98 | | * \param instream The stream to read from. It should be opened |
99 | | * in binary mode. |
100 | | * \param directed Whether to create a directed graph. |
101 | | * \return Error code. |
102 | | * |
103 | | * Time complexity: O(|V|+|E|), the number of vertices plus the |
104 | | * number of edges. |
105 | | * |
106 | | * \example examples/simple/igraph_read_graph_graphdb.c |
107 | | */ |
108 | | |
109 | | igraph_error_t igraph_read_graph_graphdb(igraph_t *graph, FILE *instream, |
110 | 455 | igraph_bool_t directed) { |
111 | | |
112 | 455 | const igraph_int_t nodes = igraph_i_read_graph_graphdb_getword(instream); |
113 | 455 | if (nodes < 0) { |
114 | 9 | IGRAPH_CHECK(handle_input_error(instream)); |
115 | 9 | } |
116 | | |
117 | 446 | igraph_vector_int_t edges; |
118 | 446 | IGRAPH_VECTOR_INT_INIT_FINALLY(&edges, 100); |
119 | 446 | igraph_vector_int_clear(&edges); |
120 | | |
121 | 1.09M | for (igraph_int_t i = 0; i < nodes; i++) { |
122 | 1.09M | igraph_int_t len = igraph_i_read_graph_graphdb_getword(instream); |
123 | 1.09M | if (len < 0) { |
124 | 44 | IGRAPH_CHECK(handle_input_error(instream)); |
125 | 44 | } |
126 | 3.14M | for (igraph_int_t j = 0; j < len; j++) { |
127 | 2.04M | igraph_int_t to = igraph_i_read_graph_graphdb_getword(instream); |
128 | 2.04M | if (to < 0) { |
129 | 62 | IGRAPH_CHECK(handle_input_error(instream)); |
130 | 62 | } |
131 | 2.04M | if (to >= nodes) { |
132 | 27 | IGRAPH_ERRORF("Invalid zero-based vertex ID %" IGRAPH_PRId " in graphdb file. " |
133 | 27 | "Number of vertices is %" IGRAPH_PRId ".", |
134 | 27 | IGRAPH_PARSEERROR, to, nodes); |
135 | 27 | } |
136 | 2.04M | IGRAPH_CHECK(igraph_vector_int_push_back(&edges, i)); |
137 | 2.04M | IGRAPH_CHECK(igraph_vector_int_push_back(&edges, to)); |
138 | 2.04M | IGRAPH_ALLOW_INTERRUPTION(); |
139 | 2.04M | } |
140 | 1.09M | } |
141 | 313 | if (fgetc(instream) != EOF) { |
142 | 9 | IGRAPH_ERROR("Extra bytes at end of graphdb file.", IGRAPH_PARSEERROR); |
143 | 9 | } |
144 | | |
145 | 304 | IGRAPH_CHECK(igraph_create(graph, &edges, nodes, directed)); |
146 | | |
147 | 304 | igraph_vector_int_destroy(&edges); |
148 | 304 | IGRAPH_FINALLY_CLEAN(1); |
149 | | |
150 | 304 | return IGRAPH_SUCCESS; |
151 | 304 | } |