/src/igraph/src/io/parse_utils.c
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #include "parse_utils.h" |
3 | | |
4 | | #include "igraph_foreign.h" |
5 | | #include "igraph_memory.h" |
6 | | |
7 | | #include "config.h" /* HAVE_XLOCALE */ |
8 | | |
9 | | #include <ctype.h> |
10 | | #include <errno.h> |
11 | | #include <stdlib.h> |
12 | | #include <string.h> |
13 | | |
14 | | #if defined(HAVE_XLOCALE) |
15 | | /* On some systems, xlocale.h exists, but uselocale() is still in locale.h. |
16 | | * Thus we include both. */ |
17 | | #include <xlocale.h> |
18 | | #include <locale.h> |
19 | | #else |
20 | | #include <locale.h> |
21 | | #endif |
22 | | |
/* Trims whitespace from the beginning and the end of a string with a specified length.
 * A pointer to the first character of the result substring, as well as its length, are returned.
 *
 * If you have a null-terminated string, call this function as
 *
 *     igraph_i_trim_whitespace(str, strlen(str), &res, &len);
 *
 * This does not carry a performance penalty, as the end of the string would need to be
 * determined anyway.
 *
 * str      First character of the input; it does not need to be null-terminated.
 * str_len  Number of characters to consider, starting at 'str'.
 * res      Receives a pointer to the first non-whitespace character. It points into
 *          the input itself, no copy is made.
 * res_len  Receives the length of the trimmed substring. It is zero when the input
 *          is empty or consists of whitespace only.
 */
void igraph_i_trim_whitespace(const char *str, size_t str_len, const char **res, size_t *res_len) {
    const char *beg = str, *end = str + str_len;

    /* The <ctype.h> classification functions are only defined for EOF and for values
     * representable as unsigned char. Plain char may be signed, so bytes above 0x7F
     * (e.g. UTF-8 sequences) must be converted before being passed to isspace(). */
    while (beg < end && isspace((unsigned char) beg[0])) {
        beg++;
    }
    while (end > beg && isspace((unsigned char) end[-1])) {
        end--;
    }

    *res = beg;
    *res_len = (size_t) (end - beg);
}
40 | | |
41 | | |
42 | | /* TODO: Support for reporting line number where parse error occurred. */ |
43 | | |
44 | | /* Converts a string to an integer. Throws an error if the result is not representable. |
45 | | * |
46 | | * The input is a not-necessarily-null-terminated string that must contain only the number. |
47 | | * Any additional characters at the end of the string, such as whitespace, will trigger |
48 | | * a parsing error. |
49 | | * |
50 | | * An error is returned if the input is an empty string. |
51 | | */ |
52 | 0 | igraph_error_t igraph_i_parse_integer(const char *str, size_t length, igraph_integer_t *value) { |
53 | 0 | char buffer[128]; |
54 | 0 | char *tmp, *end; |
55 | 0 | char last_char; |
56 | 0 | igraph_bool_t out_of_range, dynamic_alloc; |
57 | 0 | long long val; |
58 | |
|
59 | 0 | if (length == 0) { |
60 | 0 | IGRAPH_ERROR("Cannot parse integer from empty string.", IGRAPH_PARSEERROR); |
61 | 0 | } |
62 | | |
63 | 0 | dynamic_alloc = length+1 > sizeof(buffer) / sizeof(buffer[0]); |
64 | |
|
65 | 0 | if (dynamic_alloc) { |
66 | 0 | tmp = IGRAPH_CALLOC(length+1, char); |
67 | 0 | IGRAPH_CHECK_OOM(tmp, "Failed to parse integer."); |
68 | 0 | } else { |
69 | 0 | tmp = buffer; |
70 | 0 | } |
71 | | |
72 | 0 | strncpy(tmp, str, length); |
73 | 0 | tmp[length]='\0'; |
74 | | |
75 | | /* To avoid having to choose the appropriate strto?() function based on |
76 | | * the definition of igraph_integer_t, we first use a long long variable |
77 | | * which should be at least as large as igraph_integer_t on any platform. */ |
78 | 0 | errno = 0; |
79 | 0 | val = strtoll(tmp, &end, 10); |
80 | 0 | out_of_range = errno == ERANGE; |
81 | 0 | *value = (igraph_integer_t) val; |
82 | 0 | last_char = *end; |
83 | 0 | if (*value != val) { |
84 | 0 | out_of_range = true; |
85 | 0 | } |
86 | | |
87 | | /* Free memory before raising any errors. */ |
88 | 0 | if (dynamic_alloc) { |
89 | 0 | IGRAPH_FREE(tmp); |
90 | 0 | } |
91 | |
|
92 | 0 | if (out_of_range) { |
93 | 0 | IGRAPH_ERROR("Failed to parse integer.", val > 0 ? IGRAPH_EOVERFLOW : IGRAPH_EUNDERFLOW); |
94 | 0 | } |
95 | | |
96 | | /* Did we parse to the end of the string? */ |
97 | 0 | if (last_char) { |
98 | 0 | IGRAPH_ERRORF("Unexpected character '%c' while parsing integer.", IGRAPH_PARSEERROR, last_char); |
99 | 0 | } |
100 | | |
101 | 0 | return IGRAPH_SUCCESS; |
102 | 0 | } |
103 | | |
104 | | |
105 | | /* Converts a string to a real number. Throws an error if the result is not representable. |
106 | | * |
107 | | * The input is a not-necessarily-null-terminated string that must contain only the number. |
108 | | * Any additional characters at the end of the string, such as whitespace, will trigger |
109 | | * a parsing error. |
110 | | * |
111 | | * NaN and Inf are supported. An error is returned if the input is an empty string. |
112 | | */ |
113 | 3.26M | igraph_error_t igraph_i_parse_real(const char *str, size_t length, igraph_real_t *value) { |
114 | 3.26M | char buffer[128]; |
115 | 3.26M | char *tmp, *end; |
116 | 3.26M | char last_char; |
117 | 3.26M | igraph_bool_t out_of_range, dynamic_alloc; |
118 | | |
119 | 3.26M | if (length == 0) { |
120 | 0 | IGRAPH_ERROR("Cannot parse real number from empty string.", IGRAPH_PARSEERROR); |
121 | 0 | } |
122 | | |
123 | 3.26M | dynamic_alloc = length+1 > sizeof(buffer) / sizeof(buffer[0]); |
124 | | |
125 | 3.26M | if (dynamic_alloc) { |
126 | 292 | tmp = IGRAPH_CALLOC(length+1, char); |
127 | 292 | IGRAPH_CHECK_OOM(tmp, "Failed to parse real number."); |
128 | 3.26M | } else { |
129 | 3.26M | tmp = buffer; |
130 | 3.26M | } |
131 | | |
132 | 3.26M | strncpy(tmp, str, length); |
133 | 3.26M | tmp[length]='\0'; |
134 | | |
135 | 3.26M | errno = 0; |
136 | 3.26M | *value = strtod(tmp, &end); |
137 | 3.26M | out_of_range = errno == ERANGE; /* This does not trigger when reading +-Inf. */ |
138 | 3.26M | last_char = *end; |
139 | | |
140 | | /* Free memory before raising any errors. */ |
141 | 3.26M | if (dynamic_alloc) { |
142 | 292 | IGRAPH_FREE(tmp); |
143 | 292 | } |
144 | | |
145 | 3.26M | if (out_of_range) { |
146 | 44 | IGRAPH_ERROR("Failed to parse real number.", *value > 0 ? IGRAPH_EOVERFLOW : IGRAPH_EUNDERFLOW); |
147 | 44 | } |
148 | | |
149 | | /* Did we parse to the end of the string? */ |
150 | 3.26M | if (last_char) { |
151 | 0 | IGRAPH_ERRORF("Unexpected character '%c' while parsing real number.", IGRAPH_PARSEERROR, last_char); |
152 | 0 | } |
153 | | |
154 | 3.26M | return IGRAPH_SUCCESS; |
155 | 3.26M | } |
156 | | |
157 | | |
158 | | /* Skips all whitespace in a file. */ |
159 | 0 | igraph_error_t igraph_i_fskip_whitespace(FILE *file) { |
160 | 0 | int ch; |
161 | |
|
162 | 0 | do { |
163 | 0 | ch = fgetc(file); |
164 | 0 | } while (isspace(ch)); |
165 | 0 | if (ferror(file)) { |
166 | 0 | IGRAPH_ERROR("Error reading file.", IGRAPH_EFILE); |
167 | 0 | } |
168 | 0 | ungetc(ch, file); |
169 | |
|
170 | 0 | return IGRAPH_SUCCESS; |
171 | 0 | } |
172 | | |
173 | | |
174 | | /* Reads an integer from a file. Throws an error if the result is not representable. |
175 | | * |
176 | | * Any initial whitespace is skipped. If no number is found, an error is raised. |
177 | | * |
178 | | * This function assumes that the number is followed by whitespace or the end of the file. |
179 | | * If this is not the case, an error will be raised. |
180 | | */ |
181 | 0 | igraph_error_t igraph_i_fget_integer(FILE *file, igraph_integer_t *value) { |
182 | | /* The value requiring the most characters on 64-bit is -2^63, i.e. "-9223372036854775808". |
183 | | * This is 20 characters long, plus one for the null terminator, requiring a buffer of |
184 | | * at least 21 characters. We use a slightly larger buffer to allow for leading zeros and |
185 | | * clearer error messages. |
186 | | * |
187 | | * Note: The string held in this buffer is not null-terminated. |
188 | | */ |
189 | 0 | char buf[32]; |
190 | 0 | int ch; |
191 | |
|
192 | 0 | IGRAPH_CHECK(igraph_i_fskip_whitespace(file)); |
193 | | |
194 | 0 | int i = 0; /* must be 'int' due to use in printf format specifier */ |
195 | 0 | while (1) { |
196 | 0 | ch = fgetc(file); |
197 | 0 | if (ch == EOF) break; |
198 | 0 | if (isspace(ch)) { |
199 | 0 | ungetc(ch, file); |
200 | 0 | break; |
201 | 0 | } |
202 | 0 | if (i == sizeof(buf)) { |
203 | | /* Reached the end of the buffer. */ |
204 | 0 | IGRAPH_ERRORF("'%.*s' is not a valid integer value.", IGRAPH_PARSEERROR, i, buf); |
205 | 0 | } |
206 | 0 | buf[i++] = ch; |
207 | 0 | } |
208 | 0 | if (ferror(file)) { |
209 | 0 | IGRAPH_ERROR("Error while reading integer.", IGRAPH_EFILE); |
210 | 0 | } |
211 | | |
212 | 0 | if (i == 0) { |
213 | 0 | IGRAPH_ERROR("Integer expected, reached end of file instead.", IGRAPH_PARSEERROR); |
214 | 0 | } |
215 | | |
216 | 0 | IGRAPH_CHECK(igraph_i_parse_integer(buf, i, value)); |
217 | | |
218 | 0 | return IGRAPH_SUCCESS; |
219 | 0 | } |
220 | | |
221 | | |
222 | | /* Reads a real number from a file. Throws an error if the result is not representable. |
223 | | * |
224 | | * Any initial whitespace is skipped. If no number is found, an error is raised. |
225 | | * |
226 | | * This function assumes that the number is followed by whitespace or the end of the file. |
227 | | * If this is not the case, an error will be raised. |
228 | | */ |
229 | 0 | igraph_error_t igraph_i_fget_real(FILE *file, igraph_real_t *value) { |
230 | | /* The value requiring the most characters with an IEEE-754 double is the smallest |
231 | | * representable number, with signs added, "-2.2250738585072014e-308" |
232 | | * |
233 | | * This is 24 characters long, plus one for the null terminator, requiring a buffer of |
234 | | * at least 25 characters. This is 17 mantissa digits for lossless representation, |
235 | | * 3 exponent digits, "e", and up to two minus signs. We use a larger buffer as some |
236 | | * files may have more digits specified than necessary for exact representation. |
237 | | * |
238 | | * Note: The string held in this buffer is not null-terminated. |
239 | | */ |
240 | 0 | char buf[64]; |
241 | 0 | int ch; |
242 | |
|
243 | 0 | IGRAPH_CHECK(igraph_i_fskip_whitespace(file)); |
244 | | |
245 | 0 | int i = 0; /* must be 'int' due to use in printf format specifier */ |
246 | 0 | while (1) { |
247 | 0 | ch = fgetc(file); |
248 | 0 | if (ch == EOF) break; |
249 | 0 | if (isspace(ch)) { |
250 | 0 | ungetc(ch, file); |
251 | 0 | break; |
252 | 0 | } |
253 | 0 | if (i == sizeof(buf)) { |
254 | | /* Reached the end of the buffer. */ |
255 | 0 | IGRAPH_ERRORF("'%.*s' is not a valid real value.", IGRAPH_PARSEERROR, i, buf); |
256 | 0 | } |
257 | 0 | buf[i++] = ch; |
258 | 0 | } |
259 | 0 | if (ferror(file)) { |
260 | 0 | IGRAPH_ERROR("Error while reading real number.", IGRAPH_EFILE); |
261 | 0 | } |
262 | | |
263 | 0 | if (i == 0) { |
264 | 0 | IGRAPH_ERROR("Real number expected, reached end of file instead.", IGRAPH_PARSEERROR); |
265 | 0 | } |
266 | | |
267 | 0 | IGRAPH_CHECK(igraph_i_parse_real(buf, i, value)); |
268 | | |
269 | 0 | return IGRAPH_SUCCESS; |
270 | 0 | } |
271 | | |
272 | | |
273 | | /* igraph_i_safelocale() and igraph_i_unsafelocale() will set the numeric locale to "C" |
274 | | * and re-set it to its original value. This is to ensure that parsing and writing |
275 | | * numbers uses a decimal point instead of a comma. |
276 | | * |
277 | | * These functions attempt to set the locale only for the current thread on a best-effort |
278 | | * basis. On some platforms this is not possible, so the global locale will be changed. |
279 | | * This is not safe to do in multi-threaded programs (not even if igraph runs only in |
280 | | * a single thread). |
281 | | */ |
282 | | |
/* State saved by igraph_enter_safelocale() and consumed by igraph_exit_safelocale().
 * The members depend on which locale API was detected at configuration time. */
struct igraph_safelocale_s {
#ifndef HAVE_USELOCALE
    /* setlocale() based fallback. */
    char *original_locale;   /* copy of the LC_NUMERIC locale name that was in effect */
# ifdef HAVE__CONFIGTHREADLOCALE
    int per_thread_locale;   /* previous _configthreadlocale() setting */
# endif
#else
    /* uselocale() based implementation. */
    locale_t original_locale; /* locale returned by uselocale() when switching */
    locale_t c_locale;        /* locale object created with newlocale() */
#endif
};
294 | | |
295 | | /** |
296 | | * \function igraph_enter_safelocale |
297 | | * \brief Temporarily set the C locale. |
298 | | * |
299 | | * igraph's foreign format readers and writers require a locale that uses a |
300 | | * decimal point instead of a decimal comma. This is a convenience function |
301 | | * that temporarily sets the C locale so that readers and writers would work |
302 | | * correctly. It \em must be paired with a call to \ref igraph_exit_safelocale(), |
303 | | * otherwise a memory leak will occur. |
304 | | * |
305 | | * </para><para> |
306 | | * This function tries to set the locale for the current thread only on a |
307 | | * best-effort basis. Restricting the locale change to a single thread is not |
308 | | * supported on all platforms. In these cases, this function falls back to using |
309 | | * the standard <code>setlocale()</code> function, which affects the entire process |
310 | | * and is not safe to use from concurrent threads. |
311 | | * |
312 | | * </para><para> |
313 | | * It is generally recommended to run igraph within a thread that has been |
314 | | * permanently set to the C locale using system-specific means. This is a convenience |
315 | | * function for situations when this is not easily possible because the programmer |
316 | | * is not in control of the process, such as when developing plugins/extensions. |
317 | | * Note that processes start up in the C locale by default, thus nothing needs to |
318 | | * be done unless the locale has been changed away from the default. |
319 | | * |
320 | | * \param loc Pointer to a variable of type \c igraph_safelocale_t. The current |
321 | | * locale will be stored here, so that it can be restored using |
322 | | * \ref igraph_exit_safelocale(). |
323 | | * \return Error code. |
324 | | * |
325 | | * \example examples/simple/safelocale.c |
326 | | */ |
327 | | |
328 | 0 | igraph_error_t igraph_enter_safelocale(igraph_safelocale_t *loc) { |
329 | 0 | *loc = IGRAPH_CALLOC(1, struct igraph_safelocale_s); |
330 | 0 | IGRAPH_CHECK_OOM(loc, "Could not set C locale."); |
331 | 0 | igraph_safelocale_t l = *loc; |
332 | 0 | #ifdef HAVE_USELOCALE |
333 | 0 | l->c_locale = newlocale(LC_NUMERIC_MASK, "C", NULL); |
334 | 0 | if (! l->c_locale) { |
335 | 0 | IGRAPH_ERROR("Could not set C locale.", IGRAPH_FAILURE); |
336 | 0 | } |
337 | 0 | l->original_locale = uselocale(l->c_locale); |
338 | | #else |
339 | | l->original_locale = strdup(setlocale(LC_NUMERIC, NULL)); |
340 | | IGRAPH_CHECK_OOM(l->original_locale, "Not enough memory."); |
341 | | # ifdef HAVE__CONFIGTHREADLOCALE |
342 | | /* On Windows, we can enable per-thread locale */ |
343 | | l->per_thread_locale = _configthreadlocale(0); |
344 | | _configthreadlocale(_ENABLE_PER_THREAD_LOCALE); |
345 | | # endif |
346 | | setlocale(LC_NUMERIC, "C"); |
347 | | #endif |
348 | 0 | return IGRAPH_SUCCESS; |
349 | 0 | } |
350 | | |
351 | | /** |
352 | | * \function igraph_exit_safelocale |
353 | | * \brief Temporarily set the C locale. |
354 | | * |
355 | | * Restores a locale saved by \ref igraph_enter_safelocale() and deallocates |
356 | | * all associated data. This function \em must be paired with a call to |
357 | | * \ref igraph_enter_safelocale(). |
358 | | * |
359 | | * \param loc A variable of type \c igraph_safelocale_t, originally set |
360 | | * by \ref igraph_enter_safelocale(). |
361 | | */ |
362 | | |
363 | 0 | void igraph_exit_safelocale(igraph_safelocale_t *loc) { |
364 | 0 | igraph_safelocale_t l = *loc; |
365 | 0 | #ifdef HAVE_USELOCALE |
366 | 0 | uselocale(l->original_locale); |
367 | 0 | freelocale(l->c_locale); |
368 | | #else |
369 | | setlocale(LC_NUMERIC, l->original_locale); |
370 | | IGRAPH_FREE(l->original_locale); |
371 | | # ifdef HAVE__CONFIGTHREADLOCALE |
372 | | /* Restore per-thread locale setting on Windows */ |
373 | | _configthreadlocale(l->per_thread_locale); |
374 | | # endif |
375 | | #endif |
376 | 0 | IGRAPH_FREE(*loc); |
377 | 0 | } |