Coverage Report

Created: 2025-08-25 07:04

/src/igraph/src/io/parse_utils.c
Line
Count
Source (jump to first uncovered line)
1
2
#include "parse_utils.h"
3
4
#include "igraph_foreign.h"
5
#include "igraph_memory.h"
6
7
#include "config.h" /* HAVE_XLOCALE */
8
9
#include <ctype.h>
10
#include <errno.h>
11
#include <stdlib.h>
12
#include <string.h>
13
14
#if defined(HAVE_XLOCALE)
15
/* On some systems, xlocale.h exists, but uselocale() is still in locale.h.
16
 * Thus we include both. */
17
#include <xlocale.h>
18
#include <locale.h>
19
#else
20
#include <locale.h>
21
#endif
22
23
/* Trims whitespace from the beginning and the end of a string with a specified length.
 *
 * str     - start of the input; it does not need to be null-terminated.
 * str_len - number of characters of str to consider.
 * res     - receives a pointer to the first non-whitespace character. It points
 *           into str; nothing is copied or modified.
 * res_len - receives the length of the trimmed substring. It is 0 when the input
 *           is empty or consists only of whitespace.
 *
 * If you have a null-terminated string, call this function as
 *
 *     igraph_i_trim_whitespace(str, strlen(str), &res, &len);
 *
 * This does not carry a performance penalty, as the end of the string would need to be
 * determined anyway.
 */
void igraph_i_trim_whitespace(const char *str, size_t str_len, const char **res, size_t *res_len) {
    const char *beg = str, *end = str + str_len;

    /* The argument of isspace() must be representable as unsigned char (or be EOF).
     * Plain char may be signed, so bytes >= 0x80 have to be converted first. */
    while (beg < end && isspace((unsigned char) beg[0])) {
        beg++;
    }
    while (end > beg && isspace((unsigned char) end[-1])) {
        end--;
    }

    *res = beg;
    *res_len = (size_t) (end - beg);
}
40
41
42
/* TODO: Support for reporting line number where parse error occurred. */
43
44
/* Converts a string to an integer. Throws an error if the result is not representable.
45
 *
46
 * The input is a not-necessarily-null-terminated string that must contain only the number.
47
 * Any additional characters at the end of the string, such as whitespace, will trigger
48
 * a parsing error.
49
 *
50
 * An error is returned if the input is an empty string.
51
 */
52
0
igraph_error_t igraph_i_parse_integer(const char *str, size_t length, igraph_integer_t *value) {
53
0
    char buffer[128];
54
0
    char *tmp, *end;
55
0
    char last_char;
56
0
    igraph_bool_t out_of_range, dynamic_alloc;
57
0
    long long val;
58
59
0
    if (length == 0) {
60
0
        IGRAPH_ERROR("Cannot parse integer from empty string.", IGRAPH_PARSEERROR);
61
0
    }
62
63
0
    dynamic_alloc = length+1 > sizeof(buffer) / sizeof(buffer[0]);
64
65
0
    if (dynamic_alloc) {
66
0
        tmp = IGRAPH_CALLOC(length+1, char);
67
0
        IGRAPH_CHECK_OOM(tmp, "Failed to parse integer.");
68
0
    } else {
69
0
        tmp = buffer;
70
0
    }
71
72
0
    strncpy(tmp, str, length);
73
0
    tmp[length]='\0';
74
75
    /* To avoid having to choose the appropriate strto?() function based on
76
     * the definition of igraph_integer_t, we first use a long long variable
77
     * which should be at least as large as igraph_integer_t on any platform. */
78
0
    errno = 0;
79
0
    val = strtoll(tmp, &end, 10);
80
0
    out_of_range = errno == ERANGE;
81
0
    *value = (igraph_integer_t) val;
82
0
    last_char = *end;
83
0
    if (*value != val) {
84
0
        out_of_range = true;
85
0
    }
86
87
    /* Free memory before raising any errors. */
88
0
    if (dynamic_alloc) {
89
0
        IGRAPH_FREE(tmp);
90
0
    }
91
92
0
    if (out_of_range) {
93
0
        IGRAPH_ERROR("Failed to parse integer.", val > 0 ? IGRAPH_EOVERFLOW : IGRAPH_EUNDERFLOW);
94
0
    }
95
96
    /* Did we parse to the end of the string? */
97
0
    if (last_char) {
98
0
        IGRAPH_ERRORF("Unexpected character '%c' while parsing integer.", IGRAPH_PARSEERROR, last_char);
99
0
    }
100
101
0
    return IGRAPH_SUCCESS;
102
0
}
103
104
105
/* Converts a string to a real number. Throws an error if the result is not representable.
106
 *
107
 * The input is a not-necessarily-null-terminated string that must contain only the number.
108
 * Any additional characters at the end of the string, such as whitespace, will trigger
109
 * a parsing error.
110
 *
111
 * NaN and Inf are supported. An error is returned if the input is an empty string.
112
 */
113
3.26M
igraph_error_t igraph_i_parse_real(const char *str, size_t length, igraph_real_t *value) {
114
3.26M
    char buffer[128];
115
3.26M
    char *tmp, *end;
116
3.26M
    char last_char;
117
3.26M
    igraph_bool_t out_of_range, dynamic_alloc;
118
119
3.26M
    if (length == 0) {
120
0
        IGRAPH_ERROR("Cannot parse real number from empty string.", IGRAPH_PARSEERROR);
121
0
    }
122
123
3.26M
    dynamic_alloc = length+1 > sizeof(buffer) / sizeof(buffer[0]);
124
125
3.26M
    if (dynamic_alloc) {
126
292
        tmp = IGRAPH_CALLOC(length+1, char);
127
292
        IGRAPH_CHECK_OOM(tmp, "Failed to parse real number.");
128
3.26M
    } else {
129
3.26M
        tmp = buffer;
130
3.26M
    }
131
132
3.26M
    strncpy(tmp, str, length);
133
3.26M
    tmp[length]='\0';
134
135
3.26M
    errno = 0;
136
3.26M
    *value = strtod(tmp, &end);
137
3.26M
    out_of_range = errno == ERANGE; /* This does not trigger when reading +-Inf. */
138
3.26M
    last_char = *end;
139
140
    /* Free memory before raising any errors. */
141
3.26M
    if (dynamic_alloc) {
142
292
        IGRAPH_FREE(tmp);
143
292
    }
144
145
3.26M
    if (out_of_range) {
146
44
        IGRAPH_ERROR("Failed to parse real number.", *value > 0 ? IGRAPH_EOVERFLOW : IGRAPH_EUNDERFLOW);
147
44
    }
148
149
    /* Did we parse to the end of the string? */
150
3.26M
    if (last_char) {
151
0
        IGRAPH_ERRORF("Unexpected character '%c' while parsing real number.", IGRAPH_PARSEERROR, last_char);
152
0
    }
153
154
3.26M
    return IGRAPH_SUCCESS;
155
3.26M
}
156
157
158
/* Skips all whitespace in a file. */
159
0
igraph_error_t igraph_i_fskip_whitespace(FILE *file) {
160
0
    int ch;
161
162
0
    do {
163
0
        ch = fgetc(file);
164
0
    } while (isspace(ch));
165
0
    if (ferror(file)) {
166
0
        IGRAPH_ERROR("Error reading file.", IGRAPH_EFILE);
167
0
    }
168
0
    ungetc(ch, file);
169
170
0
    return IGRAPH_SUCCESS;
171
0
}
172
173
174
/* Reads an integer from a file. Throws an error if the result is not representable.
175
 *
176
 * Any initial whitespace is skipped. If no number is found, an error is raised.
177
 *
178
 * This function assumes that the number is followed by whitespace or the end of the file.
179
 * If this is not the case, an error will be raised.
180
 */
181
0
igraph_error_t igraph_i_fget_integer(FILE *file, igraph_integer_t *value) {
182
    /* The value requiring the most characters on 64-bit is -2^63, i.e. "-9223372036854775808".
183
     * This is 20 characters long, plus one for the null terminator, requiring a buffer of
184
     * at least 21 characters. We use a slightly larger buffer to allow for leading zeros and
185
     * clearer error messages.
186
     *
187
     * Note: The string held in this buffer is not null-terminated.
188
     */
189
0
    char buf[32];
190
0
    int ch;
191
192
0
    IGRAPH_CHECK(igraph_i_fskip_whitespace(file));
193
194
0
    int i = 0; /* must be 'int' due to use in printf format specifier */
195
0
    while (1) {
196
0
        ch = fgetc(file);
197
0
        if (ch == EOF) break;
198
0
        if (isspace(ch)) {
199
0
            ungetc(ch, file);
200
0
            break;
201
0
        }
202
0
        if (i == sizeof(buf)) {
203
            /* Reached the end of the buffer. */
204
0
            IGRAPH_ERRORF("'%.*s' is not a valid integer value.", IGRAPH_PARSEERROR, i, buf);
205
0
        }
206
0
        buf[i++] = ch;
207
0
    }
208
0
    if (ferror(file)) {
209
0
        IGRAPH_ERROR("Error while reading integer.", IGRAPH_EFILE);
210
0
    }
211
212
0
    if (i == 0) {
213
0
        IGRAPH_ERROR("Integer expected, reached end of file instead.", IGRAPH_PARSEERROR);
214
0
    }
215
216
0
    IGRAPH_CHECK(igraph_i_parse_integer(buf, i, value));
217
218
0
    return IGRAPH_SUCCESS;
219
0
}
220
221
222
/* Reads a real number from a file. Throws an error if the result is not representable.
223
 *
224
 * Any initial whitespace is skipped. If no number is found, an error is raised.
225
 *
226
 * This function assumes that the number is followed by whitespace or the end of the file.
227
 * If this is not the case, an error will be raised.
228
 */
229
0
igraph_error_t igraph_i_fget_real(FILE *file, igraph_real_t *value) {
230
    /* The value requiring the most characters with an IEEE-754 double is the smallest
231
     * representable number, with signs added, "-2.2250738585072014e-308"
232
     *
233
     * This is 24 characters long, plus one for the null terminator, requiring a buffer of
234
     * at least 25 characters. This is 17 mantissa digits for lossless representation,
235
     * 3 exponent digits, "e", and up to two minus signs. We use a larger buffer as some
236
     * files may have more digits specified than necessary for exact representation.
237
     *
238
     * Note: The string held in this buffer is not null-terminated.
239
     */
240
0
    char buf[64];
241
0
    int ch;
242
243
0
    IGRAPH_CHECK(igraph_i_fskip_whitespace(file));
244
245
0
    int i = 0; /* must be 'int' due to use in printf format specifier */
246
0
    while (1) {
247
0
        ch = fgetc(file);
248
0
        if (ch == EOF) break;
249
0
        if (isspace(ch)) {
250
0
            ungetc(ch, file);
251
0
            break;
252
0
        }
253
0
        if (i == sizeof(buf)) {
254
            /* Reached the end of the buffer. */
255
0
            IGRAPH_ERRORF("'%.*s' is not a valid real value.", IGRAPH_PARSEERROR, i, buf);
256
0
        }
257
0
        buf[i++] = ch;
258
0
    }
259
0
    if (ferror(file)) {
260
0
        IGRAPH_ERROR("Error while reading real number.", IGRAPH_EFILE);
261
0
    }
262
263
0
    if (i == 0) {
264
0
        IGRAPH_ERROR("Real number expected, reached end of file instead.", IGRAPH_PARSEERROR);
265
0
    }
266
267
0
    IGRAPH_CHECK(igraph_i_parse_real(buf, i, value));
268
269
0
    return IGRAPH_SUCCESS;
270
0
}
271
272
273
/* igraph_enter_safelocale() and igraph_exit_safelocale() will set the numeric locale to "C"
274
 * and re-set it to its original value. This is to ensure that parsing and writing
275
 * numbers uses a decimal point instead of a comma.
276
 *
277
 * These functions attempt to set the locale only for the current thread on a best-effort
278
 * basis. On some platforms this is not possible, so the global locale will be changed.
279
 * This is not safe to do in multi-threaded programs (not even if igraph runs only in
280
 * a single thread).
281
 */
282
283
/* State recorded by igraph_enter_safelocale() so that igraph_exit_safelocale()
 * can undo the locale change. The layout depends on which locale API was
 * detected at configuration time. */
struct igraph_safelocale_s {
#if defined(HAVE_USELOCALE)
    /* Locale that was active before the switch, as returned by uselocale(). */
    locale_t original_locale;
    /* Locale object created with newlocale(); released with freelocale(). */
    locale_t c_locale;
#else
    /* Heap-allocated copy of the LC_NUMERIC locale name reported by setlocale(). */
    char *original_locale;
#  if defined(HAVE__CONFIGTHREADLOCALE)
    /* Value returned by _configthreadlocale(0) before per-thread locales were enabled. */
    int per_thread_locale;
#  endif
#endif
};
294
295
/**
296
 * \function igraph_enter_safelocale
297
 * \brief Temporarily set the C locale.
298
 *
299
 * igraph's foreign format readers and writers require a locale that uses a
300
 * decimal point instead of a decimal comma. This is a convenience function
301
 * that temporarily sets the C locale so that readers and writers would work
302
 * correctly. It \em must be paired with a call to \ref igraph_exit_safelocale(),
303
 * otherwise a memory leak will occur.
304
 *
305
 * </para><para>
306
 * This function tries to set the locale for the current thread only on a
307
 * best-effort basis. Restricting the locale change to a single thread is not
308
 * supported on all platforms. In these cases, this function falls back to using
309
 * the standard <code>setlocale()</code> function, which affects the entire process
310
 * and is not safe to use from concurrent threads.
311
 *
312
 * </para><para>
313
 * It is generally recommended to run igraph within a thread that has been
314
 * permanently set to the C locale using system-specific means. This is a convenience
315
 * function for situations when this is not easily possible because the programmer
316
 * is not in control of the process, such as when developing plugins/extensions.
317
 * Note that processes start up in the C locale by default, thus nothing needs to
318
 * be done unless the locale has been changed away from the default.
319
 *
320
 * \param loc Pointer to a variable of type \c igraph_safelocale_t. The current
321
 *     locale will be stored here, so that it can be restored using
322
 *     \ref igraph_exit_safelocale().
323
 * \return Error code.
324
 *
325
 * \example examples/simple/safelocale.c
326
 */
327
328
0
igraph_error_t igraph_enter_safelocale(igraph_safelocale_t *loc) {
329
0
    *loc = IGRAPH_CALLOC(1, struct igraph_safelocale_s);
330
0
    IGRAPH_CHECK_OOM(loc, "Could not set C locale.");
331
0
    igraph_safelocale_t l = *loc;
332
0
#ifdef HAVE_USELOCALE
333
0
    l->c_locale = newlocale(LC_NUMERIC_MASK, "C", NULL);
334
0
    if (! l->c_locale) {
335
0
        IGRAPH_ERROR("Could not set C locale.", IGRAPH_FAILURE);
336
0
    }
337
0
    l->original_locale = uselocale(l->c_locale);
338
#else
339
    l->original_locale = strdup(setlocale(LC_NUMERIC, NULL));
340
    IGRAPH_CHECK_OOM(l->original_locale, "Not enough memory.");
341
# ifdef HAVE__CONFIGTHREADLOCALE
342
    /* On Windows, we can enable per-thread locale */
343
    l->per_thread_locale = _configthreadlocale(0);
344
    _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
345
# endif
346
    setlocale(LC_NUMERIC, "C");
347
#endif
348
0
    return IGRAPH_SUCCESS;
349
0
}
350
351
/**
352
 * \function igraph_exit_safelocale
353
 * \brief Temporarily set the C locale.
354
 *
355
 * Restores a locale saved by \ref igraph_enter_safelocale() and deallocates
356
 * all associated data. This function \em must be paired with a call to
357
 * \ref igraph_enter_safelocale().
358
 *
359
 * \param loc A variable of type \c igraph_safelocale_t, originally set
360
 *     by \ref igraph_enter_safelocale().
361
 */
362
363
0
void igraph_exit_safelocale(igraph_safelocale_t *loc) {
364
0
    igraph_safelocale_t l = *loc;
365
0
#ifdef HAVE_USELOCALE
366
0
    uselocale(l->original_locale);
367
0
    freelocale(l->c_locale);
368
#else
369
    setlocale(LC_NUMERIC, l->original_locale);
370
    IGRAPH_FREE(l->original_locale);
371
# ifdef HAVE__CONFIGTHREADLOCALE
372
    /* Restore per-thread locale setting on Windows */
373
    _configthreadlocale(l->per_thread_locale);
374
# endif
375
#endif
376
0
    IGRAPH_FREE(*loc);
377
0
}