Coverage Report

Created: 2023-03-26 06:38

/src/readstat/src/spss/readstat_sav_read.c
Line
Count
Source (jump to first uncovered line)
1
2
#include <stdio.h>
3
#include <stdlib.h>
4
#include <string.h>
5
#include <sys/types.h>
6
#include <stdint.h>
7
#include <math.h>
8
#include <float.h>
9
#include <time.h>
10
#include <limits.h>
11
12
#include "../readstat.h"
13
#include "../readstat_bits.h"
14
#include "../readstat_iconv.h"
15
#include "../readstat_convert.h"
16
#include "../readstat_malloc.h"
17
18
#include "readstat_sav.h"
19
#include "readstat_sav_compress.h"
20
#include "readstat_sav_parse.h"
21
#include "readstat_sav_parse_timestamp.h"
22
23
#if HAVE_ZLIB
24
#include "readstat_zsav_read.h"
25
#endif
26
27
#define DATA_BUFFER_SIZE    65536
28
166k
#define VERY_LONG_STRING_MAX_LENGTH INT_MAX
29
30
/* Others defined in table below */
31
32
/*
 * Lookup table pairing a numeric character-set code (.code) with a character
 * encoding name (.name). Most codes match Windows code page identifiers; see
 * http://msdn.microsoft.com/en-us/library/dd317756(VS.85).aspx
 * Entries are listed in ascending .code order.
 * NOTE(review): the names look like iconv encoding names -- confirm against
 * the code that consumes this table.
 */
33
static readstat_charset_entry_t _charset_table[] = { 
34
    { .code = 1,     .name = "EBCDIC-US" },
35
    { .code = 2,     .name = "WINDOWS-1252" }, /* supposed to be ASCII, but some files are miscoded */
36
    { .code = 3,     .name = "WINDOWS-1252" },
37
    { .code = 4,     .name = "DEC-KANJI" },
38
    { .code = 437,   .name = "CP437" },
39
    { .code = 708,   .name = "ASMO-708" },
40
    { .code = 737,   .name = "CP737" },
41
    { .code = 775,   .name = "CP775" },
42
    { .code = 850,   .name = "CP850" },
43
    { .code = 852,   .name = "CP852" },
44
    { .code = 855,   .name = "CP855" },
45
    { .code = 857,   .name = "CP857" },
46
    { .code = 858,   .name = "CP858" },
47
    { .code = 860,   .name = "CP860" },
48
    { .code = 861,   .name = "CP861" },
49
    { .code = 862,   .name = "CP862" },
50
    { .code = 863,   .name = "CP863" },
51
    { .code = 864,   .name = "CP864" },
52
    { .code = 865,   .name = "CP865" },
53
    { .code = 866,   .name = "CP866" },
54
    { .code = 869,   .name = "CP869" },
55
    { .code = 874,   .name = "CP874" },
56
    { .code = 932,   .name = "CP932" },
57
    { .code = 936,   .name = "CP936" },
58
    { .code = 949,   .name = "CP949" },
59
    { .code = 950,   .name = "BIG-5" },
60
    { .code = 1200,  .name = "UTF-16LE" },
61
    { .code = 1201,  .name = "UTF-16BE" },
62
    { .code = 1250,  .name = "WINDOWS-1250" },
63
    { .code = 1251,  .name = "WINDOWS-1251" },
64
    { .code = 1252,  .name = "WINDOWS-1252" },
65
    { .code = 1253,  .name = "WINDOWS-1253" },
66
    { .code = 1254,  .name = "WINDOWS-1254" },
67
    { .code = 1255,  .name = "WINDOWS-1255" },
68
    { .code = 1256,  .name = "WINDOWS-1256" },
69
    { .code = 1257,  .name = "WINDOWS-1257" },
70
    { .code = 1258,  .name = "WINDOWS-1258" },
71
    { .code = 1361,  .name = "CP1361" },
72
    { .code = 10000, .name = "MACROMAN" },
73
    { .code = 10004, .name = "MACARABIC" },
74
    { .code = 10005, .name = "MACHEBREW" },
75
    { .code = 10006, .name = "MACGREEK" },
76
    { .code = 10007, .name = "MACCYRILLIC" },
77
    { .code = 10010, .name = "MACROMANIA" },
78
    { .code = 10017, .name = "MACUKRAINE" },
79
    { .code = 10021, .name = "MACTHAI" },
80
    { .code = 10029, .name = "MACCENTRALEUROPE" },
81
    { .code = 10079, .name = "MACICELAND" },
82
    { .code = 10081, .name = "MACTURKISH" },
83
    { .code = 10082, .name = "MACCROATIAN" },
84
    { .code = 12000, .name = "UTF-32LE" },
85
    { .code = 12001, .name = "UTF-32BE" },
86
    { .code = 20127, .name = "US-ASCII" },
87
    { .code = 20866, .name = "KOI8-R" },
88
    { .code = 20932, .name = "EUC-JP" },
89
    { .code = 21866, .name = "KOI8-U" },
90
    { .code = 28591, .name = "ISO-8859-1" },
91
    { .code = 28592, .name = "ISO-8859-2" },
92
    { .code = 28593, .name = "ISO-8859-3" },
93
    { .code = 28594, .name = "ISO-8859-4" },
94
    { .code = 28595, .name = "ISO-8859-5" },
95
    { .code = 28596, .name = "ISO-8859-6" },
96
    { .code = 28597, .name = "ISO-8859-7" },
97
    { .code = 28598, .name = "ISO-8859-8" },
98
    { .code = 28599, .name = "ISO-8859-9" },
99
    { .code = 28603, .name = "ISO-8859-13" },
100
    { .code = 28605, .name = "ISO-8859-15" },
101
    { .code = 50220, .name = "ISO-2022-JP" },
102
    { .code = 50221, .name = "ISO-2022-JP" }, // same as above?
103
    { .code = 50222, .name = "ISO-2022-JP" }, // same as above?
104
    { .code = 50225, .name = "ISO-2022-KR" },
105
    { .code = 50229, .name = "ISO-2022-CN" },
106
    { .code = 51932, .name = "EUC-JP" },
107
    { .code = 51936, .name = "GBK" },
108
    { .code = 51949, .name = "EUC-KR" },
109
    { .code = 52936, .name = "HZ-GB-2312" },
110
    { .code = 54936, .name = "GB18030" },
111
    { .code = 65000, .name = "UTF-7" },
112
    { .code = 65001, .name = "UTF-8" }
113
};
114
115
185k
#define SAV_LABEL_NAME_PREFIX         "labels"
116
117
/*
 * Scratch record for one (value, label) pair while a value-label record is
 * being parsed: the raw bytes from the file, their converted forms, and the
 * value/label finally handed to the value_label handler.
 */
typedef struct value_label_s {
118
    char             raw_value[8];              /* the 8 value bytes exactly as read from the file */
119
    char             utf8_string_value[8*4+1];  /* raw_value run through readstat_convert (string-typed labels only) */
120
    readstat_value_t final_value;               /* typed value passed to the value_label handler */
121
    char            *label;                     /* heap-allocated converted label text; freed by the record reader */
122
} value_label_t;
123
124
/* Forward declarations of file-local helpers. */

/* Progress reporting and row-data readers. */
static readstat_error_t sav_update_progress(sav_ctx_t *ctx);
125
static readstat_error_t sav_read_data(sav_ctx_t *ctx);
126
static readstat_error_t sav_read_compressed_data(sav_ctx_t *ctx,
127
        readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *));
128
static readstat_error_t sav_read_uncompressed_data(sav_ctx_t *ctx,
129
        readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *));
130
131
/* Dictionary records: each kind has a "skip" and a "read" variant. */
static readstat_error_t sav_skip_variable_record(sav_ctx_t *ctx);
132
static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx);
133
134
static readstat_error_t sav_skip_document_record(sav_ctx_t *ctx);
135
static readstat_error_t sav_read_document_record(sav_ctx_t *ctx);
136
137
static readstat_error_t sav_skip_value_label_record(sav_ctx_t *ctx);
138
static readstat_error_t sav_read_value_label_record(sav_ctx_t *ctx);
139
140
static readstat_error_t sav_read_dictionary_termination_record(sav_ctx_t *ctx);
141
142
/* Parsers that operate on an already-loaded record payload. */
static readstat_error_t sav_parse_machine_floating_point_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx);
143
static readstat_error_t sav_store_variable_display_parameter_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx);
144
static readstat_error_t sav_parse_variable_display_parameter_record(sav_ctx_t *ctx);
145
static readstat_error_t sav_parse_machine_integer_info_record(const void *data, size_t data_len, sav_ctx_t *ctx);
146
static readstat_error_t sav_parse_long_string_value_labels_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx);
147
static readstat_error_t sav_parse_long_string_missing_values_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx);
148
149
1.35M
/*
 * Mark `value` as system-missing when its double payload is NaN, or when its
 * raw 64-bit pattern equals one of the three sentinel patterns kept on the
 * context (missing_double, lowest_double, highest_double).
 * Leaves is_system_missing untouched otherwise.
 */
static void sav_tag_missing_double(readstat_value_t *value, sav_ctx_t *ctx) {
    const double payload = value->v.double_value;
    uint64_t bits = 0;

    /* Sentinels are compared bit-for-bit, not as floating-point numbers. */
    memcpy(&bits, &payload, 8);

    if (bits == ctx->missing_double
            || bits == ctx->lowest_double
            || bits == ctx->highest_double
            || isnan(payload)) {
        value->is_system_missing = 1;
    }
}
162
163
6.07k
/*
 * Forward a progress notification to the I/O layer's update callback, passing
 * the file size, the registered progress handler, the user context and the
 * I/O context. Returns whatever the callback returns.
 */
static readstat_error_t sav_update_progress(sav_ctx_t *ctx) {
    return ctx->io->update(ctx->file_size, ctx->handle.progress,
            ctx->user_ctx, ctx->io->io_ctx);
}
167
168
281k
/*
 * Advance the stream past one variable record without keeping any of it.
 *
 * Reads the fixed-size record header, then seeks over the optional variable
 * label (length rounded up to a multiple of 4) and the optional missing-value
 * slots (8 bytes each).
 *
 * Returns READSTAT_OK on success, READSTAT_ERROR_READ / READSTAT_ERROR_SEEK
 * on I/O failure, or READSTAT_ERROR_PARSE when the missing-value count is
 * outside the -3..3 range that the reading path also enforces.
 */
static readstat_error_t sav_skip_variable_record(sav_ctx_t *ctx) {
    sav_variable_record_t variable;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) < sizeof(sav_variable_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    /* Only tested for non-zero, so no byte swap is needed here. */
    if (variable.has_var_label) {
        uint32_t label_len;
        if (io->read(&label_len, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        label_len = ctx->bswap ? byteswap4(label_len) : label_len;
        /* Round up in 64 bits: (label_len + 3) would wrap to a tiny value in
         * 32-bit arithmetic for lengths near UINT32_MAX. */
        uint64_t label_capacity = ((uint64_t)label_len + 3) / 4 * 4;
        if (io->seek(label_capacity, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
    }
    if (variable.n_missing_values) {
        int n_missing_values = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values;
        /* Validate before abs(): abs(INT_MIN) is undefined, and a negative
         * count only ever encodes a range of up to three slots. */
        if (n_missing_values > 3 || n_missing_values < -3) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
        if (io->seek(abs(n_missing_values) * sizeof(double), READSTAT_SEEK_CUR, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
    }
cleanup:
    return retval;
}
199
200
1.67k
/*
 * Read a variable label from the stream and store a converted copy in
 * info->label.
 *
 * The label is stored as a 32-bit length followed by the text padded to a
 * multiple of 4 bytes. A zero length leaves info->label untouched and returns
 * READSTAT_OK. The output buffer is sized at 4 bytes per input byte plus a
 * terminator before conversion through ctx->converter.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ, READSTAT_ERROR_MALLOC,
 * READSTAT_ERROR_PARSE (length too large to round up), or the error from
 * readstat_convert. On any error info->label is freed and reset to NULL.
 */
static readstat_error_t sav_read_variable_label(spss_varinfo_t *info, sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    readstat_error_t retval = READSTAT_OK;
    uint32_t label_len, label_capacity;
    size_t out_label_len;
    char *label_buf = NULL;
    if (io->read(&label_len, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    label_len = ctx->bswap ? byteswap4(label_len) : label_len;

    if (label_len == 0)
        goto cleanup;

    /* (label_len + 3) must not wrap: a wrapped capacity would be smaller than
     * the label_len later handed to readstat_convert as the input length. */
    if (label_len > UINT32_MAX - 3) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    label_capacity = (label_len + 3) / 4 * 4;
    if ((label_buf = readstat_malloc(label_capacity)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    out_label_len = (size_t)label_len*4+1;
    if ((info->label = readstat_malloc(out_label_len)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    /* Consume the padding too so the stream stays aligned for what follows. */
    if (io->read(label_buf, label_capacity, io->io_ctx) < label_capacity) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    retval = readstat_convert(info->label, out_label_len, label_buf, label_len, ctx->converter);
    if (retval != READSTAT_OK)
        goto cleanup;

cleanup:
    free(label_buf);

    if (retval != READSTAT_OK) {
        free(info->label);
        info->label = NULL;
    }

    return retval;
}
249
250
1.43k
/*
 * Load info->n_missing_values doubles from the stream into
 * info->missing_double_values, byte-swapping each when ctx->bswap is set.
 * A value whose bit pattern equals one of the context sentinels is replaced:
 * missing_double -> NAN, lowest_double -> -HUGE_VAL, highest_double -> HUGE_VAL.
 *
 * Returns READSTAT_ERROR_READ on a short read, READSTAT_OK otherwise.
 */
static readstat_error_t sav_read_variable_missing_double_values(spss_varinfo_t *info, sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    int slot;

    if (io->read(info->missing_double_values, info->n_missing_values * sizeof(double), io->io_ctx)
            < info->n_missing_values * sizeof(double)) {
        return READSTAT_ERROR_READ;
    }

    for (slot = 0; slot < info->n_missing_values; slot++) {
        uint64_t bits = 0;

        if (ctx->bswap)
            info->missing_double_values[slot] = byteswap_double(info->missing_double_values[slot]);

        /* Snapshot the bit pattern once; all three tests use the same snapshot. */
        memcpy(&bits, &info->missing_double_values[slot], 8);

        if (bits == ctx->missing_double) {
            info->missing_double_values[slot] = NAN;
        }
        if (bits == ctx->lowest_double) {
            info->missing_double_values[slot] = -HUGE_VAL;
        }
        if (bits == ctx->highest_double) {
            info->missing_double_values[slot] = HUGE_VAL;
        }
    }

    return READSTAT_OK;
}
278
279
1.07k
/*
 * Read info->n_missing_values 8-byte string missing values from the stream
 * and convert each into info->missing_string_values[] via ctx->converter.
 * Stops at the first failure and returns READSTAT_ERROR_READ or the
 * conversion error; returns READSTAT_OK when every value was stored.
 */
static readstat_error_t sav_read_variable_missing_string_values(spss_varinfo_t *info, sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    readstat_error_t retval = READSTAT_OK;
    int slot = 0;

    while (retval == READSTAT_OK && slot < info->n_missing_values) {
        char raw[8];

        if (io->read(raw, sizeof(raw), io->io_ctx) < sizeof(raw)) {
            retval = READSTAT_ERROR_READ;
        } else {
            retval = readstat_convert(info->missing_string_values[slot],
                    sizeof(info->missing_string_values[0]),
                    raw, sizeof(raw), ctx->converter);
        }
        slot++;
    }

    return retval;
}
298
299
2.55k
/*
 * Interpret info->n_missing_values and read the missing-value slots.
 *
 * The count must lie in -3..3, otherwise READSTAT_ERROR_PARSE is returned.
 * A negative count sets info->missing_range and is replaced by its absolute
 * value; a non-negative count clears info->missing_range. The slots are then
 * read as doubles or as strings depending on info->type.
 */
static readstat_error_t sav_read_variable_missing_values(spss_varinfo_t *info, sav_ctx_t *ctx) {
    const int is_range = info->n_missing_values < 0;

    if (info->n_missing_values < -3 || info->n_missing_values > 3)
        return READSTAT_ERROR_PARSE;

    if (is_range) {
        info->missing_range = 1;
        info->n_missing_values = abs(info->n_missing_values);
    } else {
        info->missing_range = 0;
    }

    return (info->type == READSTAT_TYPE_DOUBLE)
        ? sav_read_variable_missing_double_values(info, ctx)
        : sav_read_variable_missing_string_values(info, ctx);
}
314
315
302k
/*
 * Parse one variable record and append a new spss_varinfo_t to ctx->varinfo.
 *
 * A record with a negative type does not create a variable: it bumps
 * ctx->var_offset and widens the previously stored variable by one, and is a
 * parse error if no variable has been stored yet. Otherwise the record's
 * name, print/write formats, optional label and optional missing values are
 * decoded into a freshly allocated info structure.
 *
 * Returns READSTAT_OK on success or the first error encountered; on error the
 * partially built info is released with spss_varinfo_free.
 */
static readstat_error_t sav_read_variable_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    sav_variable_record_t variable = { 0 };
    spss_varinfo_t *info = NULL;
    readstat_error_t retval = READSTAT_OK;

    /* Double the pointer array once every slot is in use. */
    if (ctx->var_index == ctx->varinfo_capacity) {
        ctx->varinfo_capacity *= 2;
        ctx->varinfo = readstat_realloc(ctx->varinfo, ctx->varinfo_capacity * sizeof(spss_varinfo_t *));
        if (ctx->varinfo == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }
    }

    if (io->read(&variable, sizeof(sav_variable_record_t), io->io_ctx) < sizeof(sav_variable_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap) {
        variable.print = byteswap4(variable.print);
        variable.write = byteswap4(variable.write);
    }

    int32_t type = ctx->bswap ? byteswap4(variable.type) : variable.type;
    if (type < 0) {
        /* Continuation of the previous variable; nothing new is allocated. */
        if (ctx->var_index == 0)
            return READSTAT_ERROR_PARSE;

        ctx->var_offset++;
        ctx->varinfo[ctx->var_index-1]->width++;
        return 0;
    }

    info = readstat_calloc(1, sizeof(spss_varinfo_t));
    if (info == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    info->width = 1;
    info->n_segments = 1;
    info->index = ctx->var_index;
    info->offset = ctx->var_offset;
    info->labels_index = -1;

    /* The record's name seeds both the short name and the long name. */
    retval = readstat_convert(info->name, sizeof(info->name),
            variable.name, sizeof(variable.name), NULL);
    if (retval != READSTAT_OK)
        goto cleanup;

    retval = readstat_convert(info->longname, sizeof(info->longname),
            variable.name, sizeof(variable.name), NULL);
    if (retval != READSTAT_OK)
        goto cleanup;

    /* Format words: byte 0 = decimals, byte 1 = width, byte 2 = format type. */
    info->print_format.decimal_places = (variable.print & 0x000000FF);
    info->print_format.width = (variable.print & 0x0000FF00) >> 8;
    info->print_format.type = (variable.print  & 0x00FF0000) >> 16;

    info->write_format.decimal_places = (variable.write & 0x000000FF);
    info->write_format.width = (variable.write & 0x0000FF00) >> 8;
    info->write_format.type = (variable.write  & 0x00FF0000) >> 16;

    const int is_string = type > 0
        || info->print_format.type == SPSS_FORMAT_TYPE_A
        || info->write_format.type == SPSS_FORMAT_TYPE_A;
    info->type = is_string ? READSTAT_TYPE_STRING : READSTAT_TYPE_DOUBLE;

    if (variable.has_var_label) {
        retval = sav_read_variable_label(info, ctx);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    if (variable.n_missing_values) {
        info->n_missing_values = ctx->bswap ? byteswap4(variable.n_missing_values) : variable.n_missing_values;
        retval = sav_read_variable_missing_values(info, ctx);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    /* Publish the finished variable and advance both counters. */
    ctx->varinfo[ctx->var_index] = info;
    ctx->var_index++;
    ctx->var_offset++;

cleanup:
    if (retval != READSTAT_OK)
        spss_varinfo_free(info);

    return retval;
}
402
403
28.5k
/*
 * Advance the stream past a value-label record and the variable-index record
 * that must follow it, without retaining anything.
 *
 * For each label: skip the 8-byte value, read the 1-byte label length, and
 * skip the label text padded so that length byte + text is a multiple of 8.
 * The next record type must be 4; its variable count is read and the index
 * list skipped.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ, READSTAT_ERROR_SEEK, or
 * READSTAT_ERROR_PARSE when the following record is not type 4.
 */
static readstat_error_t sav_skip_value_label_record(sav_ctx_t *ctx) {
    uint32_t label_count;
    uint32_t rec_type;
    uint32_t var_count;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    if (io->read(&label_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        label_count = byteswap4(label_count);
    /* Unsigned index to match label_count: a signed int would overflow
     * (undefined behaviour) for counts above INT_MAX. */
    uint32_t i;
    for (i=0; i<label_count; i++) {
        unsigned char unpadded_len = 0;
        size_t padded_len = 0;
        if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
        if (io->read(&unpadded_len, 1, io->io_ctx) < 1) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        /* The length byte already consumed counts toward the 8-byte unit. */
        padded_len = (unpadded_len + 8) / 8 * 8 - 1;
        if (io->seek(padded_len, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto cleanup;
        }
    }

    if (io->read(&rec_type, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        rec_type = byteswap4(rec_type);

    if (rec_type != 4) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (io->read(&var_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        var_count = byteswap4(var_count);

    if (io->seek(var_count * sizeof(uint32_t), READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

cleanup:
    return retval;
}
460
461
static readstat_error_t sav_submit_value_labels(value_label_t *value_labels, int32_t label_count, 
462
18.2k
        readstat_type_t value_type, sav_ctx_t *ctx) {
463
18.2k
    char label_name_buf[256];
464
18.2k
    readstat_error_t retval = READSTAT_OK;
465
18.2k
    int32_t i;
466
467
18.2k
    snprintf(label_name_buf, sizeof(label_name_buf), SAV_LABEL_NAME_PREFIX "%d", ctx->value_labels_count);
468
469
88.5k
    for (i=0; i<label_count; i++) {
470
70.2k
        value_label_t *vlabel = &value_labels[i];
471
70.2k
        if (ctx->handle.value_label(label_name_buf, vlabel->final_value, vlabel->label, ctx->user_ctx) != READSTAT_HANDLER_OK) {
472
0
            retval = READSTAT_ERROR_USER_ABORT;
473
0
            goto cleanup;
474
0
        }
475
70.2k
    }
476
18.2k
cleanup:
477
18.2k
    return retval;
478
18.2k
}
479
480
18.4k
/*
 * Parse a value-label record plus the variable-index record that follows it,
 * and deliver the labels to the value_label handler if one is registered.
 *
 * Steps:
 *   1. Read label_count entries: 8 raw value bytes, a 1-byte label length and
 *      the label text padded so that length byte + text is a multiple of 8.
 *      Each label is converted through ctx->converter into a heap buffer.
 *   2. Require a record of type 4, then read its list of variable offsets.
 *      Every listed variable found in ctx->varinfo gets labels_index set to
 *      the current label-set number; the last one found decides whether the
 *      values are interpreted as doubles or strings (default: string).
 *   3. Decode each raw value accordingly, submit the set, and increment
 *      ctx->value_labels_count.
 *
 * Returns READSTAT_OK, or READSTAT_ERROR_READ / _MALLOC / _PARSE, a
 * conversion error, or READSTAT_ERROR_USER_ABORT from the handler. All
 * temporary allocations are released before returning.
 */
static readstat_error_t sav_read_value_label_record(sav_ctx_t *ctx) {
    uint32_t label_count;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    uint32_t *vars = NULL;
    uint32_t var_count;
    int32_t rec_type;
    readstat_type_t value_type = READSTAT_TYPE_STRING;
    char label_buf[256];
    value_label_t *value_labels = NULL;
    /* Unsigned to match label_count / var_count (avoids signed overflow). */
    uint32_t i;

    if (io->read(&label_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        label_count = byteswap4(label_count);

    /* calloc so every ->label starts NULL and cleanup can free unconditionally. */
    if (label_count && (value_labels = readstat_calloc(label_count, sizeof(value_label_t))) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    for (i=0; i<label_count; i++) {
        value_label_t *vlabel = &value_labels[i];
        unsigned char unpadded_label_len = 0;
        size_t padded_label_len = 0, utf8_label_len = 0;

        if (io->read(vlabel->raw_value, 8, io->io_ctx) < 8) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }
        if (io->read(&unpadded_label_len, 1, io->io_ctx) < 1) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        /* At most 255 bytes for a 1-byte length, so label_buf[256] suffices. */
        padded_label_len = (unpadded_label_len + 8) / 8 * 8 - 1;
        if (io->read(label_buf, padded_label_len, io->io_ctx) < padded_label_len) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        utf8_label_len = padded_label_len*4+1;
        if ((vlabel->label = readstat_malloc(utf8_label_len)) == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto cleanup;
        }

        retval = readstat_convert(vlabel->label, utf8_label_len, label_buf, padded_label_len, ctx->converter);
        if (retval != READSTAT_OK)
            goto cleanup;
    }

    if (io->read(&rec_type, sizeof(int32_t), io->io_ctx) < sizeof(int32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        rec_type = byteswap4(rec_type);

    if (rec_type != 4) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }
    if (io->read(&var_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        var_count = byteswap4(var_count);

    if (var_count && (vars = readstat_malloc(var_count * sizeof(uint32_t))) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }
    if (io->read(vars, var_count * sizeof(uint32_t), io->io_ctx) < var_count * sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    for (i=0; i<var_count; i++) {
        uint32_t var_offset = vars[i];
        if (ctx->bswap)
            var_offset = byteswap4(var_offset);

        var_offset--; /* stored offsets appear to be 1-based; varinfo offsets start at 0 */
        spss_varinfo_t **var = bsearch(&var_offset, ctx->varinfo, ctx->var_index, sizeof(spss_varinfo_t *),
                &spss_varinfo_compare);
        if (var) {
            (*var)->labels_index = ctx->value_labels_count;
            value_type = (*var)->type;
        }
    }

    for (i=0; i<label_count; i++) {
        value_label_t *vlabel = &value_labels[i];
        double val_d = 0.0;
        vlabel->final_value.type = value_type;
        if (value_type == READSTAT_TYPE_DOUBLE) {
            memcpy(&val_d, vlabel->raw_value, 8);
            if (ctx->bswap)
                val_d = byteswap_double(val_d);

            vlabel->final_value.v.double_value = val_d;
            sav_tag_missing_double(&vlabel->final_value, ctx);
        } else {
            retval = readstat_convert(vlabel->utf8_string_value, sizeof(vlabel->utf8_string_value),
                    vlabel->raw_value, 8, ctx->converter);
            /* Bail out entirely: the remaining entries have no final_value
             * yet and must not be handed to the handler. */
            if (retval != READSTAT_OK)
                goto cleanup;

            vlabel->final_value.v.string_value = vlabel->utf8_string_value;
        }
    }

    if (ctx->handle.value_label) {
        /* Propagate the result so a handler-requested abort stops parsing. */
        retval = sav_submit_value_labels(value_labels, label_count, value_type, ctx);
        if (retval != READSTAT_OK)
            goto cleanup;
    }
    ctx->value_labels_count++;
cleanup:
    free(vars);
    if (value_labels) {
        for (i=0; i<label_count; i++) {
            free(value_labels[i].label);
        }
        free(value_labels);
    }

    return retval;
}
614
615
1.92k
/*
 * Advance the stream past a document record: a 32-bit line count followed by
 * that many lines of SPSS_DOC_LINE_SIZE bytes each.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ if the count cannot be read, or
 * READSTAT_ERROR_SEEK if the seek fails.
 */
static readstat_error_t sav_skip_document_record(sav_ctx_t *ctx) {
    uint32_t n_lines;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    if (io->read(&n_lines, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }
    if (ctx->bswap)
        n_lines = byteswap4(n_lines);
    /* Multiply in 64 bits: a 32-bit product can wrap for a large line count,
     * which would seek to the wrong position instead of failing. */
    if (io->seek((uint64_t)n_lines * SPSS_DOC_LINE_SIZE, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

cleanup:
    return retval;
}
633
634
1.06k
/*
 * Read a document record and pass each line to the note handler.
 *
 * Without a note handler the record is skipped via sav_skip_document_record.
 * Otherwise the 32-bit line count is read, and each SPSS_DOC_LINE_SIZE-byte
 * line is converted through ctx->converter and delivered with its zero-based
 * index.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_READ, a conversion error, or
 * READSTAT_ERROR_USER_ABORT when the handler does not return
 * READSTAT_HANDLER_OK.
 */
static readstat_error_t sav_read_document_record(sav_ctx_t *ctx) {
    if (!ctx->handle.note)
        return sav_skip_document_record(ctx);

    readstat_io_t *io = ctx->io;
    uint32_t n_lines;

    if (io->read(&n_lines, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t))
        return READSTAT_ERROR_READ;

    if (ctx->bswap)
        n_lines = byteswap4(n_lines);

    char line_raw[SPSS_DOC_LINE_SIZE];
    char line_utf8[4*SPSS_DOC_LINE_SIZE+1];
    int line_no = 0;

    while (line_no < n_lines) {
        readstat_error_t status;

        if (io->read(line_raw, SPSS_DOC_LINE_SIZE, io->io_ctx) < SPSS_DOC_LINE_SIZE)
            return READSTAT_ERROR_READ;

        status = readstat_convert(line_utf8, sizeof(line_utf8),
                line_raw, sizeof(line_raw), ctx->converter);
        if (status != READSTAT_OK)
            return status;

        if (ctx->handle.note(line_no, line_utf8, ctx->user_ctx) != READSTAT_HANDLER_OK)
            return READSTAT_ERROR_USER_ABORT;

        line_no++;
    }

    return READSTAT_OK;
}
671
672
2.76k
/*
 * Consume the 4-byte filler word of the dictionary termination record.
 * The value itself is discarded. Returns READSTAT_ERROR_READ on a short
 * read, READSTAT_OK otherwise.
 */
static readstat_error_t sav_read_dictionary_termination_record(sav_ctx_t *ctx) {
    readstat_io_t *io = ctx->io;
    int32_t filler;

    if (io->read(&filler, sizeof(int32_t), io->io_ctx) < sizeof(int32_t))
        return READSTAT_ERROR_READ;

    return READSTAT_OK;
}
681
682
2.26M
/*
 * Decode one row of case data and deliver each value to the value handler.
 *
 * `buffer` holds the row as consecutive 8-byte cells and `buffer_len` is its
 * size in bytes (NOTE(review): presumably always a multiple of 8 -- confirm
 * with the callers). While ctx->row_offset is non-zero the row is only
 * counted down and dropped.
 *
 * Two cursors walk ctx->varinfo: `col` is the entry whose width governs how
 * many cells the current column spans, and `var_index` is the logical
 * variable being assembled (a string variable may span n_segments entries).
 * String cells accumulate in ctx->raw_string and are converted and emitted
 * once the variable's last segment completes; numeric cells are emitted
 * immediately after byte swapping and missing-value tagging.
 *
 * Returns READSTAT_OK, READSTAT_ERROR_PARSE if a column spans more than 32
 * cells, a conversion error, or READSTAT_ERROR_USER_ABORT from the handler.
 * ctx->current_row is incremented only on success.
 */
static readstat_error_t sav_process_row(unsigned char *buffer, size_t buffer_len, sav_ctx_t *ctx) {
    if (ctx->row_offset) {
        ctx->row_offset--;
        return READSTAT_OK;
    }

    readstat_error_t retval = READSTAT_OK;
    double fp_value;
    int offset = 0;
    readstat_off_t data_offset = 0;
    size_t raw_str_used = 0;
    int segment_offset = 0;
    int var_index = 0, col = 0;
    int raw_str_is_utf8 = ctx->input_encoding && !strcmp(ctx->input_encoding, "UTF-8");

    while (data_offset < buffer_len && col < ctx->var_index && var_index < ctx->var_index) {
        spss_varinfo_t *col_info = ctx->varinfo[col];
        spss_varinfo_t *var_info = ctx->varinfo[var_index];
        readstat_value_t value = { .type = var_info->type };
        if (offset > 31) {
            retval = READSTAT_ERROR_PARSE;
            goto done;
        }
        if (var_info->type == READSTAT_TYPE_STRING) {
            /* Cells that would overflow the raw buffer are silently dropped. */
            if (raw_str_used + 8 <= ctx->raw_string_len) {
                if (raw_str_is_utf8) {
                    /* Skip null bytes, see https://github.com/tidyverse/haven/issues/560  */
                    char c;
                    for (int i=0; i<8; i++)
                        if ((c = buffer[data_offset+i]))
                            ctx->raw_string[raw_str_used++] = c;
                } else {
                    memcpy(ctx->raw_string + raw_str_used, &buffer[data_offset], 8);
                    raw_str_used += 8;
                }
            }
            if (++offset == col_info->width) {
                if (++segment_offset < var_info->n_segments) {
                    /* Drop the last byte of a non-final segment (presumably
                     * padding). Guarded: with UTF-8 null-skipping nothing may
                     * have been copied yet, and decrementing a size_t of 0
                     * would wrap and make the next cell write out of bounds. */
                    if (raw_str_used > 0)
                        raw_str_used--;
                }
                offset = 0;
                col++;
            }
            if (segment_offset == var_info->n_segments) {
                if (!ctx->variables[var_info->index]->skip) {
                    retval = readstat_convert(ctx->utf8_string, ctx->utf8_string_len, 
                            ctx->raw_string, raw_str_used, ctx->converter);
                    if (retval != READSTAT_OK)
                        goto done;
                    value.v.string_value = ctx->utf8_string;
                    if (ctx->handle.value(ctx->current_row, ctx->variables[var_info->index],
                                value, ctx->user_ctx) != READSTAT_HANDLER_OK) {
                        retval = READSTAT_ERROR_USER_ABORT;
                        goto done;
                    }
                }
                /* Reset the accumulator for the next string variable. */
                raw_str_used = 0;
                segment_offset = 0;
                var_index += var_info->n_segments;
            }
        } else if (var_info->type == READSTAT_TYPE_DOUBLE) {
            if (!ctx->variables[var_info->index]->skip) {
                memcpy(&fp_value, &buffer[data_offset], 8);
                if (ctx->bswap) {
                    fp_value = byteswap_double(fp_value);
                }
                value.v.double_value = fp_value;
                sav_tag_missing_double(&value, ctx);
                if (ctx->handle.value(ctx->current_row, ctx->variables[var_info->index],
                            value, ctx->user_ctx) != READSTAT_HANDLER_OK) {
                    retval = READSTAT_ERROR_USER_ABORT;
                    goto done;
                }
            }
            var_index += var_info->n_segments;
            col++;
        }
        data_offset += 8;
    }
    ctx->current_row++;
done:
    return retval;
}
765
766
1.52k
/* Allocate the per-context string scratch buffers (sized from the widest
 * string variable, with a floor of 256 bytes) and then run the row reader
 * that matches ctx->compression.
 *
 * Returns READSTAT_ERROR_MALLOC if either scratch buffer cannot be allocated,
 * the reader's own error if reading fails, and
 * READSTAT_ERROR_ROW_COUNT_MISMATCH when ctx->record_count is non-negative
 * but ctx->current_row does not end up equal to ctx->row_limit. */
static readstat_error_t sav_read_data(sav_ctx_t *ctx) {
    readstat_error_t retval = READSTAT_OK;
    size_t max_string_len = 256;
    int var = 0;

    /* Walk the variables, stepping over all segments of each one at once. */
    while (var < ctx->var_index) {
        spss_varinfo_t *info = ctx->varinfo[var];
        if (info->string_length > max_string_len)
            max_string_len = info->string_length;
        var += info->n_segments;
    }

    ctx->raw_string_len = max_string_len + sizeof(SAV_EIGHT_SPACES)-2;
    ctx->raw_string = readstat_malloc(ctx->raw_string_len);

    /* The converted buffer is sized at four output bytes per input byte,
     * plus one extra byte. */
    ctx->utf8_string_len = 4*max_string_len+1 + sizeof(SAV_EIGHT_SPACES)-2;
    ctx->utf8_string = readstat_malloc(ctx->utf8_string_len);

    if (ctx->raw_string == NULL || ctx->utf8_string == NULL)
        return READSTAT_ERROR_MALLOC;

    if (ctx->compression == READSTAT_COMPRESS_ROWS) {
        retval = sav_read_compressed_data(ctx, &sav_process_row);
    } else if (ctx->compression == READSTAT_COMPRESS_BINARY) {
#if HAVE_ZLIB
        retval = zsav_read_compressed_data(ctx, &sav_process_row);
#else
        retval = READSTAT_ERROR_UNSUPPORTED_COMPRESSION;
#endif
    } else {
        retval = sav_read_uncompressed_data(ctx, &sav_process_row);
    }

    /* Only a successful read is checked against the expected row count. */
    if (retval == READSTAT_OK
            && ctx->record_count >= 0
            && ctx->current_row != ctx->row_limit) {
        retval = READSTAT_ERROR_ROW_COUNT_MISMATCH;
    }

    return retval;
}
811
812
/* Read uncompressed rows of ctx->var_offset * 8 bytes each and pass every
 * complete row to row_handler.
 *
 * If ctx->row_offset is set, that many rows are skipped with a relative seek
 * first. Reading continues until ctx->row_limit rows have been processed
 * (or indefinitely when row_limit is -1), a read comes back short, or the
 * handler reports an error. A short read ends the loop without setting an
 * error here; the caller compares the row count afterwards.
 *
 * Returns READSTAT_ERROR_MALLOC if the row buffer cannot be allocated,
 * READSTAT_ERROR_SEEK if the initial skip fails, or whatever
 * sav_update_progress / row_handler return on failure. */
static readstat_error_t sav_read_uncompressed_data(sav_ctx_t *ctx,
        readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    unsigned char *buffer = NULL;
    size_t bytes_read = 0;
    size_t buffer_len = ctx->var_offset * 8;

    buffer = readstat_malloc(buffer_len);
    /* A zero-length row legitimately has no buffer; any other NULL is an
     * allocation failure and must not reach io->read(). This mirrors the
     * guard used by the row-compressed reader. */
    if (buffer_len && buffer == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto done;
    }

    if (ctx->row_offset) {
        if (io->seek(buffer_len * ctx->row_offset, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
            retval = READSTAT_ERROR_SEEK;
            goto done;
        }
        ctx->row_offset = 0;
    }

    while (ctx->row_limit == -1 || ctx->current_row < ctx->row_limit) {
        retval = sav_update_progress(ctx);
        if (retval != READSTAT_OK)
            goto done;

        /* Anything other than a full row (including an error return) stops
         * the loop; retval stays READSTAT_OK in that case. */
        if ((bytes_read = io->read(buffer, buffer_len, io->io_ctx)) != buffer_len)
            goto done;

        retval = row_handler(buffer, buffer_len, ctx);
        if (retval != READSTAT_OK)
            goto done;
    }

done:
    free(buffer);

    return retval;
}
848
849
/* Read row-compressed data in DATA_BUFFER_SIZE chunks, expand it one row at
 * a time with sav_decompress_row(), and pass each finished row
 * (ctx->var_offset * 8 bytes) to row_handler.
 *
 * The loop ends without an error when a read returns -1, 0, or a length that
 * is not a multiple of 8, when the decompressor reports the end of the
 * stream, or when ctx->row_limit (if positive) has been reached. Errors from
 * sav_update_progress or row_handler are returned as-is;
 * READSTAT_ERROR_MALLOC is returned if the row buffer cannot be allocated. */
static readstat_error_t sav_read_compressed_data(sav_ctx_t *ctx,
        readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;

    /* Compressed input: current chunk, its length, and the read position. */
    unsigned char chunk[DATA_BUFFER_SIZE];
    int chunk_len = 0;
    readstat_off_t chunk_pos = 0;

    /* Decompressed output: one row, filled across decompressor calls. */
    size_t row_len = ctx->var_offset * 8;
    readstat_off_t row_pos = 0;
    unsigned char *row = NULL;

    struct sav_row_stream_s stream = {
        .missing_value = ctx->missing_double,
        .bias = ctx->bias,
        .bswap = ctx->bswap };

    if (row_len) {
        row = readstat_malloc(row_len);
        if (row == NULL) {
            retval = READSTAT_ERROR_MALLOC;
            goto done;
        }
    }

    for (;;) {
        retval = sav_update_progress(ctx);
        if (retval != READSTAT_OK)
            goto done;

        chunk_len = io->read(chunk, sizeof(chunk), io->io_ctx);
        if (chunk_len == -1 || chunk_len == 0 || (chunk_len % 8) != 0)
            goto done;

        stream.status = SAV_ROW_STREAM_HAVE_DATA;
        chunk_pos = 0;

        /* Keep decompressing until this chunk is exhausted. */
        while (stream.status != SAV_ROW_STREAM_NEED_DATA) {
            stream.next_in = &chunk[chunk_pos];
            stream.avail_in = chunk_len - chunk_pos;

            stream.next_out = &row[row_pos];
            stream.avail_out = row_len - row_pos;

            sav_decompress_row(&stream);

            /* Recover both cursors from what the decompressor left over. */
            row_pos = row_len - stream.avail_out;
            chunk_pos = chunk_len - stream.avail_in;

            if (stream.status == SAV_ROW_STREAM_FINISHED_ROW) {
                retval = row_handler(row, row_len, ctx);
                if (retval != READSTAT_OK)
                    goto done;

                row_pos = 0;
            }

            if (stream.status == SAV_ROW_STREAM_FINISHED_ALL)
                goto done;
            if (ctx->row_limit > 0 && ctx->current_row == ctx->row_limit)
                goto done;
        }
    }

done:
    free(row);

    return retval;
}
916
917
802
/* Parse the 32-byte machine integer info record and set up ctx->converter.
 *
 * The source character set is ctx->input_encoding when that is already set;
 * otherwise the record's character code is looked up in _charset_table and
 * the match is stored in ctx->input_encoding. An unknown code is reported
 * through ctx->handle.error (if set) and yields
 * READSTAT_ERROR_UNSUPPORTED_CHARSET, as does a failing iconv_open().
 * Returns READSTAT_ERROR_PARSE when data_len is not 32. */
static readstat_error_t sav_parse_machine_integer_info_record(const void *data, size_t data_len, sav_ctx_t *ctx) {
    if (data_len != 32)
        return READSTAT_ERROR_PARSE;

    sav_machine_integer_info_record_t record;
    memcpy(&record, data, data_len);
    if (ctx->bswap) {
        record.character_code = byteswap4(record.character_code);
    }

    const char *dst_charset = ctx->output_encoding;
    const char *src_charset = NULL;

    if (ctx->input_encoding) {
        src_charset = ctx->input_encoding;
    } else {
        int entry = 0;
        while (entry < sizeof(_charset_table)/sizeof(_charset_table[0])) {
            if (_charset_table[entry].code == record.character_code) {
                src_charset = _charset_table[entry].name;
                break;
            }
            entry++;
        }

        if (src_charset == NULL) {
            if (ctx->handle.error) {
                char error_buf[1024];
                snprintf(error_buf, sizeof(error_buf), "Unsupported character set: %d\n", record.character_code);
                ctx->handle.error(error_buf, ctx->user_ctx);
            }
            return READSTAT_ERROR_UNSUPPORTED_CHARSET;
        }

        ctx->input_encoding = src_charset;
    }

    if (!src_charset || !dst_charset)
        return READSTAT_OK;

    /* A converter is opened even when source and destination charsets are
     * identical. Some versions of SPSS write illegally truncated strings
     * (for example a three-byte final character in a field with room for
     * only two bytes), and routing everything through iconv keeps such
     * invalid byte sequences from reaching the client. */
    iconv_t converter = iconv_open(dst_charset, src_charset);
    if (converter == (iconv_t)-1)
        return READSTAT_ERROR_UNSUPPORTED_CHARSET;

    /* Replace any converter that was already open. */
    if (ctx->converter) {
        iconv_close(ctx->converter);
    }
    ctx->converter = converter;

    return READSTAT_OK;
}
966
967
457
/* Parse the machine floating-point info record: three 8-byte values holding
 * the system-missing, highest and lowest markers. Each is byte-swapped when
 * ctx->bswap is set and stored in the context.
 * Returns READSTAT_ERROR_PARSE unless size is 8 and count is 3. */
static readstat_error_t sav_parse_machine_floating_point_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) {
    if (!(size == 8 && count == 3))
        return READSTAT_ERROR_PARSE;

    sav_machine_floating_point_info_record_t fp_info;
    memcpy(&fp_info, data, sizeof(fp_info));

    ctx->missing_double = ctx->bswap ? byteswap8(fp_info.sysmis) : fp_info.sysmis;
    ctx->highest_double = ctx->bswap ? byteswap8(fp_info.highest) : fp_info.highest;
    ctx->lowest_double = ctx->bswap ? byteswap8(fp_info.lowest) : fp_info.lowest;

    return READSTAT_OK;
}
980
981
/* We don't yet know how many real variables there are, so store the values in the record
982
 * and make sense of them later. */
983
524
static readstat_error_t sav_store_variable_display_parameter_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) {
984
524
    if (size != 4)
985
9
        return READSTAT_ERROR_PARSE;
986
987
515
    const uint32_t *data_ptr = data;
988
515
    int i;
989
990
515
    ctx->variable_display_values = readstat_realloc(ctx->variable_display_values, count * sizeof(uint32_t));
991
515
    if (count > 0 && ctx->variable_display_values == NULL)
992
0
        return READSTAT_ERROR_MALLOC;
993
994
515
    ctx->variable_display_values_count = count;
995
1.01M
    for (i=0; i<count; i++) {
996
1.01M
        ctx->variable_display_values[i] = ctx->bswap ? byteswap4(data_ptr[i]) : data_ptr[i];
997
1.01M
    }
998
515
    return READSTAT_OK;
999
515
}
1000
1001
1.56k
/* Interpret the display values stored earlier in the context, now that
 * ctx->var_index is known. The stored count must be exactly two or three
 * values per variable index (measure, optional display width, alignment);
 * anything else is READSTAT_ERROR_PARSE. Nothing is done when no display
 * values were stored. */
static readstat_error_t sav_parse_variable_display_parameter_record(sav_ctx_t *ctx) {
    if (!ctx->variable_display_values)
        return READSTAT_OK;

    long count = ctx->variable_display_values_count;
    if (!(count == 2 * ctx->var_index || count == 3 * ctx->var_index))
        return READSTAT_ERROR_PARSE;

    /* Three values per entry means a display width sits between the measure
     * and the alignment. */
    int has_display_width = ctx->var_index > 0 && (count / ctx->var_index == 3);
    int stride = 2 + has_display_width;

    int i = 0;
    while (i < ctx->var_index) {
        spss_varinfo_t *info = ctx->varinfo[i];
        int cursor = stride * i;

        info->measure = spss_measure_to_readstat_measure(ctx->variable_display_values[cursor]);
        cursor++;

        if (has_display_width) {
            info->display_width = ctx->variable_display_values[cursor];
            cursor++;
        }

        info->alignment = spss_alignment_to_readstat_alignment(ctx->variable_display_values[cursor]);

        /* Step over every segment of this variable. */
        i += info->n_segments;
    }

    return READSTAT_OK;
}
1025
1026
/* Read a length-prefixed string: a 32-bit length (byte-swapped when
 * ctx->bswap is set) followed by that many bytes, copied into buf via
 * readstat_convert() with no converter.
 *
 * buf / buf_len        destination buffer and its capacity
 * inout_data_ptr       in: start of the field; out: position reached
 *                      (past the string on success, or wherever parsing
 *                      stopped on failure)
 * data_ptr_len         number of bytes available at *inout_data_ptr
 *
 * Returns READSTAT_ERROR_PARSE when the length prefix or the string body
 * does not fit in the available bytes, or the readstat_convert() error. */
static readstat_error_t sav_read_pascal_string(char *buf, size_t buf_len,
        const char **inout_data_ptr, size_t data_ptr_len, sav_ctx_t *ctx) {
    const char *data_ptr = *inout_data_ptr;
    /* Bounds are tracked as a byte count instead of comparing
     * `data_ptr + len` with an end pointer: adding an untrusted 32-bit
     * length to the pointer can move it past the end of the buffer, which
     * is undefined and may wrap on 32-bit address spaces. */
    size_t remaining = data_ptr_len;
    readstat_error_t retval = READSTAT_OK;
    uint32_t var_name_len = 0;

    if (remaining < sizeof(uint32_t)) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    memcpy(&var_name_len, data_ptr, sizeof(uint32_t));
    if (ctx->bswap)
        var_name_len = byteswap4(var_name_len);

    data_ptr += sizeof(uint32_t);
    remaining -= sizeof(uint32_t);

    if (var_name_len > remaining) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    retval = readstat_convert(buf, buf_len, data_ptr, var_name_len, NULL);
    if (retval != READSTAT_OK)
        goto cleanup;

    data_ptr += var_name_len;

cleanup:
    *inout_data_ptr = data_ptr;

    return retval;
}
1060
1061
719
/* Parse the long-string value labels record and report every (value, label)
 * pair through ctx->handle.value_label.
 *
 * The record is a sequence of entries, each consisting of: a length-prefixed
 * variable name, a 4-byte field that is skipped, a 32-bit label count, and
 * then for every label a length-prefixed value followed by a length-prefixed
 * label. The variable is looked up by long name; on a match it is given the
 * next labels index and the label-set name is derived from that index.
 *
 * Does nothing when no value_label handler is installed. Returns
 * READSTAT_ERROR_PARSE for size != 1, truncated data, or an unknown variable
 * name; READSTAT_ERROR_MALLOC when a scratch buffer cannot be (re)allocated;
 * READSTAT_ERROR_USER_ABORT when the handler does not return
 * READSTAT_HANDLER_OK; or the error from readstat_convert().
 *
 * All bounds checks compare a length against the number of bytes left
 * (data_end - data_ptr) rather than forming `data_ptr + len`, because adding
 * an untrusted 32-bit length to the pointer can move it past the end of the
 * buffer, which is undefined and may wrap on 32-bit address spaces. */
static readstat_error_t sav_parse_long_string_value_labels_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) {
    if (!ctx->handle.value_label)
        return READSTAT_OK;
    if (size != 1)
        return READSTAT_ERROR_PARSE;

    readstat_error_t retval = READSTAT_OK;
    uint32_t label_count = 0;
    uint32_t i = 0;
    const char *data_ptr = data;
    const char *data_end = data_ptr + count;
    char var_name_buf[256+1]; // unconverted
    char label_name_buf[256];
    char *value_buffer = NULL;
    char *label_buffer = NULL;

    while (data_ptr < data_end) {
        /* An empty label-set name after the lookup means "variable not found". */
        memset(label_name_buf, '\0', sizeof(label_name_buf));

        retval = sav_read_pascal_string(var_name_buf, sizeof(var_name_buf),
                &data_ptr, data_end - data_ptr, ctx);
        if (retval != READSTAT_OK)
            goto cleanup;

        for (i=0; i<ctx->var_index;) {
            spss_varinfo_t *info = ctx->varinfo[i];
            if (strcmp(var_name_buf, info->longname) == 0) {
                info->labels_index = ctx->value_labels_count++;
                snprintf(label_name_buf, sizeof(label_name_buf),
                        SAV_LABEL_NAME_PREFIX "%d", info->labels_index);
                break;
            }
            i += info->n_segments;
        }

        if (label_name_buf[0] == '\0') {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        /* Both the skipped 4-byte field and the label count must be present
         * before the pointer is advanced over either of them. */
        if ((size_t)(data_end - data_ptr) < 2 * sizeof(uint32_t)) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        data_ptr += sizeof(uint32_t);

        memcpy(&label_count, data_ptr, sizeof(uint32_t));
        if (ctx->bswap)
            label_count = byteswap4(label_count);

        data_ptr += sizeof(uint32_t);

        for (i=0; i<label_count; i++) {
            uint32_t value_len = 0, label_len = 0;
            uint32_t value_buffer_len = 0, label_buffer_len = 0;

            if ((size_t)(data_end - data_ptr) < sizeof(uint32_t)) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }

            memcpy(&value_len, data_ptr, sizeof(uint32_t));
            if (ctx->bswap)
                value_len = byteswap4(value_len);

            data_ptr += sizeof(uint32_t);

            /* Output buffer: four bytes per input byte plus one extra. */
            value_buffer_len = value_len*4+1;
            value_buffer = readstat_realloc(value_buffer, value_buffer_len);
            if (value_buffer == NULL) {
                retval = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }

            if (value_len > (size_t)(data_end - data_ptr)) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }

            retval = readstat_convert(value_buffer, value_buffer_len, data_ptr, value_len, ctx->converter);
            if (retval != READSTAT_OK)
                goto cleanup;

            data_ptr += value_len;

            if ((size_t)(data_end - data_ptr) < sizeof(uint32_t)) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }

            memcpy(&label_len, data_ptr, sizeof(uint32_t));
            if (ctx->bswap)
                label_len = byteswap4(label_len);

            data_ptr += sizeof(uint32_t);

            label_buffer_len = label_len*4+1;
            label_buffer = readstat_realloc(label_buffer, label_buffer_len);
            if (label_buffer == NULL) {
                retval = READSTAT_ERROR_MALLOC;
                goto cleanup;
            }

            if (label_len > (size_t)(data_end - data_ptr)) {
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
            }

            retval = readstat_convert(label_buffer, label_buffer_len, data_ptr, label_len, ctx->converter);
            if (retval != READSTAT_OK)
                goto cleanup;

            data_ptr += label_len;

            readstat_value_t value = { .type = READSTAT_TYPE_STRING };
            value.v.string_value = value_buffer;

            if (ctx->handle.value_label(label_name_buf, value, label_buffer, ctx->user_ctx) != READSTAT_HANDLER_OK) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }
        }
    }

    if (data_ptr != data_end) {
        retval = READSTAT_ERROR_PARSE;
    }

cleanup:
    free(value_buffer);
    free(label_buffer);
    return retval;
}
1197
1198
470
/* Parse the long-string missing values record.
 *
 * Each entry is a length-prefixed variable name, one byte giving the number
 * of missing values (1 to 3), a 32-bit value length, and then that many
 * values of that length. The values are converted with ctx->converter into
 * the matching variable's missing_string_values slots.
 *
 * Returns READSTAT_ERROR_PARSE for size != 1, truncated data, a missing
 * value count outside 1..3, or a variable name that matches no variable;
 * otherwise the error from sav_read_pascal_string() / readstat_convert(). */
static readstat_error_t sav_parse_long_string_missing_values_record(const void *data, size_t size, size_t count, sav_ctx_t *ctx) {
    if (size != 1)
        return READSTAT_ERROR_PARSE;

    readstat_error_t retval = READSTAT_OK;
    uint32_t i = 0, j = 0;
    const char *data_ptr = data;
    const char *data_end = data_ptr + count;
    char var_name_buf[256+1];

    while (data_ptr < data_end) {
        retval = sav_read_pascal_string(var_name_buf, sizeof(var_name_buf),
                &data_ptr, data_end - data_ptr, ctx);
        if (retval != READSTAT_OK)
            goto cleanup;

        if (data_ptr == data_end) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        char n_missing_values = *data_ptr++;
        if (n_missing_values < 1 || n_missing_values > 3) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }

        for (i=0; i<ctx->var_index;) {
            spss_varinfo_t *info = ctx->varinfo[i];
            if (strcmp(var_name_buf, info->longname) == 0) {
                info->n_missing_values = n_missing_values;

                uint32_t var_name_len = 0;

                /* Lengths are compared with the bytes left rather than by
                 * forming `data_ptr + len`, which can point past the buffer
                 * (undefined, and may wrap on 32-bit address spaces). */
                if ((size_t)(data_end - data_ptr) < sizeof(uint32_t)) {
                    retval = READSTAT_ERROR_PARSE;
                    goto cleanup;
                }

                memcpy(&var_name_len, data_ptr, sizeof(uint32_t));
                if (ctx->bswap)
                    var_name_len = byteswap4(var_name_len);

                data_ptr += sizeof(uint32_t);

                /* Every missing value of this entry has the same length. */
                for (j=0; j<n_missing_values; j++) {
                    if (var_name_len > (size_t)(data_end - data_ptr)) {
                        retval = READSTAT_ERROR_PARSE;
                        goto cleanup;
                    }

                    retval = readstat_convert(info->missing_string_values[j],
                            sizeof(info->missing_string_values[0]),
                            data_ptr, var_name_len, ctx->converter);
                    if (retval != READSTAT_OK)
                        goto cleanup;

                    data_ptr += var_name_len;
                }
                break;
            }
            i += info->n_segments;
        }
        /* The search advances by n_segments, so an unsuccessful search can
         * end past var_index rather than exactly on it; a successful one
         * always breaks out with i below var_index. */
        if (i >= ctx->var_index) {
            retval = READSTAT_ERROR_PARSE;
            goto cleanup;
        }
    }

    if (data_ptr != data_end) {
        retval = READSTAT_ERROR_PARSE;
    }

cleanup:
    return retval;
}
1274
1275
5.26k
/* First pass over the dictionary records, up to and including the
 * dictionary termination record.
 *
 * Variable, value label and document records are skipped. Of the extension
 * ("has data") records only the machine integer info record is read and
 * parsed in this pass; every other subtype is seeked over.
 *
 * Returns READSTAT_ERROR_READ / READSTAT_ERROR_SEEK on I/O failure,
 * READSTAT_ERROR_PARSE for an unknown record type or a bad integer info
 * record, or the error returned by the skip/parse helper. */
static readstat_error_t sav_parse_records_pass1(sav_ctx_t *ctx) {
    char data_buf[4096];
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;
    while (1) {
        uint32_t rec_type;
        uint32_t extra_info[3];
        size_t data_len = 0;
        int i;
        int done = 0;
        /* Reads are checked with != rather than <: the row-compressed reader
         * in this file tests io->read for -1, and a negative return compared
         * with `<` against an unsigned size would be converted to a huge
         * value and slip through as success. */
        if (io->read(&rec_type, sizeof(uint32_t), io->io_ctx) != sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        if (ctx->bswap) {
            rec_type = byteswap4(rec_type);
        }

        switch (rec_type) {
            case SAV_RECORD_TYPE_VARIABLE:
                retval = sav_skip_variable_record(ctx);
                if (retval != READSTAT_OK)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_VALUE_LABEL:
                retval = sav_skip_value_label_record(ctx);
                if (retval != READSTAT_OK)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_DOCUMENT:
                retval = sav_skip_document_record(ctx);
                if (retval != READSTAT_OK)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_DICT_TERMINATION:
                done = 1;
                break;
            case SAV_RECORD_TYPE_HAS_DATA: {
                /* Extension header: subtype, element size, element count. */
                if (io->read(extra_info, sizeof(extra_info), io->io_ctx) != sizeof(extra_info)) {
                    retval = READSTAT_ERROR_READ;
                    goto cleanup;
                }
                if (ctx->bswap) {
                    for (i=0; i<3; i++)
                        extra_info[i] = byteswap4(extra_info[i]);
                }
                uint32_t subtype = extra_info[0];
                size_t size = extra_info[1];
                size_t count = extra_info[2];
                data_len = size * count;
                if (subtype == SAV_RECORD_SUBTYPE_INTEGER_INFO) {
                    if (data_len > sizeof(data_buf)) {
                        retval = READSTAT_ERROR_PARSE;
                        goto cleanup;
                    }
                    if (io->read(data_buf, data_len, io->io_ctx) != data_len) {
                        retval = READSTAT_ERROR_PARSE;
                        goto cleanup;
                    }
                    retval = sav_parse_machine_integer_info_record(data_buf, data_len, ctx);
                    if (retval != READSTAT_OK)
                        goto cleanup;
                } else {
                    if (io->seek(data_len, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
                        retval = READSTAT_ERROR_SEEK;
                        goto cleanup;
                    }
                }
                break;
            }
            default:
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
        }
        if (done)
            break;
    }
cleanup:
    return retval;
}
1356
1357
4.27k
/* Second pass over the dictionary records, up to and including the
 * dictionary termination record.
 *
 * Variable, value label, document and termination records are handed to
 * their sav_read_* functions. Extension ("has data") records are read in
 * full into a growable buffer and dispatched on their subtype; the machine
 * integer info record is ignored here because pass 1 already parsed it, and
 * unrecognised subtypes are ignored.
 *
 * Returns READSTAT_ERROR_MALLOC if the buffer cannot be (re)allocated,
 * READSTAT_ERROR_READ / READSTAT_ERROR_PARSE on short or empty input and on
 * unknown record types, or the error returned by the record handler. */
static readstat_error_t sav_parse_records_pass2(sav_ctx_t *ctx) {
    void *data_buf = NULL;
    size_t data_buf_capacity = 4096;
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = ctx->io;

    if ((data_buf = readstat_malloc(data_buf_capacity)) == NULL) {
        retval = READSTAT_ERROR_MALLOC;
        goto cleanup;
    }

    while (1) {
        uint32_t rec_type;
        uint32_t extra_info[3];
        size_t data_len = 0;
        int i;
        int done = 0;
        /* Reads are checked with != rather than <: the row-compressed reader
         * in this file tests io->read for -1, and a negative return compared
         * with `<` against an unsigned size would be converted to a huge
         * value and slip through as success. */
        if (io->read(&rec_type, sizeof(uint32_t), io->io_ctx) != sizeof(uint32_t)) {
            retval = READSTAT_ERROR_READ;
            goto cleanup;
        }

        if (ctx->bswap) {
            rec_type = byteswap4(rec_type);
        }

        switch (rec_type) {
            case SAV_RECORD_TYPE_VARIABLE:
                if ((retval = sav_read_variable_record(ctx)) != READSTAT_OK)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_VALUE_LABEL:
                if ((retval = sav_read_value_label_record(ctx)) != READSTAT_OK)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_DOCUMENT:
                if ((retval = sav_read_document_record(ctx)) != READSTAT_OK)
                    goto cleanup;
                break;
            case SAV_RECORD_TYPE_DICT_TERMINATION:
                if ((retval = sav_read_dictionary_termination_record(ctx)) != READSTAT_OK)
                    goto cleanup;
                done = 1;
                break;
            case SAV_RECORD_TYPE_HAS_DATA: {
                /* Extension header: subtype, element size, element count. */
                if (io->read(extra_info, sizeof(extra_info), io->io_ctx) != sizeof(extra_info)) {
                    retval = READSTAT_ERROR_READ;
                    goto cleanup;
                }
                if (ctx->bswap) {
                    for (i=0; i<3; i++)
                        extra_info[i] = byteswap4(extra_info[i]);
                }
                uint32_t subtype = extra_info[0];
                size_t size = extra_info[1];
                size_t count = extra_info[2];
                data_len = size * count;
                /* Grow the buffer to fit the whole payload. */
                if (data_buf_capacity < data_len) {
                    if ((data_buf = readstat_realloc(data_buf, data_buf_capacity = data_len)) == NULL) {
                        retval = READSTAT_ERROR_MALLOC;
                        goto cleanup;
                    }
                }
                if (data_len == 0 || io->read(data_buf, data_len, io->io_ctx) != data_len) {
                    retval = READSTAT_ERROR_PARSE;
                    goto cleanup;
                }

                switch (subtype) {
                    case SAV_RECORD_SUBTYPE_INTEGER_INFO:
                        /* parsed in pass 1 */
                        break;
                    case SAV_RECORD_SUBTYPE_FP_INFO:
                        retval = sav_parse_machine_floating_point_record(data_buf, size, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_VAR_DISPLAY:
                        retval = sav_store_variable_display_parameter_record(data_buf, size, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_LONG_VAR_NAME:
                        retval = sav_parse_long_variable_names_record(data_buf, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_VERY_LONG_STR:
                        retval = sav_parse_very_long_string_record(data_buf, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_LONG_STRING_VALUE_LABELS:
                        retval = sav_parse_long_string_value_labels_record(data_buf, size, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    case SAV_RECORD_SUBTYPE_LONG_STRING_MISSING_VALUES:
                        retval = sav_parse_long_string_missing_values_record(data_buf, size, count, ctx);
                        if (retval != READSTAT_OK)
                            goto cleanup;
                        break;
                    default: /* misc. info */
                        break;
                }
                break;
            }
            default:
                retval = READSTAT_ERROR_PARSE;
                goto cleanup;
        }
        if (done)
            break;
    }
cleanup:
    free(data_buf);
    return retval;
}
1476
1477
1.60k
/* Assign each variable its final segment count and output index, count the
 * variables, and allocate ctx->variables with one slot per variable.
 *
 * A variable with a non-zero string_length gets one segment per 252 bytes
 * (rounded up); other variables keep the n_segments they already have.
 *
 * Returns READSTAT_ERROR_PARSE if a string_length exceeds
 * VERY_LONG_STRING_MAX_LENGTH, and READSTAT_ERROR_MALLOC if the variables
 * array cannot be allocated for a non-zero variable count. */
static readstat_error_t sav_set_n_segments_and_var_count(sav_ctx_t *ctx) {
    int i;
    ctx->var_count = 0;
    for (i=0; i<ctx->var_index;) {
        spss_varinfo_t *info = ctx->varinfo[i];
        if (info->string_length > VERY_LONG_STRING_MAX_LENGTH)
            return READSTAT_ERROR_PARSE;
        if (info->string_length) {
            info->n_segments = (info->string_length + 251) / 252;
        }
        info->index = ctx->var_count++;
        i += info->n_segments;
    }
    ctx->variables = readstat_calloc(ctx->var_count, sizeof(readstat_variable_t *));
    /* The array is indexed by info->index when variables are handled, so a
     * failed allocation must be reported here instead of being dereferenced
     * later. An empty variable list is allowed to have no array. */
    if (ctx->var_count > 0 && ctx->variables == NULL)
        return READSTAT_ERROR_MALLOC;
    return READSTAT_OK;
}
1493
1494
1.52k
/*
 * Build a readstat_variable_t for every logical variable and, when a
 * variable handler is registered, report each one to it.
 *
 * The variables are created even when no variable handler is set, so that
 * ctx->variables[] is populated for later consumers such as
 * sav_handle_fweight(), which hands ctx->variables[info->index] to the
 * fweight callback.
 *
 * The handler receives the variable's index, the variable itself, the name
 * of its value-label set (SAV_LABEL_NAME_PREFIX followed by labels_index, or
 * NULL when labels_index is -1) and the user context.
 *
 * Returns READSTAT_ERROR_USER_ABORT if the handler returns
 * READSTAT_HANDLER_ABORT, READSTAT_OK otherwise.
 */
static readstat_error_t sav_handle_variables(sav_ctx_t *ctx) {
    int i;
    int index_after_skipping = 0;
    readstat_error_t retval = READSTAT_OK;

    for (i=0; i<ctx->var_index;) {
        spss_varinfo_t *info = ctx->varinfo[i];
        int skip_variable = 0;

        ctx->variables[info->index] = spss_init_variable_for_info(info, index_after_skipping, ctx->converter);

        if (ctx->handle.variable) {
            char label_name_buf[256];

            snprintf(label_name_buf, sizeof(label_name_buf), SAV_LABEL_NAME_PREFIX "%d", info->labels_index);

            int cb_retval = ctx->handle.variable(info->index, ctx->variables[info->index],
                    info->labels_index == -1 ? NULL : label_name_buf,
                    ctx->user_ctx);

            if (cb_retval == READSTAT_HANDLER_ABORT) {
                retval = READSTAT_ERROR_USER_ABORT;
                goto cleanup;
            }

            skip_variable = (cb_retval == READSTAT_HANDLER_SKIP_VARIABLE);
        }

        if (skip_variable) {
            ctx->variables[info->index]->skip = 1;
        } else {
            /* Only variables that are kept consume a post-skip index. */
            index_after_skipping++;
        }

        /* Jump over the remaining segments of this variable. */
        i += info->n_segments;
    }
cleanup:
    return retval;
}
1529
1530
1.52k
/*
 * Report the frequency-weight variable to the fweight handler, if any.
 *
 * Does nothing unless a fweight handler is registered and
 * ctx->fweight_index is non-negative. Otherwise scans the logical variables
 * for the first one whose offset equals ctx->fweight_index - 1 and passes
 * the matching entry of ctx->variables to the handler; the scan stops there.
 *
 * Returns READSTAT_ERROR_USER_ABORT if the handler returns anything other
 * than READSTAT_HANDLER_OK, READSTAT_OK in every other case (including when
 * no variable matches).
 */
static readstat_error_t sav_handle_fweight(sav_ctx_t *ctx) {
    int pos = 0;

    if (!ctx->handle.fweight || ctx->fweight_index < 0)
        return READSTAT_OK;

    while (pos < ctx->var_index) {
        spss_varinfo_t *candidate = ctx->varinfo[pos];

        if (candidate->offset != ctx->fweight_index - 1) {
            /* Not the weight variable: step over all of its segments. */
            pos += candidate->n_segments;
            continue;
        }

        if (ctx->handle.fweight(ctx->variables[candidate->index], ctx->user_ctx) != READSTAT_HANDLER_OK)
            return READSTAT_ERROR_USER_ABORT;

        return READSTAT_OK;
    }

    return READSTAT_OK;
}
1549
1550
5.26k
/*
 * Parse the header's creation time and creation date into ctx->timestamp.
 *
 * The time field is parsed first, then the date field, both into the same
 * struct tm (initialised with tm_isdst = -1). If either parser fails its
 * error is returned and ctx->timestamp is left untouched; otherwise the
 * result of mktime() is stored and READSTAT_OK is returned.
 */
readstat_error_t sav_parse_timestamp(sav_ctx_t *ctx, sav_file_header_record_t *header) {
    struct tm parsed = { .tm_isdst = -1 };
    readstat_error_t status;

    status = sav_parse_time(header->creation_time, sizeof(header->creation_time),
            &parsed, ctx->handle.error, ctx->user_ctx);
    if (status != READSTAT_OK)
        return status;

    status = sav_parse_date(header->creation_date, sizeof(header->creation_date),
            &parsed, ctx->handle.error, ctx->user_ctx);
    if (status != READSTAT_OK)
        return status;

    ctx->timestamp = mktime(&parsed);
    return READSTAT_OK;
}
1569
1570
5.31k
/*
 * Parse an SPSS .sav file, driving the handlers registered on `parser`.
 *
 * parser   - supplies the I/O callbacks, handlers, encodings and the
 *            row_offset / row_limit settings.
 * path     - passed to the I/O open callback.
 * user_ctx - opaque pointer forwarded to every handler.
 *
 * Sequence: open, measure the file, read the fixed header, create the
 * parsing context, run two passes over the records (rewinding to just after
 * the header in between), number the variables, then invoke the metadata,
 * variable and fweight handlers and finally read the data if a value
 * handler is registered.
 *
 * Returns READSTAT_OK on success or the first error encountered. The I/O
 * handle is closed and the context freed on every path after a successful
 * open.
 */
readstat_error_t readstat_parse_sav(readstat_parser_t *parser, const char *path, void *user_ctx) {
    readstat_error_t retval = READSTAT_OK;
    readstat_io_t *io = parser->io;
    sav_file_header_record_t header;
    sav_ctx_t *ctx = NULL;
    size_t file_size = 0;
    ssize_t bytes_read = 0;

    if (io->open(path, io->io_ctx) == -1) {
        return READSTAT_ERROR_OPEN;
    }

    /* Seek to the end to learn the file size; a failed seek's -1 shows up as (size_t)-1. */
    file_size = io->seek(0, READSTAT_SEEK_END, io->io_ctx);
    if (file_size == (size_t)-1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if (io->seek(0, READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    /* Check the sign before comparing against sizeof: a negative error
     * return would otherwise be converted to a huge unsigned value and
     * slip past a plain `< sizeof(...)` test, leaving `header` unset. */
    bytes_read = io->read(&header, sizeof(sav_file_header_record_t), io->io_ctx);
    if (bytes_read < 0 || (size_t)bytes_read < sizeof(sav_file_header_record_t)) {
        retval = READSTAT_ERROR_READ;
        goto cleanup;
    }

    ctx = sav_ctx_init(&header, io);
    if (ctx == NULL) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    ctx->handle = parser->handlers;
    ctx->input_encoding = parser->input_encoding;
    ctx->output_encoding = parser->output_encoding;
    ctx->user_ctx = user_ctx;
    ctx->file_size = file_size;
    if (parser->row_offset > 0)
        ctx->row_offset = parser->row_offset;
    if (ctx->record_count >= 0) {
        /* Known record count: clamp the offset to it and cap the limit at
         * the number of rows left after skipping. */
        int record_count_after_skipping = ctx->record_count - ctx->row_offset;
        if (record_count_after_skipping < 0) {
            record_count_after_skipping = 0;
            ctx->row_offset = ctx->record_count;
        }
        ctx->row_limit = record_count_after_skipping;
        if (parser->row_limit > 0 && parser->row_limit < record_count_after_skipping)
            ctx->row_limit = parser->row_limit;
    } else if (parser->row_limit > 0) {
        /* Negative record count: only the caller's limit applies. */
        ctx->row_limit = parser->row_limit;
    }

    /* ignore errors */
    sav_parse_timestamp(ctx, &header);

    if ((retval = sav_parse_records_pass1(ctx)) != READSTAT_OK)
        goto cleanup;

    /* Rewind to the first byte after the header for the second pass. */
    if (io->seek(sizeof(sav_file_header_record_t), READSTAT_SEEK_SET, io->io_ctx) == -1) {
        retval = READSTAT_ERROR_SEEK;
        goto cleanup;
    }

    if ((retval = sav_update_progress(ctx)) != READSTAT_OK)
        goto cleanup;

    if ((retval = sav_parse_records_pass2(ctx)) != READSTAT_OK)
        goto cleanup;

    if ((retval = sav_set_n_segments_and_var_count(ctx)) != READSTAT_OK)
        goto cleanup;

    /* A file without any variables is rejected. */
    if (ctx->var_count == 0) {
        retval = READSTAT_ERROR_PARSE;
        goto cleanup;
    }

    if (ctx->handle.metadata) {
        readstat_metadata_t metadata = {
            /* -1 is reported when the record count is negative. */
            .row_count = ctx->record_count < 0 ? -1 : ctx->row_limit,
            .var_count = ctx->var_count,
            .file_encoding = ctx->input_encoding,
            .file_format_version = ctx->format_version,
            .creation_time = ctx->timestamp,
            .modified_time = ctx->timestamp,
            .compression = ctx->compression,
            .endianness = ctx->endianness
        };
        if ((retval = readstat_convert(ctx->file_label, sizeof(ctx->file_label),
                        header.file_label, sizeof(header.file_label), ctx->converter)) != READSTAT_OK)
            goto cleanup;

        metadata.file_label = ctx->file_label;

        if (ctx->handle.metadata(&metadata, ctx->user_ctx) != READSTAT_HANDLER_OK) {
            retval = READSTAT_ERROR_USER_ABORT;
            goto cleanup;
        }
    }

    if ((retval = sav_parse_variable_display_parameter_record(ctx)) != READSTAT_OK)
        goto cleanup;

    if ((retval = sav_handle_variables(ctx)) != READSTAT_OK)
        goto cleanup;

    if ((retval = sav_handle_fweight(ctx)) != READSTAT_OK)
        goto cleanup;

    /* The data section is only read when someone is listening for values. */
    if (ctx->handle.value) {
        retval = sav_read_data(ctx);
    }

cleanup:
    io->close(io->io_ctx);
    if (ctx)
        sav_ctx_free(ctx);

    return retval;
}