Coverage Report

Created: 2025-10-10 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/readstat/src/stata/readstat_dta.c
Line
Count
Source
1
#include <stdlib.h>
2
#include <math.h>
3
#include <stdint.h>
4
#include <string.h>
5
#include <sys/types.h>
6
7
#include "../readstat.h"
8
#include "../readstat_iconv.h"
9
#include "../readstat_malloc.h"
10
#include "../readstat_bits.h"
11
12
#include "readstat_dta.h"
13
14
7.58k
#define DTA_MIN_VERSION 104
15
3.78k
#define DTA_MAX_VERSION 119
16
17
4.25k
dta_ctx_t *dta_ctx_alloc(readstat_io_t *io) {
18
4.25k
    dta_ctx_t *ctx = calloc(1, sizeof(dta_ctx_t));
19
4.25k
    if (ctx == NULL) {
20
0
        return NULL;
21
0
    }
22
23
4.25k
    ctx->io = io;
24
4.25k
    ctx->initialized = 0;
25
26
4.25k
    return ctx;
27
4.25k
}
28
29
readstat_error_t dta_ctx_init(dta_ctx_t *ctx, uint32_t nvar, uint64_t nobs,
30
        unsigned char byteorder, unsigned char ds_format,
31
3.79k
        const char *input_encoding, const char *output_encoding) {
32
3.79k
    readstat_error_t retval = READSTAT_OK;
33
3.79k
    int machine_byteorder = DTA_HILO;
34
3.79k
    if (ds_format < DTA_MIN_VERSION || ds_format > DTA_MAX_VERSION)
35
14
        return READSTAT_ERROR_UNSUPPORTED_FILE_FORMAT_VERSION;
36
37
3.77k
    if (machine_is_little_endian()) {
38
3.77k
        machine_byteorder = DTA_LOHI;
39
3.77k
    }
40
41
3.77k
    ctx->bswap = (byteorder != machine_byteorder);
42
3.77k
    ctx->ds_format = ds_format;
43
3.77k
    ctx->endianness = byteorder == DTA_LOHI ? READSTAT_ENDIAN_LITTLE : READSTAT_ENDIAN_BIG;
44
45
3.77k
    ctx->nvar = nvar;
46
3.77k
    ctx->nobs = nobs;
47
48
3.77k
    if (ctx->nvar) {
49
2.87k
        if ((ctx->variables = readstat_calloc(ctx->nvar, sizeof(readstat_variable_t *))) == NULL) {
50
87
            retval = READSTAT_ERROR_MALLOC;
51
87
            goto cleanup;
52
87
        }
53
2.87k
    }
54
55
3.69k
    ctx->machine_is_twos_complement = READSTAT_MACHINE_IS_TWOS_COMPLEMENT;
56
57
3.69k
    if (ds_format < 105) {
58
688
        ctx->fmtlist_entry_len = 7;
59
3.00k
    } else if (ds_format < 114) {
60
865
        ctx->fmtlist_entry_len = 12;
61
2.13k
    } else if (ds_format < 118) {
62
1.74k
        ctx->fmtlist_entry_len = 49;
63
1.74k
    } else {
64
390
        ctx->fmtlist_entry_len = 57;
65
390
    }
66
    
67
3.69k
    if (ds_format >= 117) {
68
2.09k
        ctx->typlist_version = 117;
69
2.09k
    } else if (ds_format >= 111) {
70
320
        ctx->typlist_version = 111;
71
1.28k
    } else {
72
1.28k
        ctx->typlist_version = 0;
73
1.28k
    }
74
75
3.69k
    if (ds_format >= 118) {
76
390
        ctx->data_label_len_len = 2;
77
390
        ctx->strl_v_len = 2;
78
390
        ctx->strl_o_len = 6;
79
3.30k
    } else if (ds_format >= 117) {
80
1.70k
        ctx->data_label_len_len = 1;
81
1.70k
        ctx->strl_v_len = 4;
82
1.70k
        ctx->strl_o_len = 4;
83
1.70k
    }
84
85
3.69k
    if (ds_format < 105) {
86
688
        ctx->expansion_len_len = 0;
87
3.00k
    } else if (ds_format < 110) {
88
592
        ctx->expansion_len_len = 2;
89
2.41k
    } else {
90
2.41k
        ctx->expansion_len_len = 4;
91
2.41k
    }
92
    
93
3.69k
    if (ds_format < 110) {
94
1.28k
        ctx->lbllist_entry_len = 9;
95
1.28k
        ctx->variable_name_len = 9;
96
1.28k
        ctx->ch_metadata_len = 9;
97
2.41k
    } else if (ds_format < 118) {
98
2.02k
        ctx->lbllist_entry_len = 33;
99
2.02k
        ctx->variable_name_len = 33;
100
2.02k
        ctx->ch_metadata_len = 33;
101
2.02k
    } else {
102
390
        ctx->lbllist_entry_len = 129;
103
390
        ctx->variable_name_len = 129;
104
390
        ctx->ch_metadata_len = 129;
105
390
    }
106
107
3.69k
    if (ds_format < 108) {
108
1.27k
        ctx->variable_labels_entry_len = 32;
109
1.27k
        ctx->data_label_len = 32;
110
2.41k
    } else if (ds_format < 118) {
111
2.02k
        ctx->variable_labels_entry_len = 81;
112
2.02k
        ctx->data_label_len = 81;
113
2.02k
    } else {
114
390
        ctx->variable_labels_entry_len = 321;
115
390
        ctx->data_label_len = 321;
116
390
    }
117
118
3.69k
    if (ds_format < 105) {
119
688
        ctx->timestamp_len = 0;
120
688
        ctx->value_label_table_len_len = 2;
121
688
        ctx->value_label_table_labname_len = 12;
122
688
        ctx->value_label_table_padding_len = 2;
123
3.00k
    } else {
124
3.00k
        ctx->timestamp_len = 18;
125
3.00k
        ctx->value_label_table_len_len = 4;
126
3.00k
        if (ds_format < 118) {
127
2.61k
            ctx->value_label_table_labname_len = 33;
128
2.61k
        } else {
129
390
            ctx->value_label_table_labname_len = 129;
130
390
        }
131
3.00k
        ctx->value_label_table_padding_len = 3;
132
3.00k
    }
133
134
3.69k
    if (ds_format < 117) {
135
1.60k
        ctx->typlist_entry_len = 1;
136
1.60k
        ctx->file_is_xmlish = 0;
137
2.09k
    } else {
138
2.09k
        ctx->typlist_entry_len = 2;
139
2.09k
        ctx->file_is_xmlish = 1;
140
2.09k
    }
141
142
3.69k
    if (ds_format < 113) {
143
1.30k
        ctx->max_int8 = DTA_OLD_MAX_INT8;
144
1.30k
        ctx->max_int16 = DTA_OLD_MAX_INT16;
145
1.30k
        ctx->max_int32 = DTA_OLD_MAX_INT32;
146
1.30k
        ctx->max_float = DTA_OLD_MAX_FLOAT;
147
1.30k
        ctx->max_double = DTA_OLD_MAX_DOUBLE;
148
2.39k
    } else {
149
2.39k
        ctx->max_int8 = DTA_113_MAX_INT8;
150
2.39k
        ctx->max_int16 = DTA_113_MAX_INT16;
151
2.39k
        ctx->max_int32 = DTA_113_MAX_INT32;
152
2.39k
        ctx->max_float = DTA_113_MAX_FLOAT;
153
2.39k
        ctx->max_double = DTA_113_MAX_DOUBLE;
154
155
2.39k
        ctx->supports_tagged_missing = 1;
156
2.39k
    }
157
158
3.69k
    if (output_encoding) {
159
3.69k
        if (input_encoding) {
160
0
            ctx->converter = iconv_open(output_encoding, input_encoding);
161
3.69k
        } else if (ds_format < 118) {
162
3.30k
            ctx->converter = iconv_open(output_encoding, "WINDOWS-1252");
163
3.30k
        } else if (strcmp(output_encoding, "UTF-8") != 0) {
164
0
            ctx->converter = iconv_open(output_encoding, "UTF-8");
165
0
        }
166
3.69k
        if (ctx->converter == (iconv_t)-1) {
167
0
            ctx->converter = NULL;
168
0
            retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
169
0
            goto cleanup;
170
0
        }
171
3.69k
    }
172
173
3.69k
    if (ds_format < 119) {
174
3.61k
        ctx->srtlist_len = (ctx->nvar + 1) * sizeof(int16_t);
175
3.61k
    } else {
176
75
        ctx->srtlist_len = (ctx->nvar + 1) * sizeof(int32_t);
177
75
    }
178
179
3.69k
    if ((ctx->srtlist = readstat_malloc(ctx->srtlist_len)) == NULL) {
180
0
        retval = READSTAT_ERROR_MALLOC;
181
0
        goto cleanup;
182
0
    }
183
184
3.69k
    if (ctx->nvar > 0) {
185
2.78k
        ctx->typlist_len = ctx->nvar * sizeof(uint16_t);
186
2.78k
        ctx->varlist_len = ctx->variable_name_len * ctx->nvar * sizeof(char);
187
2.78k
        ctx->fmtlist_len = ctx->fmtlist_entry_len * ctx->nvar * sizeof(char);
188
2.78k
        ctx->lbllist_len = ctx->lbllist_entry_len * ctx->nvar * sizeof(char);
189
2.78k
        ctx->variable_labels_len = ctx->variable_labels_entry_len * ctx->nvar * sizeof(char);
190
191
2.78k
        if ((ctx->typlist = readstat_malloc(ctx->typlist_len)) == NULL) {
192
0
            retval = READSTAT_ERROR_MALLOC;
193
0
            goto cleanup;
194
0
        }
195
2.78k
        if ((ctx->varlist = readstat_malloc(ctx->varlist_len)) == NULL) {
196
23
            retval = READSTAT_ERROR_MALLOC;
197
23
            goto cleanup;
198
23
        }
199
2.76k
        if ((ctx->fmtlist = readstat_malloc(ctx->fmtlist_len)) == NULL) {
200
0
            retval = READSTAT_ERROR_MALLOC;
201
0
            goto cleanup;
202
0
        }
203
2.76k
        if ((ctx->lbllist = readstat_malloc(ctx->lbllist_len)) == NULL) {
204
0
            retval = READSTAT_ERROR_MALLOC;
205
0
            goto cleanup;
206
0
        }
207
2.76k
        if ((ctx->variable_labels = readstat_malloc(ctx->variable_labels_len)) == NULL) {
208
3
            retval = READSTAT_ERROR_MALLOC;
209
3
            goto cleanup;
210
3
        }
211
2.76k
    }
212
213
3.66k
    ctx->initialized = 1;
214
215
3.77k
cleanup:
216
3.77k
    return retval;
217
3.66k
}
218
219
4.25k
void dta_ctx_free(dta_ctx_t *ctx) {
220
4.25k
    if (ctx->typlist)
221
2.78k
        free(ctx->typlist);
222
4.25k
    if (ctx->varlist)
223
2.76k
        free(ctx->varlist);
224
4.25k
    if (ctx->srtlist)
225
3.69k
        free(ctx->srtlist);
226
4.25k
    if (ctx->fmtlist)
227
2.76k
        free(ctx->fmtlist);
228
4.25k
    if (ctx->lbllist)
229
2.76k
        free(ctx->lbllist);
230
4.25k
    if (ctx->variable_labels)
231
2.76k
        free(ctx->variable_labels);
232
4.25k
    if (ctx->converter)
233
3.30k
        iconv_close(ctx->converter);
234
4.25k
    if (ctx->data_label)
235
3.41k
        free(ctx->data_label);
236
4.25k
    if (ctx->variables) {
237
2.78k
        int i;
238
52.2M
        for (i=0; i<ctx->nvar; i++) {
239
52.2M
            if (ctx->variables[i])
240
37.8k
                free(ctx->variables[i]);
241
52.2M
        }
242
2.78k
        free(ctx->variables);
243
2.78k
    }
244
4.25k
    if (ctx->strls) {
245
804
        int i;
246
34.5k
        for (i=0; i<ctx->strls_count; i++) {
247
33.7k
            free(ctx->strls[i]);
248
33.7k
        }
249
804
        free(ctx->strls);
250
804
    }
251
4.25k
    free(ctx);
252
4.25k
}
253
254
readstat_error_t dta_type_info(uint16_t typecode, dta_ctx_t *ctx,
255
146k
        size_t *max_len, readstat_type_t *out_type) {
256
146k
    readstat_error_t retval = READSTAT_OK;
257
146k
    size_t len = 0;
258
146k
    readstat_type_t type = READSTAT_TYPE_STRING;
259
146k
    if (ctx->typlist_version == 111) {
260
36.1k
        switch (typecode) {
261
877
            case DTA_111_TYPE_CODE_INT8:
262
877
                len = 1; type = READSTAT_TYPE_INT8; break;
263
434
            case DTA_111_TYPE_CODE_INT16:
264
434
                len = 2; type = READSTAT_TYPE_INT16; break;
265
440
            case DTA_111_TYPE_CODE_INT32:
266
440
                len = 4; type = READSTAT_TYPE_INT32; break;
267
487
            case DTA_111_TYPE_CODE_FLOAT:
268
487
                len = 4; type = READSTAT_TYPE_FLOAT; break;
269
711
            case DTA_111_TYPE_CODE_DOUBLE:
270
711
                len = 8; type = READSTAT_TYPE_DOUBLE; break;
271
33.2k
            default:
272
33.2k
                len = typecode; type = READSTAT_TYPE_STRING; break;
273
36.1k
        }
274
110k
    } else if (ctx->typlist_version == 117) {
275
61.8k
        switch (typecode) {
276
588
            case DTA_117_TYPE_CODE_INT8:
277
588
                len = 1; type = READSTAT_TYPE_INT8; break;
278
895
            case DTA_117_TYPE_CODE_INT16:
279
895
                len = 2; type = READSTAT_TYPE_INT16; break;
280
1.53k
            case DTA_117_TYPE_CODE_INT32:
281
1.53k
                len = 4; type = READSTAT_TYPE_INT32; break;
282
854
            case DTA_117_TYPE_CODE_FLOAT:
283
854
                len = 4; type = READSTAT_TYPE_FLOAT; break;
284
837
            case DTA_117_TYPE_CODE_DOUBLE:
285
837
                len = 8; type = READSTAT_TYPE_DOUBLE; break;
286
52.5k
            case DTA_117_TYPE_CODE_STRL:
287
52.5k
                len = 8; type = READSTAT_TYPE_STRING_REF; break;
288
4.62k
            default:
289
4.62k
                len = typecode; type = READSTAT_TYPE_STRING; break;
290
61.8k
        }
291
61.8k
    } else if (typecode < 0x7F) {
292
26.1k
        switch (typecode) {
293
23.3k
            case DTA_OLD_TYPE_CODE_INT8:
294
23.3k
                len = 1; type = READSTAT_TYPE_INT8; break;
295
1.07k
            case DTA_OLD_TYPE_CODE_INT16:
296
1.07k
                len = 2; type = READSTAT_TYPE_INT16; break;
297
517
            case DTA_OLD_TYPE_CODE_INT32:
298
517
                len = 4; type = READSTAT_TYPE_INT32; break;
299
640
            case DTA_OLD_TYPE_CODE_FLOAT:
300
640
                len = 4; type = READSTAT_TYPE_FLOAT; break;
301
549
            case DTA_OLD_TYPE_CODE_DOUBLE:
302
549
                len = 8; type = READSTAT_TYPE_DOUBLE; break;
303
15
            default:
304
15
                retval = READSTAT_ERROR_PARSE; break;
305
26.1k
        }
306
26.1k
    } else {
307
22.7k
        len = typecode - 0x7F;
308
22.7k
        type = READSTAT_TYPE_STRING;
309
22.7k
    }
310
    
311
146k
    if (max_len)
312
146k
        *max_len = len;
313
146k
    if (out_type)
314
109k
        *out_type = type;
315
316
146k
    return retval;
317
146k
}