Coverage Report

Created: 2025-12-11 06:21

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/readstat/src/sas/readstat_sas.c
Line
Count
Source
1
2
#include <stdio.h>
3
#include <stdlib.h>
4
#include <errno.h>
5
#include <string.h>
6
#include <math.h>
7
#include <time.h>
8
#include <limits.h>
9
#include <inttypes.h>
10
11
#include "readstat_sas.h"
12
#include "../readstat_iconv.h"
13
#include "../readstat_convert.h"
14
#include "../readstat_writer.h"
15
16
0
#define SAS_FILE_HEADER_SIZE_32BIT 1024
17
0
#define SAS_FILE_HEADER_SIZE_64BIT 8192
18
0
#define SAS_DEFAULT_PAGE_SIZE      4096
19
20
#define SAS_DEFAULT_STRING_ENCODING "WINDOWS-1252"
21
22
/* 32-byte signature that sas_read_header() compares against the `magic`
 * field at the start of the file header; a match is one of the two
 * signatures accepted there. */
unsigned char sas7bdat_magic_number[32] = {
    0x00, 0x00, 0x00, 0x00,   0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,   0xc2, 0xea, 0x81, 0x60,
    0xb3, 0x14, 0x11, 0xcf,   0xbd, 0x92, 0x08, 0x00,
    0x09, 0xc7, 0x31, 0x8c,   0x18, 0x1f, 0x10, 0x11
};
28
29
/* 32-byte signature that sas_read_header() accepts as an alternative to
 * sas7bdat_magic_number; the two differ only in the 16th byte (0x63 here
 * versus 0x60). */
unsigned char sas7bcat_magic_number[32] = {
    0x00, 0x00, 0x00, 0x00,   0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,   0xc2, 0xea, 0x81, 0x63,
    0xb3, 0x14, 0x11, 0xcf,   0xbd, 0x92, 0x08, 0x00,
    0x09, 0xc7, 0x31, 0x8c,   0x18, 0x1f, 0x10, 0x11
};
35
36
/* This table is cobbled together from extant files and:
 * https://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm
 * https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html
 *
 * Discrepancies from the official documentation are noted with a comment. It
 * appears that in some instances SAS software uses a newer encoding than
 * what's listed in the docs. In these cases the encoding used by ReadStat
 * represents the author's best guess.
 *
 * sas_read_header() scans this table linearly, matching the header's
 * encoding code against `.code` and storing the matching `.name`; a code
 * that is not listed is reported as an unsupported character set.
 */
static readstat_charset_entry_t _charset_table[] = { 
    { .code = 0,     .name = SAS_DEFAULT_STRING_ENCODING },
    { .code = 20,    .name = "UTF-8" },
    { .code = 28,    .name = "US-ASCII" },
    { .code = 29,    .name = "ISO-8859-1" },
    { .code = 30,    .name = "ISO-8859-2" },
    { .code = 31,    .name = "ISO-8859-3" },
    { .code = 32,    .name = "ISO-8859-4" },
    { .code = 33,    .name = "ISO-8859-5" },
    { .code = 34,    .name = "ISO-8859-6" },
    { .code = 35,    .name = "ISO-8859-7" },
    { .code = 36,    .name = "ISO-8859-8" },
    { .code = 37,    .name = "ISO-8859-9" },
    { .code = 39,    .name = "ISO-8859-11" },
    { .code = 40,    .name = "ISO-8859-15" },
    { .code = 41,    .name = "CP437" },
    { .code = 42,    .name = "CP850" },
    { .code = 43,    .name = "CP852" },
    { .code = 44,    .name = "CP857" },
    { .code = 45,    .name = "CP858" },
    { .code = 46,    .name = "CP862" },
    { .code = 47,    .name = "CP864" },
    { .code = 48,    .name = "CP865" },
    { .code = 49,    .name = "CP866" },
    { .code = 50,    .name = "CP869" },
    { .code = 51,    .name = "CP874" },
    { .code = 52,    .name = "CP921" },
    { .code = 53,    .name = "CP922" },
    { .code = 54,    .name = "CP1129" },
    { .code = 55,    .name = "CP720" },
    { .code = 56,    .name = "CP737" },
    { .code = 57,    .name = "CP775" },
    { .code = 58,    .name = "CP860" },
    { .code = 59,    .name = "CP863" },
    { .code = 60,    .name = "WINDOWS-1250" },
    { .code = 61,    .name = "WINDOWS-1251" },
    { .code = 62,    .name = "WINDOWS-1252" },
    { .code = 63,    .name = "WINDOWS-1253" },
    { .code = 64,    .name = "WINDOWS-1254" },
    { .code = 65,    .name = "WINDOWS-1255" },
    { .code = 66,    .name = "WINDOWS-1256" },
    { .code = 67,    .name = "WINDOWS-1257" },
    { .code = 68,    .name = "WINDOWS-1258" },
    { .code = 69,    .name = "MACROMAN" },
    { .code = 70,    .name = "MACARABIC" },
    { .code = 71,    .name = "MACHEBREW" },
    { .code = 72,    .name = "MACGREEK" },
    { .code = 73,    .name = "MACTHAI" },
    { .code = 75,    .name = "MACTURKISH" },
    { .code = 76,    .name = "MACUKRAINE" },
    { .code = 118,   .name = "CP950" },
    { .code = 119,   .name = "EUC-TW" },
    { .code = 123,   .name = "BIG-5" },
    { .code = 125,   .name = "GB18030" }, // "euc-cn" in SAS
    { .code = 126,   .name = "WINDOWS-936" }, // "zwin"
    { .code = 128,   .name = "CP1381" }, // "zpce"
    { .code = 134,   .name = "EUC-JP" },
    { .code = 136,   .name = "CP949" },
    { .code = 137,   .name = "CP942" },
    { .code = 138,   .name = "CP932" }, // "shift-jis" in SAS
    { .code = 140,   .name = "EUC-KR" },
    { .code = 141,   .name = "CP949" }, // "kpce"
    { .code = 142,   .name = "CP949" }, // "kwin"
    { .code = 163,   .name = "MACICELAND" },
    { .code = 167,   .name = "ISO-2022-JP" },
    { .code = 168,   .name = "ISO-2022-KR" },
    { .code = 169,   .name = "ISO-2022-CN" },
    { .code = 172,   .name = "ISO-2022-CN-EXT" },
    { .code = 204,   .name = SAS_DEFAULT_STRING_ENCODING }, // "any" in SAS
    { .code = 205,   .name = "GB18030" },
    { .code = 227,   .name = "ISO-8859-14" },
    { .code = 242,   .name = "ISO-8859-13" },
    { .code = 245,   .name = "MACCROATIAN" },
    { .code = 246,   .name = "MACCYRILLIC" },
    { .code = 247,   .name = "MACROMANIA" },
    { .code = 248,   .name = "SHIFT_JISX0213" },
};
122
123
4.43k
/* Offset, in seconds, of the SAS epoch (01-01-1960) relative to the Unix
 * epoch (01-01-1970). The result is negative because 1960 precedes 1970. */
static time_t sas_epoch(void) {
    const int days_between_epochs = 3653;
    const int seconds_per_day = 86400;

    return -days_between_epochs * seconds_per_day;
}
126
127
8.08k
/* Convert a timestamp read from a SAS header into a time_t.
 *
 * time:      raw timestamp value from the file
 * time_diff: correction subtracted from `time`
 * epoch:     offset added afterwards (callers pass sas_epoch())
 *
 * Returns 0 for NaN input, and clamps results that do not fit in a long to
 * LONG_MAX / LONG_MIN so that the final double -> integer conversion is
 * always in range.
 */
static time_t sas_convert_time(double time, double time_diff, time_t epoch) {
    time -= time_diff;
    time += epoch;
    if (isnan(time))
        return 0;
    /* (double)LONG_MAX may round up to one past LONG_MAX (2^63 when long is
     * 64 bits), so the comparison must be >=; with a plain > that exact value
     * would slip through and the conversion below would be out of range. */
    if (time >= (double)LONG_MAX)
        return LONG_MAX;
    /* LONG_MIN is a power of two and exactly representable, so < suffices. */
    if (time < (double)LONG_MIN)
        return LONG_MIN;
    return (time_t)time;
}
138
139
91.3k
/* Load a 64-bit value from the 8 bytes at `data`, reversing its byte order
 * with byteswap8() when `bswap` is nonzero. The bytes are copied with
 * memcpy rather than read through a cast pointer. */
uint64_t sas_read8(const char *data, int bswap) {
    uint64_t value;

    memcpy(&value, data, sizeof(value));
    if (bswap)
        return byteswap8(value);
    return value;
}
144
145
428k
/* Load a 32-bit value from the 4 bytes at `data`, reversing its byte order
 * with byteswap4() when `bswap` is nonzero. The bytes are copied with
 * memcpy rather than read through a cast pointer. */
uint32_t sas_read4(const char *data, int bswap) {
    uint32_t value;

    memcpy(&value, data, sizeof(value));
    if (bswap)
        return byteswap4(value);
    return value;
}
150
151
12.6M
/* Load a 16-bit value from the 2 bytes at `data`, reversing its byte order
 * with byteswap2() when `bswap` is nonzero. The bytes are copied with
 * memcpy rather than read through a cast pointer. */
uint16_t sas_read2(const char *data, int bswap) {
    uint16_t value;

    memcpy(&value, data, sizeof(value));
    if (bswap)
        return byteswap2(value);
    return value;
}
156
157
6.51k
/* Return `len` minus (4 + 2 * signature_len).
 *
 * The arithmetic is unsigned: if `len` is smaller than the amount being
 * subtracted the result wraps around, so callers must ensure `len` is large
 * enough.
 * NOTE(review): presumably the subtracted bytes are the signature plus
 * fixed fields at the start of a subheader -- confirm against the callers. */
size_t sas_subheader_remainder(size_t len, size_t signature_len) {
    size_t leading_bytes = 4 + 2 * signature_len;

    return len - leading_bytes;
}
160
161
readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, 
162
4.43k
        readstat_error_handler error_handler, void *user_ctx) {
163
4.43k
    sas_header_start_t  header_start;
164
4.43k
    sas_header_end_t    header_end;
165
4.43k
    int retval = READSTAT_OK;
166
4.43k
    char error_buf[1024];
167
4.43k
    time_t epoch = sas_epoch();
168
169
4.43k
    if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) < sizeof(sas_header_start_t)) {
170
40
        retval = READSTAT_ERROR_READ;
171
40
        goto cleanup;
172
40
    }
173
4.39k
    if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 &&
174
2.12k
            memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) {
175
254
        retval = READSTAT_ERROR_PARSE;
176
254
        goto cleanup;
177
254
    }
178
4.14k
    if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) {
179
16
        hinfo->pad1 = 4;
180
16
    }
181
4.14k
    if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) {
182
1.70k
        hinfo->u64 = 1;
183
1.70k
    }
184
4.14k
    int bswap = 0;
185
4.14k
    if (header_start.endian == SAS_ENDIAN_BIG) {
186
2.43k
        bswap = machine_is_little_endian();
187
2.43k
        hinfo->little_endian = 0;
188
2.43k
    } else if (header_start.endian == SAS_ENDIAN_LITTLE) {
189
1.67k
        bswap = !machine_is_little_endian();
190
1.67k
        hinfo->little_endian = 1;
191
1.67k
    } else {
192
23
        retval = READSTAT_ERROR_PARSE;
193
23
        goto cleanup;
194
23
    }
195
4.11k
    int i;
196
32.1k
    for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) {
197
32.1k
        if (header_start.encoding == _charset_table[i].code) {
198
4.11k
            hinfo->encoding = _charset_table[i].name;
199
4.11k
            break;
200
4.11k
        }
201
32.1k
    }
202
4.11k
    if (hinfo->encoding == NULL) {
203
6
        if (error_handler) {
204
0
            snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d", header_start.encoding);
205
0
            error_handler(error_buf, user_ctx);
206
0
        }
207
6
        retval = READSTAT_ERROR_UNSUPPORTED_CHARSET;
208
6
        goto cleanup;
209
6
    }
210
4.11k
    memcpy(hinfo->table_name, header_start.table_name, sizeof(header_start.table_name));
211
4.11k
    if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
212
7
        retval = READSTAT_ERROR_SEEK;
213
7
        goto cleanup;
214
7
    }
215
216
4.10k
    double creation_time, modification_time, creation_time_diff, modification_time_diff;
217
218
4.10k
    if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) {
219
22
        retval = READSTAT_ERROR_READ;
220
22
        goto cleanup;
221
22
    }
222
4.08k
    if (bswap)
223
2.41k
        creation_time = byteswap_double(creation_time);
224
225
4.08k
    if (io->read(&modification_time, sizeof(double), io->io_ctx) < sizeof(double)) {
226
13
        retval = READSTAT_ERROR_READ;
227
13
        goto cleanup;
228
13
    }
229
4.06k
    if (bswap)
230
2.40k
        modification_time = byteswap_double(modification_time);
231
232
4.06k
    if (io->read(&creation_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) {
233
12
        retval = READSTAT_ERROR_READ;
234
12
        goto cleanup;
235
12
    }
236
4.05k
    if (bswap)
237
2.40k
        creation_time_diff = byteswap_double(creation_time_diff);
238
    
239
4.05k
    if (io->read(&modification_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) {
240
13
        retval = READSTAT_ERROR_READ;
241
13
        goto cleanup;
242
13
    }
243
4.04k
    if (bswap)
244
2.39k
        modification_time_diff = byteswap_double(modification_time_diff);
245
    
246
4.04k
    hinfo->creation_time = sas_convert_time(creation_time, creation_time_diff, epoch);
247
4.04k
    hinfo->modification_time = sas_convert_time(modification_time, modification_time_diff, epoch);
248
249
4.04k
    uint32_t header_size, page_size;
250
251
4.04k
    if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
252
13
        retval = READSTAT_ERROR_READ;
253
13
        goto cleanup;
254
13
    }
255
4.03k
    if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
256
6
        retval = READSTAT_ERROR_READ;
257
6
        goto cleanup;
258
6
    }
259
260
4.02k
    hinfo->header_size = bswap ? byteswap4(header_size) : header_size;
261
4.02k
    hinfo->page_size = bswap ? byteswap4(page_size) : page_size;
262
263
4.02k
    if (hinfo->header_size < 1024 || hinfo->page_size < 1024) {
264
43
        retval = READSTAT_ERROR_PARSE;
265
43
        goto cleanup;
266
43
    }
267
3.98k
    if (hinfo->header_size > (1<<24) || hinfo->page_size > (1<<24)) {
268
73
        retval = READSTAT_ERROR_PARSE;
269
73
        goto cleanup;
270
73
    }
271
272
3.90k
    if (hinfo->u64) {
273
1.63k
        hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT;
274
1.63k
        hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT;
275
2.27k
    } else {
276
2.27k
        hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT;
277
2.27k
        hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT;
278
2.27k
    }
279
280
3.90k
    if (hinfo->u64) {
281
1.63k
        uint64_t page_count;
282
1.63k
        if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) < sizeof(uint64_t)) {
283
27
            retval = READSTAT_ERROR_READ;
284
27
            goto cleanup;
285
27
        }
286
1.60k
        hinfo->page_count = bswap ? byteswap8(page_count) : page_count;
287
2.27k
    } else {
288
2.27k
        uint32_t page_count;
289
2.27k
        if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) {
290
44
            retval = READSTAT_ERROR_READ;
291
44
            goto cleanup;
292
44
        }
293
2.23k
        hinfo->page_count = bswap ? byteswap4(page_count) : page_count;
294
2.23k
    }
295
3.83k
    if (hinfo->page_count > (1<<24)) {
296
20
        retval = READSTAT_ERROR_PARSE;
297
20
        goto cleanup;
298
20
    }
299
    
300
3.81k
    if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) {
301
142
        retval = READSTAT_ERROR_SEEK;
302
142
        if (error_handler) {
303
0
            snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d", 8);
304
0
            error_handler(error_buf, user_ctx);
305
0
        }
306
142
        goto cleanup;
307
142
    }
308
3.67k
    if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) < sizeof(sas_header_end_t)) {
309
33
        retval = READSTAT_ERROR_READ;
310
33
        goto cleanup;
311
33
    }
312
3.64k
    char major, revision_tag;
313
3.64k
    int minor, revision;
314
3.64k
    if (sscanf(header_end.release, "%c.%04d%c%1d", &major, &minor, &revision_tag, &revision) != 4) {
315
6
        retval = READSTAT_ERROR_PARSE;
316
6
        goto cleanup;
317
6
    }
318
319
3.63k
    if (major >= '1' && major <= '9') {
320
3.59k
        hinfo->major_version = major - '0';
321
3.59k
    } else if (major == 'V') {
322
        // It appears that SAS Visual Forecaster reports the major version as "V"
323
        // Treat it as version 9 for all intents and purposes
324
8
        hinfo->major_version = 9;
325
34
    } else {
326
34
        retval = READSTAT_ERROR_PARSE;
327
34
        goto cleanup;
328
34
    }
329
    // revision_tag is usually M, but J has been observed in the wild (not created with SAS?)
330
3.60k
    if (revision_tag != 'M' && revision_tag != 'J') {
331
26
        retval = READSTAT_ERROR_PARSE;
332
26
        goto cleanup;
333
26
    }
334
3.57k
    hinfo->minor_version = minor;
335
3.57k
    hinfo->revision = revision;
336
337
3.57k
    if ((major == '8' || major == '9') && minor == 0 && revision == 0) {
338
        /* A bit of a hack, but most SAS installations are running a minor update */
339
63
        hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER;
340
3.51k
    } else {
341
3.51k
        hinfo->vendor = READSTAT_VENDOR_SAS;
342
3.51k
    }
343
3.57k
    if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) {
344
89
        retval = READSTAT_ERROR_SEEK;
345
89
        if (error_handler) {
346
0
            snprintf(error_buf, sizeof(error_buf), 
347
0
                    "ReadStat: Failed to seek to position %" PRId64, hinfo->header_size);
348
0
            error_handler(error_buf, user_ctx);
349
0
        }
350
89
        goto cleanup;
351
89
    }
352
353
4.43k
cleanup:
354
4.43k
    return retval;
355
3.57k
}
356
357
0
/* Write a file header through `writer`.
 *
 * writer:       destination; its table_name and version are read here
 * hinfo:        supplies pad1, timestamps, header/page sizes, page count and
 *               the u64 flag
 * header_start: passed by value; its table_name field is overwritten with
 *               the space-padded table name before the struct is written
 *
 * The pieces are emitted in order: header_start, pad1 zero bytes, creation
 * and modification timestamps (as doubles relative to sas_epoch()), 16 zero
 * bytes, header size, page size, page count (8 bytes when hinfo->u64, else
 * 4), 8 zero bytes, the trailing sas_header_end_t, and finally zero padding
 * up to hinfo->header_size. All values are written in host byte order.
 *
 * Returns READSTAT_OK, or the first error reported by a write.
 */
readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t *hinfo, sas_header_start_t header_start) {
    readstat_error_t status;
    const time_t epoch_offset = sas_epoch();

    /* Table name: blank-padded, truncated to the field width, and replaced
     * by "DATASET" when the writer has no name */
    memset(header_start.table_name, ' ', sizeof(header_start.table_name));

    size_t name_len = strlen(writer->table_name);
    if (name_len > sizeof(header_start.table_name))
        name_len = sizeof(header_start.table_name);

    if (name_len == 0) {
        memcpy(header_start.table_name, "DATASET", sizeof("DATASET")-1);
    } else {
        memcpy(header_start.table_name, writer->table_name, name_len);
    }

    status = readstat_write_bytes(writer, &header_start, sizeof(sas_header_start_t));
    if (status != READSTAT_OK)
        return status;

    status = readstat_write_zeros(writer, hinfo->pad1);
    if (status != READSTAT_OK)
        return status;

    /* Timestamps are stored relative to the SAS epoch */
    double created = hinfo->creation_time - epoch_offset;
    status = readstat_write_bytes(writer, &created, sizeof(double));
    if (status != READSTAT_OK)
        return status;

    double modified = hinfo->modification_time - epoch_offset;
    status = readstat_write_bytes(writer, &modified, sizeof(double));
    if (status != READSTAT_OK)
        return status;

    status = readstat_write_zeros(writer, 16);
    if (status != READSTAT_OK)
        return status;

    uint32_t header_size32 = hinfo->header_size;
    status = readstat_write_bytes(writer, &header_size32, sizeof(uint32_t));
    if (status != READSTAT_OK)
        return status;

    uint32_t page_size32 = hinfo->page_size;
    status = readstat_write_bytes(writer, &page_size32, sizeof(uint32_t));
    if (status != READSTAT_OK)
        return status;

    /* Page count width depends on the u64 flag */
    if (hinfo->u64) {
        uint64_t count64 = hinfo->page_count;
        status = readstat_write_bytes(writer, &count64, sizeof(uint64_t));
    } else {
        uint32_t count32 = hinfo->page_count;
        status = readstat_write_bytes(writer, &count32, sizeof(uint32_t));
    }
    if (status != READSTAT_OK)
        return status;

    status = readstat_write_zeros(writer, 8);
    if (status != READSTAT_OK)
        return status;

    sas_header_end_t header_end = {
        .host = "9.0401M6Linux"
    };

    /* Format into a buffer with room for the terminator, then copy only the
     * field-width bytes into the (unterminated) release field */
    char release_text[sizeof(header_end.release)+1] = { 0 };
    snprintf(release_text, sizeof(release_text), "%1d.%04dM0", (unsigned int)writer->version % 10, 101);
    memcpy(header_end.release, release_text, sizeof(header_end.release));

    status = readstat_write_bytes(writer, &header_end, sizeof(sas_header_end_t));
    if (status != READSTAT_OK)
        return status;

    /* Pad the remainder of the header with zeros */
    return readstat_write_zeros(writer, hinfo->header_size-writer->bytes_written);
}
441
442
0
/* Allocate and populate a header-info structure for writing.
 *
 * writer:   supplies the timestamp used for both creation and modification
 * is_64bit: nonzero selects the 64-bit header, page-header and
 *           subheader-pointer sizes; zero selects the 32-bit ones
 *
 * Returns a zero-initialized, heap-allocated structure with the fields above
 * filled in and page_size set to SAS_DEFAULT_PAGE_SIZE, or NULL if the
 * allocation fails. The structure comes from calloc(), so the caller
 * releases it with free().
 */
sas_header_info_t *sas_header_info_init(readstat_writer_t *writer, int is_64bit) {
    sas_header_info_t *hinfo = calloc(1, sizeof *hinfo);
    if (hinfo == NULL)
        return NULL; /* previously dereferenced unconditionally */

    hinfo->creation_time = writer->timestamp;
    hinfo->modification_time = writer->timestamp;
    hinfo->page_size = SAS_DEFAULT_PAGE_SIZE;
    hinfo->u64 = !!is_64bit;

    if (hinfo->u64) {
        hinfo->header_size = SAS_FILE_HEADER_SIZE_64BIT;
        hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT;
        hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT;
    } else {
        hinfo->header_size = SAS_FILE_HEADER_SIZE_32BIT;
        hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT;
        hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT;
    }

    return hinfo;
}
461
462
0
/* Zero-pad the output so that the number of bytes written after the header
 * is a multiple of hinfo->page_size. Writes nothing when the output already
 * ends on a page boundary. Returns READSTAT_OK or the error from
 * readstat_write_zeros(). */
readstat_error_t sas_fill_page(readstat_writer_t *writer, sas_header_info_t *hinfo) {
    /* Already on a page boundary: nothing to do */
    if ((writer->bytes_written - hinfo->header_size) % hinfo->page_size == 0)
        return READSTAT_OK;

    size_t padding = (hinfo->page_size -
            (writer->bytes_written - hinfo->header_size) % hinfo->page_size);

    return readstat_write_zeros(writer, padding);
}
470
471
0
readstat_error_t sas_validate_name(const char *name, size_t max_len) {
472
0
    int j;
473
0
    for (j=0; name[j]; j++) {
474
0
        if (name[j] != '_' &&
475
0
                !(name[j] >= 'a' && name[j] <= 'z') &&
476
0
                !(name[j] >= 'A' && name[j] <= 'Z') &&
477
0
                !(name[j] >= '0' && name[j] <= '9')) {
478
0
            return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER;
479
0
        }
480
0
    }
481
0
    char first_char = name[0];
482
483
0
    if (!first_char)
484
0
        return READSTAT_ERROR_NAME_IS_ZERO_LENGTH;
485
486
0
    if (first_char != '_' &&
487
0
            !(first_char >= 'a' && first_char <= 'z') &&
488
0
            !(first_char >= 'A' && first_char <= 'Z')) {
489
0
        return READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER;
490
0
    }
491
0
    if (strcmp(name, "_N_") == 0 || strcmp(name, "_ERROR_") == 0 ||
492
0
            strcmp(name, "_NUMERIC_") == 0 || strcmp(name, "_CHARACTER_") == 0 ||
493
0
            strcmp(name, "_ALL_") == 0) {
494
0
        return READSTAT_ERROR_NAME_IS_RESERVED_WORD;
495
0
    }
496
497
0
    if (strlen(name) > max_len)
498
0
        return READSTAT_ERROR_NAME_IS_TOO_LONG;
499
500
0
    return READSTAT_OK;
501
0
}
502
503
0
/* Validate a variable's name with sas_validate_name(), allowing at most 32
 * bytes. Returns whatever sas_validate_name() returns. */
readstat_error_t sas_validate_variable(const readstat_variable_t *variable) {
    const char *variable_name = readstat_variable_get_name(variable);

    return sas_validate_name(variable_name, 32);
}
506
507
8.51k
readstat_error_t sas_validate_tag(char tag) {
508
8.51k
    if (tag == '_' || (tag >= 'A' && tag <= 'Z'))
509
4.74k
        return READSTAT_OK;
510
511
3.77k
    return READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE;
512
8.51k
}
513
514
7.61k
/* Record a missing-value tag on `value`.
 *
 * Two tag encodings are accepted. In the first, `tag` is already an ASCII
 * code, with an ASCII period meaning system-missing. In the second, 0 stands
 * for an underscore, 1 for system-missing, and 2..27 map to 'A'..'Z' as an
 * offset of (tag - 2) from 'A'.
 *
 * After normalization, a tag accepted by sas_validate_tag() is stored in
 * value->tag and the value is flagged as tagged-missing; any other tag
 * stores 0 and flags the value as system-missing.
 */
void sas_assign_tag(readstat_value_t *value, uint8_t tag) {
    uint8_t ascii_tag = tag;

    /* Translate the offset-based encoding into its ASCII equivalent */
    if (tag == 0) {
        ascii_tag = '_';
    } else if (tag >= 2 && tag < 28) {
        ascii_tag = 'A' + (tag - 2);
    }

    if (sas_validate_tag(ascii_tag) != READSTAT_OK) {
        value->tag = 0;
        value->is_system_missing = 1;
        return;
    }

    value->tag = ascii_tag;
    value->is_tagged_missing = 1;
}