/src/readstat/src/sas/readstat_sas.c
Line | Count | Source |
1 | | |
2 | | #include <stdio.h> |
3 | | #include <stdlib.h> |
4 | | #include <errno.h> |
5 | | #include <string.h> |
6 | | #include <math.h> |
7 | | #include <time.h> |
8 | | #include <limits.h> |
9 | | #include <inttypes.h> |
10 | | |
11 | | #include "readstat_sas.h" |
12 | | #include "../readstat_iconv.h" |
13 | | #include "../readstat_convert.h" |
14 | | #include "../readstat_writer.h" |
15 | | |
16 | 0 | #define SAS_FILE_HEADER_SIZE_32BIT 1024 |
17 | 0 | #define SAS_FILE_HEADER_SIZE_64BIT 8192 |
18 | 0 | #define SAS_DEFAULT_PAGE_SIZE 4096 |
19 | | |
20 | | #define SAS_DEFAULT_STRING_ENCODING "WINDOWS-1252" |
21 | | |
/* 32-byte signature expected at the start of a sas7bdat file header;
 * sas_read_header() compares header_start.magic against it. The first
 * twelve bytes are zero, the remaining twenty carry the signature. */
unsigned char sas7bdat_magic_number[32] = {
    0x00, 0x00, 0x00, 0x00,   0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,

    0xc2, 0xea, 0x81, 0x60,
    0xb3, 0x14, 0x11, 0xcf,   0xbd, 0x92, 0x08, 0x00,
    0x09, 0xc7, 0x31, 0x8c,   0x18, 0x1f, 0x10, 0x11
};
28 | | |
/* 32-byte signature expected at the start of a sas7bcat file header;
 * sas_read_header() accepts it as an alternative to the sas7bdat signature.
 * The first twelve bytes are zero, the remaining twenty carry the signature. */
unsigned char sas7bcat_magic_number[32] = {
    0x00, 0x00, 0x00, 0x00,   0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00,

    0xc2, 0xea, 0x81, 0x63,
    0xb3, 0x14, 0x11, 0xcf,   0xbd, 0x92, 0x08, 0x00,
    0x09, 0xc7, 0x31, 0x8c,   0x18, 0x1f, 0x10, 0x11
};
35 | | |
36 | | /* This table is cobbled together from extant files and: |
37 | | * https://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm |
38 | | * https://support.sas.com/documentation/onlinedoc/dfdmstudio/2.6/dmpdmsug/Content/dfU_Encodings_SAS.html |
39 | | * |
40 | | * Discrepancies from the official documentation are noted with a comment. It |
41 | | * appears that in some instances SAS software uses a newer encoding than |
42 | | * what's listed in the docs. In these cases the encoding used by ReadStat |
43 | | * represents the author's best guess. |
44 | | */ |
45 | | static readstat_charset_entry_t _charset_table[] = { |
46 | | { .code = 0, .name = SAS_DEFAULT_STRING_ENCODING }, |
47 | | { .code = 20, .name = "UTF-8" }, |
48 | | { .code = 28, .name = "US-ASCII" }, |
49 | | { .code = 29, .name = "ISO-8859-1" }, |
50 | | { .code = 30, .name = "ISO-8859-2" }, |
51 | | { .code = 31, .name = "ISO-8859-3" }, |
52 | | { .code = 32, .name = "ISO-8859-4" }, |
53 | | { .code = 33, .name = "ISO-8859-5" }, |
54 | | { .code = 34, .name = "ISO-8859-6" }, |
55 | | { .code = 35, .name = "ISO-8859-7" }, |
56 | | { .code = 36, .name = "ISO-8859-8" }, |
57 | | { .code = 37, .name = "ISO-8859-9" }, |
58 | | { .code = 39, .name = "ISO-8859-11" }, |
59 | | { .code = 40, .name = "ISO-8859-15" }, |
60 | | { .code = 41, .name = "CP437" }, |
61 | | { .code = 42, .name = "CP850" }, |
62 | | { .code = 43, .name = "CP852" }, |
63 | | { .code = 44, .name = "CP857" }, |
64 | | { .code = 45, .name = "CP858" }, |
65 | | { .code = 46, .name = "CP862" }, |
66 | | { .code = 47, .name = "CP864" }, |
67 | | { .code = 48, .name = "CP865" }, |
68 | | { .code = 49, .name = "CP866" }, |
69 | | { .code = 50, .name = "CP869" }, |
70 | | { .code = 51, .name = "CP874" }, |
71 | | { .code = 52, .name = "CP921" }, |
72 | | { .code = 53, .name = "CP922" }, |
73 | | { .code = 54, .name = "CP1129" }, |
74 | | { .code = 55, .name = "CP720" }, |
75 | | { .code = 56, .name = "CP737" }, |
76 | | { .code = 57, .name = "CP775" }, |
77 | | { .code = 58, .name = "CP860" }, |
78 | | { .code = 59, .name = "CP863" }, |
79 | | { .code = 60, .name = "WINDOWS-1250" }, |
80 | | { .code = 61, .name = "WINDOWS-1251" }, |
81 | | { .code = 62, .name = "WINDOWS-1252" }, |
82 | | { .code = 63, .name = "WINDOWS-1253" }, |
83 | | { .code = 64, .name = "WINDOWS-1254" }, |
84 | | { .code = 65, .name = "WINDOWS-1255" }, |
85 | | { .code = 66, .name = "WINDOWS-1256" }, |
86 | | { .code = 67, .name = "WINDOWS-1257" }, |
87 | | { .code = 68, .name = "WINDOWS-1258" }, |
88 | | { .code = 69, .name = "MACROMAN" }, |
89 | | { .code = 70, .name = "MACARABIC" }, |
90 | | { .code = 71, .name = "MACHEBREW" }, |
91 | | { .code = 72, .name = "MACGREEK" }, |
92 | | { .code = 73, .name = "MACTHAI" }, |
93 | | { .code = 75, .name = "MACTURKISH" }, |
94 | | { .code = 76, .name = "MACUKRAINE" }, |
95 | | { .code = 118, .name = "CP950" }, |
96 | | { .code = 119, .name = "EUC-TW" }, |
97 | | { .code = 123, .name = "BIG-5" }, |
98 | | { .code = 125, .name = "GB18030" }, // "euc-cn" in SAS |
99 | | { .code = 126, .name = "WINDOWS-936" }, // "zwin" |
100 | | { .code = 128, .name = "CP1381" }, // "zpce" |
101 | | { .code = 134, .name = "EUC-JP" }, |
102 | | { .code = 136, .name = "CP949" }, |
103 | | { .code = 137, .name = "CP942" }, |
104 | | { .code = 138, .name = "CP932" }, // "shift-jis" in SAS |
105 | | { .code = 140, .name = "EUC-KR" }, |
106 | | { .code = 141, .name = "CP949" }, // "kpce" |
107 | | { .code = 142, .name = "CP949" }, // "kwin" |
108 | | { .code = 163, .name = "MACICELAND" }, |
109 | | { .code = 167, .name = "ISO-2022-JP" }, |
110 | | { .code = 168, .name = "ISO-2022-KR" }, |
111 | | { .code = 169, .name = "ISO-2022-CN" }, |
112 | | { .code = 172, .name = "ISO-2022-CN-EXT" }, |
113 | | { .code = 204, .name = SAS_DEFAULT_STRING_ENCODING }, // "any" in SAS |
114 | | { .code = 205, .name = "GB18030" }, |
115 | | { .code = 227, .name = "ISO-8859-14" }, |
116 | | { .code = 242, .name = "ISO-8859-13" }, |
117 | | { .code = 245, .name = "MACCROATIAN" }, |
118 | | { .code = 246, .name = "MACCYRILLIC" }, |
119 | | { .code = 247, .name = "MACROMANIA" }, |
120 | | { .code = 248, .name = "SHIFT_JISX0213" }, |
121 | | }; |
122 | | |
/* Returns the SAS epoch (01-01-1960) as an offset in seconds from the Unix
 * epoch (01-01-1970). The value is negative: 3653 days of 86400 seconds. */
static time_t sas_epoch(void) {
    enum {
        days_from_1960_to_1970 = 3653,
        seconds_per_day = 86400
    };
    return -(days_from_1960_to_1970 * seconds_per_day);
}
126 | | |
/* Converts a SAS timestamp to a time_t.
 *
 * time:      seconds as stored in the file header
 * time_diff: offset subtracted from `time` before conversion
 * epoch:     value added afterwards (callers pass sas_epoch())
 *
 * Returns 0 when the adjusted value is NaN, and clamps to LONG_MAX / LONG_MIN
 * when it falls outside the range of long.
 */
static time_t sas_convert_time(double time, double time_diff, time_t epoch) {
    time -= time_diff;
    time += epoch;
    if (isnan(time))
        return 0;
    /* (double)LONG_MAX may round up to a value one past LONG_MAX (2^63 when
     * long is 64 bits), so equality must clamp too; converting that value to
     * an integer type would otherwise be undefined behaviour. */
    if (time >= (double)LONG_MAX)
        return LONG_MAX;
    if (time < (double)LONG_MIN)
        return LONG_MIN;
    return (time_t)time;
}
138 | | |
/* Loads a 64-bit unsigned integer from the first 8 bytes at `data`
 * (no alignment requirement), byte-swapping it when `bswap` is non-zero. */
uint64_t sas_read8(const char *data, int bswap) {
    uint64_t value;
    memcpy(&value, data, 8);
    if (bswap)
        return byteswap8(value);
    return value;
}
144 | | |
/* Loads a 32-bit unsigned integer from the first 4 bytes at `data`
 * (no alignment requirement), byte-swapping it when `bswap` is non-zero. */
uint32_t sas_read4(const char *data, int bswap) {
    uint32_t value;
    memcpy(&value, data, 4);
    if (bswap)
        return byteswap4(value);
    return value;
}
150 | | |
/* Loads a 16-bit unsigned integer from the first 2 bytes at `data`
 * (no alignment requirement), byte-swapping it when `bswap` is non-zero. */
uint16_t sas_read2(const char *data, int bswap) {
    uint16_t value;
    memcpy(&value, data, 2);
    if (bswap)
        return byteswap2(value);
    return value;
}
156 | | |
/* Returns `len` minus (4 + 2 * signature_len). The subtraction is unsigned and
 * unchecked, so the result wraps around when `len` is the smaller value. */
size_t sas_subheader_remainder(size_t len, size_t signature_len) {
    size_t consumed = 4 + 2 * signature_len;
    return len - consumed;
}
160 | | |
161 | | readstat_error_t sas_read_header(readstat_io_t *io, sas_header_info_t *hinfo, |
162 | 4.43k | readstat_error_handler error_handler, void *user_ctx) { |
163 | 4.43k | sas_header_start_t header_start; |
164 | 4.43k | sas_header_end_t header_end; |
165 | 4.43k | int retval = READSTAT_OK; |
166 | 4.43k | char error_buf[1024]; |
167 | 4.43k | time_t epoch = sas_epoch(); |
168 | | |
169 | 4.43k | if (io->read(&header_start, sizeof(sas_header_start_t), io->io_ctx) < sizeof(sas_header_start_t)) { |
170 | 40 | retval = READSTAT_ERROR_READ; |
171 | 40 | goto cleanup; |
172 | 40 | } |
173 | 4.39k | if (memcmp(header_start.magic, sas7bdat_magic_number, sizeof(sas7bdat_magic_number)) != 0 && |
174 | 2.12k | memcmp(header_start.magic, sas7bcat_magic_number, sizeof(sas7bcat_magic_number)) != 0) { |
175 | 254 | retval = READSTAT_ERROR_PARSE; |
176 | 254 | goto cleanup; |
177 | 254 | } |
178 | 4.14k | if (header_start.a1 == SAS_ALIGNMENT_OFFSET_4) { |
179 | 16 | hinfo->pad1 = 4; |
180 | 16 | } |
181 | 4.14k | if (header_start.a2 == SAS_ALIGNMENT_OFFSET_4) { |
182 | 1.70k | hinfo->u64 = 1; |
183 | 1.70k | } |
184 | 4.14k | int bswap = 0; |
185 | 4.14k | if (header_start.endian == SAS_ENDIAN_BIG) { |
186 | 2.43k | bswap = machine_is_little_endian(); |
187 | 2.43k | hinfo->little_endian = 0; |
188 | 2.43k | } else if (header_start.endian == SAS_ENDIAN_LITTLE) { |
189 | 1.67k | bswap = !machine_is_little_endian(); |
190 | 1.67k | hinfo->little_endian = 1; |
191 | 1.67k | } else { |
192 | 23 | retval = READSTAT_ERROR_PARSE; |
193 | 23 | goto cleanup; |
194 | 23 | } |
195 | 4.11k | int i; |
196 | 32.1k | for (i=0; i<sizeof(_charset_table)/sizeof(_charset_table[0]); i++) { |
197 | 32.1k | if (header_start.encoding == _charset_table[i].code) { |
198 | 4.11k | hinfo->encoding = _charset_table[i].name; |
199 | 4.11k | break; |
200 | 4.11k | } |
201 | 32.1k | } |
202 | 4.11k | if (hinfo->encoding == NULL) { |
203 | 6 | if (error_handler) { |
204 | 0 | snprintf(error_buf, sizeof(error_buf), "Unsupported character set code: %d", header_start.encoding); |
205 | 0 | error_handler(error_buf, user_ctx); |
206 | 0 | } |
207 | 6 | retval = READSTAT_ERROR_UNSUPPORTED_CHARSET; |
208 | 6 | goto cleanup; |
209 | 6 | } |
210 | 4.11k | memcpy(hinfo->table_name, header_start.table_name, sizeof(header_start.table_name)); |
211 | 4.11k | if (io->seek(hinfo->pad1, READSTAT_SEEK_CUR, io->io_ctx) == -1) { |
212 | 7 | retval = READSTAT_ERROR_SEEK; |
213 | 7 | goto cleanup; |
214 | 7 | } |
215 | | |
216 | 4.10k | double creation_time, modification_time, creation_time_diff, modification_time_diff; |
217 | | |
218 | 4.10k | if (io->read(&creation_time, sizeof(double), io->io_ctx) < sizeof(double)) { |
219 | 22 | retval = READSTAT_ERROR_READ; |
220 | 22 | goto cleanup; |
221 | 22 | } |
222 | 4.08k | if (bswap) |
223 | 2.41k | creation_time = byteswap_double(creation_time); |
224 | | |
225 | 4.08k | if (io->read(&modification_time, sizeof(double), io->io_ctx) < sizeof(double)) { |
226 | 13 | retval = READSTAT_ERROR_READ; |
227 | 13 | goto cleanup; |
228 | 13 | } |
229 | 4.06k | if (bswap) |
230 | 2.40k | modification_time = byteswap_double(modification_time); |
231 | | |
232 | 4.06k | if (io->read(&creation_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) { |
233 | 12 | retval = READSTAT_ERROR_READ; |
234 | 12 | goto cleanup; |
235 | 12 | } |
236 | 4.05k | if (bswap) |
237 | 2.40k | creation_time_diff = byteswap_double(creation_time_diff); |
238 | | |
239 | 4.05k | if (io->read(&modification_time_diff, sizeof(double), io->io_ctx) < sizeof(double)) { |
240 | 13 | retval = READSTAT_ERROR_READ; |
241 | 13 | goto cleanup; |
242 | 13 | } |
243 | 4.04k | if (bswap) |
244 | 2.39k | modification_time_diff = byteswap_double(modification_time_diff); |
245 | | |
246 | 4.04k | hinfo->creation_time = sas_convert_time(creation_time, creation_time_diff, epoch); |
247 | 4.04k | hinfo->modification_time = sas_convert_time(modification_time, modification_time_diff, epoch); |
248 | | |
249 | 4.04k | uint32_t header_size, page_size; |
250 | | |
251 | 4.04k | if (io->read(&header_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { |
252 | 13 | retval = READSTAT_ERROR_READ; |
253 | 13 | goto cleanup; |
254 | 13 | } |
255 | 4.03k | if (io->read(&page_size, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { |
256 | 6 | retval = READSTAT_ERROR_READ; |
257 | 6 | goto cleanup; |
258 | 6 | } |
259 | | |
260 | 4.02k | hinfo->header_size = bswap ? byteswap4(header_size) : header_size; |
261 | 4.02k | hinfo->page_size = bswap ? byteswap4(page_size) : page_size; |
262 | | |
263 | 4.02k | if (hinfo->header_size < 1024 || hinfo->page_size < 1024) { |
264 | 43 | retval = READSTAT_ERROR_PARSE; |
265 | 43 | goto cleanup; |
266 | 43 | } |
267 | 3.98k | if (hinfo->header_size > (1<<24) || hinfo->page_size > (1<<24)) { |
268 | 73 | retval = READSTAT_ERROR_PARSE; |
269 | 73 | goto cleanup; |
270 | 73 | } |
271 | | |
272 | 3.90k | if (hinfo->u64) { |
273 | 1.63k | hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT; |
274 | 1.63k | hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT; |
275 | 2.27k | } else { |
276 | 2.27k | hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT; |
277 | 2.27k | hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT; |
278 | 2.27k | } |
279 | | |
280 | 3.90k | if (hinfo->u64) { |
281 | 1.63k | uint64_t page_count; |
282 | 1.63k | if (io->read(&page_count, sizeof(uint64_t), io->io_ctx) < sizeof(uint64_t)) { |
283 | 27 | retval = READSTAT_ERROR_READ; |
284 | 27 | goto cleanup; |
285 | 27 | } |
286 | 1.60k | hinfo->page_count = bswap ? byteswap8(page_count) : page_count; |
287 | 2.27k | } else { |
288 | 2.27k | uint32_t page_count; |
289 | 2.27k | if (io->read(&page_count, sizeof(uint32_t), io->io_ctx) < sizeof(uint32_t)) { |
290 | 44 | retval = READSTAT_ERROR_READ; |
291 | 44 | goto cleanup; |
292 | 44 | } |
293 | 2.23k | hinfo->page_count = bswap ? byteswap4(page_count) : page_count; |
294 | 2.23k | } |
295 | 3.83k | if (hinfo->page_count > (1<<24)) { |
296 | 20 | retval = READSTAT_ERROR_PARSE; |
297 | 20 | goto cleanup; |
298 | 20 | } |
299 | | |
300 | 3.81k | if (io->seek(8, READSTAT_SEEK_CUR, io->io_ctx) == -1) { |
301 | 142 | retval = READSTAT_ERROR_SEEK; |
302 | 142 | if (error_handler) { |
303 | 0 | snprintf(error_buf, sizeof(error_buf), "ReadStat: Failed to seek forward by %d", 8); |
304 | 0 | error_handler(error_buf, user_ctx); |
305 | 0 | } |
306 | 142 | goto cleanup; |
307 | 142 | } |
308 | 3.67k | if (io->read(&header_end, sizeof(sas_header_end_t), io->io_ctx) < sizeof(sas_header_end_t)) { |
309 | 33 | retval = READSTAT_ERROR_READ; |
310 | 33 | goto cleanup; |
311 | 33 | } |
312 | 3.64k | char major, revision_tag; |
313 | 3.64k | int minor, revision; |
314 | 3.64k | if (sscanf(header_end.release, "%c.%04d%c%1d", &major, &minor, &revision_tag, &revision) != 4) { |
315 | 6 | retval = READSTAT_ERROR_PARSE; |
316 | 6 | goto cleanup; |
317 | 6 | } |
318 | | |
319 | 3.63k | if (major >= '1' && major <= '9') { |
320 | 3.59k | hinfo->major_version = major - '0'; |
321 | 3.59k | } else if (major == 'V') { |
322 | | // It appears that SAS Visual Forecaster reports the major version as "V" |
323 | | // Treat it as version 9 for all intents and purposes |
324 | 8 | hinfo->major_version = 9; |
325 | 34 | } else { |
326 | 34 | retval = READSTAT_ERROR_PARSE; |
327 | 34 | goto cleanup; |
328 | 34 | } |
329 | | // revision_tag is usually M, but J has been observed in the wild (not created with SAS?) |
330 | 3.60k | if (revision_tag != 'M' && revision_tag != 'J') { |
331 | 26 | retval = READSTAT_ERROR_PARSE; |
332 | 26 | goto cleanup; |
333 | 26 | } |
334 | 3.57k | hinfo->minor_version = minor; |
335 | 3.57k | hinfo->revision = revision; |
336 | | |
337 | 3.57k | if ((major == '8' || major == '9') && minor == 0 && revision == 0) { |
338 | | /* A bit of a hack, but most SAS installations are running a minor update */ |
339 | 63 | hinfo->vendor = READSTAT_VENDOR_STAT_TRANSFER; |
340 | 3.51k | } else { |
341 | 3.51k | hinfo->vendor = READSTAT_VENDOR_SAS; |
342 | 3.51k | } |
343 | 3.57k | if (io->seek(hinfo->header_size, READSTAT_SEEK_SET, io->io_ctx) == -1) { |
344 | 89 | retval = READSTAT_ERROR_SEEK; |
345 | 89 | if (error_handler) { |
346 | 0 | snprintf(error_buf, sizeof(error_buf), |
347 | 0 | "ReadStat: Failed to seek to position %" PRId64, hinfo->header_size); |
348 | 0 | error_handler(error_buf, user_ctx); |
349 | 0 | } |
350 | 89 | goto cleanup; |
351 | 89 | } |
352 | | |
353 | 4.43k | cleanup: |
354 | 4.43k | return retval; |
355 | 3.57k | } |
356 | | |
357 | 0 | readstat_error_t sas_write_header(readstat_writer_t *writer, sas_header_info_t *hinfo, sas_header_start_t header_start) { |
358 | 0 | readstat_error_t retval = READSTAT_OK; |
359 | 0 | time_t epoch = sas_epoch(); |
360 | |
|
361 | 0 | memset(header_start.table_name, ' ', sizeof(header_start.table_name)); |
362 | |
|
363 | 0 | size_t table_name_len = strlen(writer->table_name); |
364 | 0 | if (table_name_len > sizeof(header_start.table_name)) |
365 | 0 | table_name_len = sizeof(header_start.table_name); |
366 | |
|
367 | 0 | if (table_name_len) { |
368 | 0 | memcpy(header_start.table_name, writer->table_name, table_name_len); |
369 | 0 | } else { |
370 | 0 | memcpy(header_start.table_name, "DATASET", sizeof("DATASET")-1); |
371 | 0 | } |
372 | |
|
373 | 0 | retval = readstat_write_bytes(writer, &header_start, sizeof(sas_header_start_t)); |
374 | 0 | if (retval != READSTAT_OK) |
375 | 0 | goto cleanup; |
376 | | |
377 | 0 | retval = readstat_write_zeros(writer, hinfo->pad1); |
378 | 0 | if (retval != READSTAT_OK) |
379 | 0 | goto cleanup; |
380 | | |
381 | 0 | double creation_time = hinfo->creation_time - epoch; |
382 | |
|
383 | 0 | retval = readstat_write_bytes(writer, &creation_time, sizeof(double)); |
384 | 0 | if (retval != READSTAT_OK) |
385 | 0 | goto cleanup; |
386 | | |
387 | 0 | double modification_time = hinfo->modification_time - epoch; |
388 | |
|
389 | 0 | retval = readstat_write_bytes(writer, &modification_time, sizeof(double)); |
390 | 0 | if (retval != READSTAT_OK) |
391 | 0 | goto cleanup; |
392 | | |
393 | 0 | retval = readstat_write_zeros(writer, 16); |
394 | 0 | if (retval != READSTAT_OK) |
395 | 0 | goto cleanup; |
396 | | |
397 | 0 | uint32_t header_size = hinfo->header_size; |
398 | 0 | uint32_t page_size = hinfo->page_size; |
399 | |
|
400 | 0 | retval = readstat_write_bytes(writer, &header_size, sizeof(uint32_t)); |
401 | 0 | if (retval != READSTAT_OK) |
402 | 0 | goto cleanup; |
403 | | |
404 | 0 | retval = readstat_write_bytes(writer, &page_size, sizeof(uint32_t)); |
405 | 0 | if (retval != READSTAT_OK) |
406 | 0 | goto cleanup; |
407 | | |
408 | 0 | if (hinfo->u64) { |
409 | 0 | uint64_t page_count = hinfo->page_count; |
410 | 0 | retval = readstat_write_bytes(writer, &page_count, sizeof(uint64_t)); |
411 | 0 | } else { |
412 | 0 | uint32_t page_count = hinfo->page_count; |
413 | 0 | retval = readstat_write_bytes(writer, &page_count, sizeof(uint32_t)); |
414 | 0 | } |
415 | 0 | if (retval != READSTAT_OK) |
416 | 0 | goto cleanup; |
417 | | |
418 | 0 | retval = readstat_write_zeros(writer, 8); |
419 | 0 | if (retval != READSTAT_OK) |
420 | 0 | goto cleanup; |
421 | | |
422 | 0 | sas_header_end_t header_end = { |
423 | 0 | .host = "9.0401M6Linux" |
424 | 0 | }; |
425 | |
|
426 | 0 | char release[sizeof(header_end.release)+1] = { 0 }; |
427 | 0 | snprintf(release, sizeof(release), "%1d.%04dM0", (unsigned int)writer->version % 10, 101); |
428 | 0 | memcpy(header_end.release, release, sizeof(header_end.release)); |
429 | |
|
430 | 0 | retval = readstat_write_bytes(writer, &header_end, sizeof(sas_header_end_t)); |
431 | 0 | if (retval != READSTAT_OK) |
432 | 0 | goto cleanup; |
433 | | |
434 | 0 | retval = readstat_write_zeros(writer, hinfo->header_size-writer->bytes_written); |
435 | 0 | if (retval != READSTAT_OK) |
436 | 0 | goto cleanup; |
437 | | |
438 | 0 | cleanup: |
439 | 0 | return retval; |
440 | 0 | } |
441 | | |
442 | 0 | sas_header_info_t *sas_header_info_init(readstat_writer_t *writer, int is_64bit) { |
443 | 0 | sas_header_info_t *hinfo = calloc(1, sizeof(sas_header_info_t)); |
444 | 0 | hinfo->creation_time = writer->timestamp; |
445 | 0 | hinfo->modification_time = writer->timestamp; |
446 | 0 | hinfo->page_size = SAS_DEFAULT_PAGE_SIZE; |
447 | 0 | hinfo->u64 = !!is_64bit; |
448 | |
|
449 | 0 | if (hinfo->u64) { |
450 | 0 | hinfo->header_size = SAS_FILE_HEADER_SIZE_64BIT; |
451 | 0 | hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_64BIT; |
452 | 0 | hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_64BIT; |
453 | 0 | } else { |
454 | 0 | hinfo->header_size = SAS_FILE_HEADER_SIZE_32BIT; |
455 | 0 | hinfo->page_header_size = SAS_PAGE_HEADER_SIZE_32BIT; |
456 | 0 | hinfo->subheader_pointer_size = SAS_SUBHEADER_POINTER_SIZE_32BIT; |
457 | 0 | } |
458 | |
|
459 | 0 | return hinfo; |
460 | 0 | } |
461 | | |
462 | 0 | readstat_error_t sas_fill_page(readstat_writer_t *writer, sas_header_info_t *hinfo) { |
463 | 0 | if ((writer->bytes_written - hinfo->header_size) % hinfo->page_size) { |
464 | 0 | size_t num_zeros = (hinfo->page_size - |
465 | 0 | (writer->bytes_written - hinfo->header_size) % hinfo->page_size); |
466 | 0 | return readstat_write_zeros(writer, num_zeros); |
467 | 0 | } |
468 | 0 | return READSTAT_OK; |
469 | 0 | } |
470 | | |
471 | 0 | readstat_error_t sas_validate_name(const char *name, size_t max_len) { |
472 | 0 | int j; |
473 | 0 | for (j=0; name[j]; j++) { |
474 | 0 | if (name[j] != '_' && |
475 | 0 | !(name[j] >= 'a' && name[j] <= 'z') && |
476 | 0 | !(name[j] >= 'A' && name[j] <= 'Z') && |
477 | 0 | !(name[j] >= '0' && name[j] <= '9')) { |
478 | 0 | return READSTAT_ERROR_NAME_CONTAINS_ILLEGAL_CHARACTER; |
479 | 0 | } |
480 | 0 | } |
481 | 0 | char first_char = name[0]; |
482 | |
|
483 | 0 | if (!first_char) |
484 | 0 | return READSTAT_ERROR_NAME_IS_ZERO_LENGTH; |
485 | | |
486 | 0 | if (first_char != '_' && |
487 | 0 | !(first_char >= 'a' && first_char <= 'z') && |
488 | 0 | !(first_char >= 'A' && first_char <= 'Z')) { |
489 | 0 | return READSTAT_ERROR_NAME_BEGINS_WITH_ILLEGAL_CHARACTER; |
490 | 0 | } |
491 | 0 | if (strcmp(name, "_N_") == 0 || strcmp(name, "_ERROR_") == 0 || |
492 | 0 | strcmp(name, "_NUMERIC_") == 0 || strcmp(name, "_CHARACTER_") == 0 || |
493 | 0 | strcmp(name, "_ALL_") == 0) { |
494 | 0 | return READSTAT_ERROR_NAME_IS_RESERVED_WORD; |
495 | 0 | } |
496 | | |
497 | 0 | if (strlen(name) > max_len) |
498 | 0 | return READSTAT_ERROR_NAME_IS_TOO_LONG; |
499 | | |
500 | 0 | return READSTAT_OK; |
501 | 0 | } |
502 | | |
503 | 0 | readstat_error_t sas_validate_variable(const readstat_variable_t *variable) { |
504 | 0 | return sas_validate_name(readstat_variable_get_name(variable), 32); |
505 | 0 | } |
506 | | |
507 | 8.51k | readstat_error_t sas_validate_tag(char tag) { |
508 | 8.51k | if (tag == '_' || (tag >= 'A' && tag <= 'Z')) |
509 | 4.74k | return READSTAT_OK; |
510 | | |
511 | 3.77k | return READSTAT_ERROR_TAGGED_VALUE_IS_OUT_OF_RANGE; |
512 | 8.51k | } |
513 | | |
514 | 7.61k | void sas_assign_tag(readstat_value_t *value, uint8_t tag) { |
515 | | /* We accommodate two tag schemes. In the first, the tag is an ASCII code |
516 | | * given by uint8_t tag above. System missing is represented by an ASCII |
517 | | * period. In the second scheme, (tag-2) is an offset from 'A', except when |
518 | | * tag == 0, in which case it represents an underscore, or tag == 1, in |
519 | | * which case it represents system-missing. |
520 | | */ |
521 | 7.61k | if (tag == 0) { |
522 | 3.16k | tag = '_'; |
523 | 4.45k | } else if (tag >= 2 && tag < 28) { |
524 | 981 | tag = 'A' + (tag - 2); |
525 | 981 | } |
526 | 7.61k | if (sas_validate_tag(tag) == READSTAT_OK) { |
527 | 4.35k | value->tag = tag; |
528 | 4.35k | value->is_tagged_missing = 1; |
529 | 4.35k | } else { |
530 | 3.25k | value->tag = 0; |
531 | 3.25k | value->is_system_missing = 1; |
532 | 3.25k | } |
533 | 7.61k | } |