/src/readstat/src/spss/readstat_zsav_read.c
Line | Count | Source |
1 | | #include <stdlib.h> |
2 | | #include <zlib.h> |
3 | | |
4 | | #include "../readstat.h" |
5 | | #include "../readstat_bits.h" |
6 | | #include "../readstat_iconv.h" |
7 | | #include "../readstat_malloc.h" |
8 | | #include "readstat_sav.h" |
9 | | #include "readstat_sav_compress.h" |
10 | | |
/*
 * Fixed-size header that precedes the compressed data in a ZSAV file.
 * It is read from the file in one piece, so the field order and widths
 * must not change (3 x 8 bytes).
 */
struct zheader {
    uint64_t    zheader_ofs;    /* file offset at which this header itself starts */
    uint64_t    ztrailer_ofs;   /* file offset of the trailer (struct ztrailer) */
    uint64_t    ztrailer_len;   /* trailer length in bytes: 24-byte head + 24 bytes per block entry */
};
16 | | |
/*
 * Fixed-size head of the ZSAV trailer, located at zheader.ztrailer_ofs.
 * It is read from the file in one piece, so the layout (8 + 8 + 4 + 4
 * bytes) must not change. An array of struct ztrailer_entry follows it.
 */
struct ztrailer {
    int64_t     bias;           /* byte-swapped on load; not otherwise used by the reader below */
    int64_t     zero;           /* byte-swapped on load; not otherwise used by the reader below */
    int32_t     block_size;     /* byte-swapped on load; not otherwise used by the reader below */
    int32_t     n_blocks;       /* number of block entries that follow this head */
};
23 | | |
/*
 * One trailer record per compressed block. Records are read from the file
 * as a contiguous array, so the layout (8 + 8 + 4 + 4 bytes) must not change.
 */
struct ztrailer_entry {
    int64_t     uncompressed_ofs;   /* byte-swapped on load; not otherwise used by the reader below */
    int64_t     compressed_ofs;     /* file offset at which the compressed block starts */
    int32_t     uncompressed_size;  /* size the block must have once inflated */
    int32_t     compressed_size;    /* number of compressed bytes stored in the file */
};
30 | | |
31 | | readstat_error_t zsav_read_compressed_data(sav_ctx_t *ctx, |
32 | 453 | readstat_error_t (*row_handler)(unsigned char *, size_t, sav_ctx_t *)) { |
33 | 453 | readstat_error_t retval = READSTAT_OK; |
34 | 453 | readstat_io_t *io = ctx->io; |
35 | 453 | readstat_off_t data_offset = 0; |
36 | | |
37 | 453 | size_t uncompressed_row_len = ctx->var_offset * 8; |
38 | 453 | readstat_off_t uncompressed_offset = 0; |
39 | 453 | unsigned char *uncompressed_row = NULL; |
40 | | |
41 | 453 | uLongf uncompressed_block_len = 0; |
42 | 453 | unsigned char *compressed_block = NULL, *uncompressed_block = NULL; |
43 | | |
44 | 453 | struct sav_row_stream_s state = { |
45 | 453 | .missing_value = ctx->missing_double, |
46 | 453 | .bias = ctx->bias, |
47 | 453 | .bswap = ctx->bswap }; |
48 | | |
49 | 453 | struct zheader zheader; |
50 | 453 | struct ztrailer ztrailer; |
51 | 453 | struct ztrailer_entry *ztrailer_entries = NULL; |
52 | | |
53 | 453 | int n_blocks = 0; |
54 | 453 | int block_i = 0; |
55 | 453 | int i; |
56 | | |
57 | 453 | if (io->read(&zheader, sizeof(struct zheader), io->io_ctx) < sizeof(struct zheader)) { |
58 | 40 | retval = READSTAT_ERROR_READ; |
59 | 40 | goto cleanup; |
60 | 40 | } |
61 | | |
62 | 413 | zheader.zheader_ofs = ctx->bswap ? byteswap8(zheader.zheader_ofs) : zheader.zheader_ofs; |
63 | 413 | zheader.ztrailer_ofs = ctx->bswap ? byteswap8(zheader.ztrailer_ofs) : zheader.ztrailer_ofs; |
64 | 413 | zheader.ztrailer_len = ctx->bswap ? byteswap8(zheader.ztrailer_len) : zheader.ztrailer_len; |
65 | | |
66 | 413 | if (zheader.zheader_ofs != io->seek(0, READSTAT_SEEK_CUR, io->io_ctx) - sizeof(struct zheader)) { |
67 | 94 | retval = READSTAT_ERROR_PARSE; |
68 | 94 | goto cleanup; |
69 | 94 | } |
70 | | |
71 | 319 | n_blocks = (zheader.ztrailer_len - 24) / 24; |
72 | | |
73 | 319 | if (io->seek(zheader.ztrailer_ofs, READSTAT_SEEK_SET, io->io_ctx) == -1) { |
74 | 24 | retval = READSTAT_ERROR_SEEK; |
75 | 24 | goto cleanup; |
76 | 24 | } |
77 | | |
78 | 295 | if (io->read(&ztrailer, sizeof(struct ztrailer), io->io_ctx) < sizeof(struct ztrailer)) { |
79 | 16 | retval = READSTAT_ERROR_READ; |
80 | 16 | goto cleanup; |
81 | 16 | } |
82 | | |
83 | 279 | ztrailer.bias = ctx->bswap ? byteswap8(ztrailer.bias) : ztrailer.bias; |
84 | 279 | ztrailer.zero = ctx->bswap ? byteswap8(ztrailer.zero) : ztrailer.zero; |
85 | 279 | ztrailer.block_size = ctx->bswap ? byteswap4(ztrailer.block_size) : ztrailer.block_size; |
86 | 279 | ztrailer.n_blocks = ctx->bswap ? byteswap4(ztrailer.n_blocks) : ztrailer.n_blocks; |
87 | | |
88 | 279 | if (n_blocks != ztrailer.n_blocks) { |
89 | 50 | retval = READSTAT_ERROR_PARSE; |
90 | 50 | goto cleanup; |
91 | 50 | } |
92 | | |
93 | 229 | if (n_blocks && (ztrailer_entries = readstat_malloc(n_blocks * sizeof(struct ztrailer_entry))) == NULL) { |
94 | 24 | retval = READSTAT_ERROR_MALLOC; |
95 | 24 | goto cleanup; |
96 | 24 | } |
97 | | |
98 | 205 | if (io->read(ztrailer_entries, n_blocks * sizeof(struct ztrailer_entry), io->io_ctx) < |
99 | 205 | n_blocks * sizeof(struct ztrailer_entry)) { |
100 | 24 | retval = READSTAT_ERROR_READ; |
101 | 24 | goto cleanup; |
102 | 24 | } |
103 | | |
104 | 11.1k | for (i=0; i<n_blocks; i++) { |
105 | 10.9k | struct ztrailer_entry *entry = &ztrailer_entries[i]; |
106 | | |
107 | 10.9k | entry->uncompressed_ofs = ctx->bswap ? byteswap8(entry->uncompressed_ofs) : entry->uncompressed_ofs; |
108 | 10.9k | entry->compressed_ofs = ctx->bswap ? byteswap8(entry->compressed_ofs) : entry->compressed_ofs; |
109 | 10.9k | entry->uncompressed_size = ctx->bswap ? byteswap4(entry->uncompressed_size) : entry->uncompressed_size; |
110 | 10.9k | entry->compressed_size = ctx->bswap ? byteswap4(entry->compressed_size) : entry->compressed_size; |
111 | 10.9k | } |
112 | | |
113 | 181 | if (uncompressed_row_len && (uncompressed_row = readstat_malloc(uncompressed_row_len)) == NULL) { |
114 | 0 | retval = READSTAT_ERROR_MALLOC; |
115 | 0 | goto cleanup; |
116 | 0 | } |
117 | | |
118 | 181 | while (1) { |
119 | 181 | if (block_i == n_blocks) |
120 | 2 | goto cleanup; |
121 | | |
122 | 179 | struct ztrailer_entry *entry = &ztrailer_entries[block_i]; |
123 | 179 | if (io->seek(entry->compressed_ofs, READSTAT_SEEK_SET, io->io_ctx) == -1) { |
124 | 26 | retval = READSTAT_ERROR_SEEK; |
125 | 26 | goto cleanup; |
126 | 26 | } |
127 | 153 | if ((compressed_block = readstat_realloc(compressed_block, entry->compressed_size)) == NULL) { |
128 | 32 | retval = READSTAT_ERROR_MALLOC; |
129 | 32 | goto cleanup; |
130 | 32 | } |
131 | 121 | if (io->read(compressed_block, entry->compressed_size, io->io_ctx) != entry->compressed_size) { |
132 | 43 | retval = READSTAT_ERROR_READ; |
133 | 43 | goto cleanup; |
134 | 43 | } |
135 | | |
136 | 78 | uncompressed_block_len = entry->uncompressed_size; |
137 | 78 | if ((uncompressed_block = readstat_realloc(uncompressed_block, uncompressed_block_len)) == NULL) { |
138 | 7 | retval = READSTAT_ERROR_MALLOC; |
139 | 7 | goto cleanup; |
140 | 7 | } |
141 | 71 | int status = uncompress(uncompressed_block, &uncompressed_block_len, |
142 | 71 | compressed_block, entry->compressed_size); |
143 | 71 | if (status != Z_OK || uncompressed_block_len != entry->uncompressed_size) { |
144 | 25 | retval = READSTAT_ERROR_PARSE; |
145 | 25 | goto cleanup; |
146 | 25 | } |
147 | | |
148 | 46 | block_i++; |
149 | 46 | state.status = SAV_ROW_STREAM_HAVE_DATA; |
150 | 46 | data_offset = 0; |
151 | | |
152 | 259 | while (state.status != SAV_ROW_STREAM_NEED_DATA) { |
153 | 259 | state.next_in = &uncompressed_block[data_offset]; |
154 | 259 | state.avail_in = uncompressed_block_len - data_offset; |
155 | | |
156 | 259 | state.next_out = &uncompressed_row[uncompressed_offset]; |
157 | 259 | state.avail_out = uncompressed_row_len - uncompressed_offset; |
158 | | |
159 | 259 | sav_decompress_row(&state); |
160 | | |
161 | 259 | uncompressed_offset = uncompressed_row_len - state.avail_out; |
162 | 259 | data_offset = uncompressed_block_len - state.avail_in; |
163 | | |
164 | 259 | if (state.status == SAV_ROW_STREAM_FINISHED_ROW) { |
165 | 217 | retval = row_handler(uncompressed_row, uncompressed_row_len, ctx); |
166 | 217 | if (retval != READSTAT_OK) |
167 | 2 | goto cleanup; |
168 | | |
169 | 215 | uncompressed_offset = 0; |
170 | 215 | } |
171 | | |
172 | 257 | if (state.status == SAV_ROW_STREAM_FINISHED_ALL) |
173 | 42 | goto cleanup; |
174 | 215 | if (ctx->row_limit > 0 && ctx->current_row == ctx->row_limit) |
175 | 2 | goto cleanup; |
176 | 215 | } |
177 | 46 | } |
178 | | |
179 | 453 | cleanup: |
180 | 453 | if (uncompressed_row) |
181 | 181 | free(uncompressed_row); |
182 | 453 | if (ztrailer_entries) |
183 | 203 | free(ztrailer_entries); |
184 | 453 | if (compressed_block) |
185 | 121 | free(compressed_block); |
186 | 453 | if (uncompressed_block) |
187 | 71 | free(uncompressed_block); |
188 | | |
189 | 453 | return retval; |
190 | 181 | } |