/src/readstat/src/sas/readstat_sas_rle.c
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #include <sys/types.h> |
3 | | #include <string.h> |
4 | | #include <stdio.h> |
5 | | |
6 | | #if defined(_MSC_VER) |
7 | | #include <BaseTsd.h> |
8 | | typedef SSIZE_T ssize_t; |
9 | | #endif |
10 | | |
11 | | #include "readstat_sas_rle.h" |
12 | | |
13 | 1.15k | #define SAS_RLE_COMMAND_COPY64 0 |
14 | 63 | #define SAS_RLE_COMMAND_COPY64_PLUS_4096 1 |
15 | 357 | #define SAS_RLE_COMMAND_COPY96 2 |
16 | 921 | #define SAS_RLE_COMMAND_INSERT_BYTE18 4 |
17 | 834 | #define SAS_RLE_COMMAND_INSERT_AT17 5 |
18 | 1.08k | #define SAS_RLE_COMMAND_INSERT_BLANK17 6 |
19 | 1.24k | #define SAS_RLE_COMMAND_INSERT_ZERO17 7 |
20 | 751 | #define SAS_RLE_COMMAND_COPY1 8 |
21 | 359 | #define SAS_RLE_COMMAND_COPY17 9 |
22 | 273 | #define SAS_RLE_COMMAND_COPY33 10 |
23 | 318 | #define SAS_RLE_COMMAND_COPY49 11 |
24 | 625 | #define SAS_RLE_COMMAND_INSERT_BYTE3 12 |
25 | 844 | #define SAS_RLE_COMMAND_INSERT_AT2 13 |
26 | 1.88k | #define SAS_RLE_COMMAND_INSERT_BLANK2 14 |
27 | 2.44k | #define SAS_RLE_COMMAND_INSERT_ZERO2 15 |
28 | | |
29 | 0 | #define MAX_INSERT_RUN 4112 // 4095 + 17 |
30 | 0 | #define MAX_COPY_RUN 4159 // 4095 + 64 |
31 | | |
32 | | static size_t command_lengths[16] = { |
33 | | [SAS_RLE_COMMAND_COPY64] = 1, |
34 | | [SAS_RLE_COMMAND_COPY64_PLUS_4096] = 1, |
35 | | [SAS_RLE_COMMAND_INSERT_BYTE18] = 2, |
36 | | [SAS_RLE_COMMAND_INSERT_AT17] = 1, |
37 | | [SAS_RLE_COMMAND_INSERT_BLANK17] = 1, |
38 | | [SAS_RLE_COMMAND_INSERT_ZERO17] = 1, |
39 | | [SAS_RLE_COMMAND_INSERT_BYTE3] = 1 |
40 | | }; |
41 | | |
42 | 0 | ssize_t sas_rle_decompressed_len(const void *input_buf, size_t input_len) { |
43 | 0 | return sas_rle_decompress(NULL, 0, input_buf, input_len); |
44 | 0 | } |
45 | | |
46 | | ssize_t sas_rle_decompress(void *output_buf, size_t output_len, |
47 | 263 | const void *input_buf, size_t input_len) { |
48 | 263 | unsigned char *buffer = (unsigned char *)output_buf; |
49 | 263 | unsigned char *output = buffer; |
50 | 263 | size_t output_written = 0; |
51 | | |
52 | 263 | const unsigned char *input = (const unsigned char *)input_buf; |
53 | | |
54 | 15.6k | while (input < (const unsigned char *)input_buf + input_len) { |
55 | 15.4k | unsigned char control = *input++; |
56 | 15.4k | unsigned char command = (control & 0xF0) >> 4; |
57 | 15.4k | unsigned char length = (control & 0x0F); |
58 | 15.4k | int copy_len = 0; |
59 | 15.4k | int insert_len = 0; |
60 | 15.4k | unsigned char insert_byte = '\0'; |
61 | 15.4k | if (input + command_lengths[command] > (const unsigned char *)input_buf + input_len) { |
62 | 4 | return -1; |
63 | 4 | } |
64 | 15.4k | switch (command) { |
65 | 1.15k | case SAS_RLE_COMMAND_COPY64: |
66 | 1.15k | copy_len = (*input++) + 64 + length * 256; |
67 | 1.15k | break; |
68 | 63 | case SAS_RLE_COMMAND_COPY64_PLUS_4096: |
69 | 63 | copy_len = (*input++) + 64 + length * 256 + 4096; |
70 | 63 | break; |
71 | 357 | case SAS_RLE_COMMAND_COPY96: copy_len = length + 96; break; |
72 | 921 | case SAS_RLE_COMMAND_INSERT_BYTE18: |
73 | 921 | insert_len = (*input++) + 18 + length * 256; |
74 | 921 | insert_byte = *input++; |
75 | 921 | break; |
76 | 834 | case SAS_RLE_COMMAND_INSERT_AT17: |
77 | 834 | insert_len = (*input++) + 17 + length * 256; |
78 | 834 | insert_byte = '@'; |
79 | 834 | break; |
80 | 1.08k | case SAS_RLE_COMMAND_INSERT_BLANK17: |
81 | 1.08k | insert_len = (*input++) + 17 + length * 256; |
82 | 1.08k | insert_byte = ' '; |
83 | 1.08k | break; |
84 | 1.24k | case SAS_RLE_COMMAND_INSERT_ZERO17: |
85 | 1.24k | insert_len = (*input++) + 17 + length * 256; |
86 | 1.24k | insert_byte = '\0'; |
87 | 1.24k | break; |
88 | 751 | case SAS_RLE_COMMAND_COPY1: copy_len = length + 1; break; |
89 | 359 | case SAS_RLE_COMMAND_COPY17: copy_len = length + 17; break; |
90 | 273 | case SAS_RLE_COMMAND_COPY33: copy_len = length + 33; break; |
91 | 318 | case SAS_RLE_COMMAND_COPY49: copy_len = length + 49; break; |
92 | 625 | case SAS_RLE_COMMAND_INSERT_BYTE3: |
93 | 625 | insert_byte = *input++; |
94 | 625 | insert_len = length + 3; |
95 | 625 | break; |
96 | 844 | case SAS_RLE_COMMAND_INSERT_AT2: |
97 | 844 | insert_byte = '@'; |
98 | 844 | insert_len = length + 2; |
99 | 844 | break; |
100 | 1.88k | case SAS_RLE_COMMAND_INSERT_BLANK2: |
101 | 1.88k | insert_byte = ' '; |
102 | 1.88k | insert_len = length + 2; |
103 | 1.88k | break; |
104 | 2.44k | case SAS_RLE_COMMAND_INSERT_ZERO2: |
105 | 2.44k | insert_byte = '\0'; |
106 | 2.44k | insert_len = length + 2; |
107 | 2.44k | break; |
108 | 2.29k | default: |
109 | | /* error out here? */ |
110 | 2.29k | break; |
111 | 15.4k | } |
112 | 15.4k | if (copy_len) { |
113 | 3.27k | if (output_written + copy_len > output_len) { |
114 | 16 | return -1; |
115 | 16 | } |
116 | 3.25k | if (input + copy_len > (const unsigned char *)input_buf + input_len) { |
117 | 53 | return -1; |
118 | 53 | } |
119 | 3.20k | if (output) { |
120 | 3.20k | memcpy(&output[output_written], input, copy_len); |
121 | 3.20k | } |
122 | 3.20k | input += copy_len; |
123 | 3.20k | output_written += copy_len; |
124 | 3.20k | } |
125 | 15.3k | if (insert_len) { |
126 | 9.88k | if (output_written + insert_len > output_len) { |
127 | 20 | return -1; |
128 | 20 | } |
129 | 9.86k | if (output) { |
130 | 9.86k | memset(&output[output_written], insert_byte, insert_len); |
131 | 9.86k | } |
132 | 9.86k | output_written += insert_len; |
133 | 9.86k | } |
134 | 15.3k | } |
135 | | |
136 | 170 | return output_written; |
137 | 263 | } |
138 | | |
139 | 0 | static size_t sas_rle_measure_copy_run(size_t copy_run) { |
140 | 0 | size_t len = 0; |
141 | 0 | while (copy_run >= MAX_COPY_RUN) { |
142 | 0 | len += 2 + MAX_COPY_RUN; |
143 | 0 | copy_run -= MAX_COPY_RUN; |
144 | 0 | } |
145 | 0 | return len + (copy_run > 64) + (copy_run > 0) + copy_run; |
146 | 0 | } |
147 | | |
148 | | static size_t sas_rle_copy_run(unsigned char *output_buf, size_t offset, |
149 | 0 | const unsigned char *copy, size_t copy_run) { |
150 | 0 | unsigned char *out = output_buf + offset; |
151 | 0 | if (output_buf == NULL) |
152 | 0 | return sas_rle_measure_copy_run(copy_run); |
153 | | |
154 | 0 | while (copy_run >= MAX_COPY_RUN) { |
155 | 0 | *out++ = (SAS_RLE_COMMAND_COPY64 << 4) + 0x0F; |
156 | 0 | *out++ = 0xFF; |
157 | 0 | memcpy(out, copy, MAX_COPY_RUN); |
158 | 0 | out += MAX_COPY_RUN; |
159 | 0 | copy += MAX_COPY_RUN; |
160 | 0 | copy_run -= MAX_COPY_RUN; |
161 | 0 | } |
162 | |
|
163 | 0 | if (copy_run > 64) { |
164 | 0 | int length = (copy_run - 64) / 256; |
165 | 0 | unsigned char rem = (copy_run - 64) % 256; |
166 | 0 | *out++ = (SAS_RLE_COMMAND_COPY64 << 4) + (length & 0x0F); |
167 | 0 | *out++ = rem; |
168 | 0 | } else if (copy_run >= 49) { |
169 | 0 | *out++ = (SAS_RLE_COMMAND_COPY49 << 4) + (copy_run - 49); |
170 | 0 | } else if (copy_run >= 33) { |
171 | 0 | *out++ = (SAS_RLE_COMMAND_COPY33 << 4) + (copy_run - 33); |
172 | 0 | } else if (copy_run >= 17) { |
173 | 0 | *out++ = (SAS_RLE_COMMAND_COPY17 << 4) + (copy_run - 17); |
174 | 0 | } else if (copy_run >= 1) { |
175 | 0 | *out++ = (SAS_RLE_COMMAND_COPY1 << 4) + (copy_run - 1); |
176 | 0 | } |
177 | 0 | memcpy(out, copy, copy_run); |
178 | 0 | out += copy_run; |
179 | 0 | return out - (output_buf + offset); |
180 | 0 | } |
181 | | |
182 | 0 | static int sas_rle_is_special_byte(unsigned char last_byte) { |
183 | 0 | return (last_byte == '@' || last_byte == ' ' || last_byte == '\0'); |
184 | 0 | } |
185 | | |
186 | 0 | static size_t sas_rle_measure_insert_run(unsigned char last_byte, size_t insert_run) { |
187 | 0 | if (sas_rle_is_special_byte(last_byte)) |
188 | 0 | return insert_run > 17 ? 2 : 1; |
189 | | |
190 | 0 | return insert_run > 18 ? 3 : 2; |
191 | 0 | } |
192 | | |
193 | 0 | static size_t sas_rle_insert_run(unsigned char *output_buf, size_t offset, unsigned char last_byte, size_t insert_run) { |
194 | 0 | unsigned char *out = output_buf + offset; |
195 | 0 | if (output_buf == NULL) |
196 | 0 | return sas_rle_measure_insert_run(last_byte, insert_run); |
197 | | |
198 | 0 | if (sas_rle_is_special_byte(last_byte)) { |
199 | 0 | if (insert_run > 17) { |
200 | 0 | int length = (insert_run - 17) / 256; |
201 | 0 | unsigned char rem = (insert_run - 17) % 256; |
202 | 0 | if (last_byte == '@') { |
203 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_AT17 << 4) + (length & 0x0F); |
204 | 0 | } else if (last_byte == ' ') { |
205 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_BLANK17 << 4) + (length & 0x0F); |
206 | 0 | } else if (last_byte == '\0') { |
207 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_ZERO17 << 4) + (length & 0x0F); |
208 | 0 | } |
209 | 0 | *out++ = rem; |
210 | 0 | } else if (insert_run >= 2) { |
211 | 0 | if (last_byte == '@') { |
212 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_AT2 << 4) + (insert_run - 2); |
213 | 0 | } else if (last_byte == ' ') { |
214 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_BLANK2 << 4) + (insert_run - 2); |
215 | 0 | } else if (last_byte == '\0') { |
216 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_ZERO2 << 4) + (insert_run - 2); |
217 | 0 | } |
218 | 0 | } |
219 | 0 | } else if (insert_run > 18) { |
220 | 0 | int length = (insert_run - 18) / 256; |
221 | 0 | unsigned char rem = (insert_run - 18) % 256; |
222 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_BYTE18 << 4) + (length & 0x0F); |
223 | 0 | *out++ = rem; |
224 | 0 | *out++ = last_byte; |
225 | 0 | } else if (insert_run >= 3) { |
226 | 0 | *out++ = (SAS_RLE_COMMAND_INSERT_BYTE3 << 4) + (insert_run - 3); |
227 | 0 | *out++ = last_byte; |
228 | 0 | } |
229 | 0 | return out - (output_buf + offset); |
230 | 0 | } |
231 | | |
232 | 0 | static int sas_rle_is_insert_run(unsigned char last_byte, size_t insert_run) { |
233 | 0 | if (sas_rle_is_special_byte(last_byte)) |
234 | 0 | return (insert_run > 1); |
235 | | |
236 | 0 | return (insert_run > 2); |
237 | 0 | } |
238 | | |
239 | 0 | ssize_t sas_rle_compressed_len(const void *bytes, size_t len) { |
240 | 0 | return sas_rle_compress(NULL, 0, bytes, len); |
241 | 0 | } |
242 | | |
243 | | ssize_t sas_rle_compress(void *output_buf, size_t output_len, |
244 | 0 | const void *input_buf, size_t input_len) { |
245 | | /* TODO bounds check */ |
246 | 0 | const unsigned char *p = (const unsigned char *)input_buf; |
247 | 0 | const unsigned char *pe = p + input_len; |
248 | 0 | const unsigned char *copy = p; |
249 | |
|
250 | 0 | unsigned char *out = (unsigned char *)output_buf; |
251 | |
|
252 | 0 | size_t insert_run = 0; |
253 | 0 | size_t copy_run = 0; |
254 | 0 | size_t out_written = 0; |
255 | 0 | unsigned char last_byte = 0; |
256 | |
|
257 | 0 | while (p < pe) { |
258 | 0 | unsigned char c = *p; |
259 | 0 | if (insert_run == 0) { |
260 | 0 | insert_run = 1; |
261 | 0 | } else if (c == last_byte && insert_run < MAX_INSERT_RUN) { |
262 | 0 | insert_run++; |
263 | 0 | } else { |
264 | 0 | if (sas_rle_is_insert_run(last_byte, insert_run)) { |
265 | 0 | out_written += sas_rle_copy_run(out, out_written, copy, copy_run); |
266 | 0 | out_written += sas_rle_insert_run(out, out_written, last_byte, insert_run); |
267 | 0 | copy_run = 0; |
268 | 0 | copy = p; |
269 | 0 | } else { |
270 | 0 | copy_run += insert_run; |
271 | 0 | } |
272 | 0 | insert_run = 1; |
273 | 0 | } |
274 | 0 | last_byte = c; |
275 | 0 | p++; |
276 | 0 | } |
277 | |
|
278 | 0 | if (sas_rle_is_insert_run(last_byte, insert_run)) { |
279 | 0 | out_written += sas_rle_copy_run(out, out_written, copy, copy_run); |
280 | 0 | out_written += sas_rle_insert_run(out, out_written, last_byte, insert_run); |
281 | 0 | } else { |
282 | 0 | out_written += sas_rle_copy_run(out, out_written, copy, copy_run + insert_run); |
283 | 0 | } |
284 | |
|
285 | 0 | return out_written; |
286 | 0 | } |