Coverage Report

Created: 2025-01-09 06:41

/src/readstat/src/sas/readstat_sas_rle.c
Line
Count
Source (jump to first uncovered line)
1
2
#include <sys/types.h>
3
#include <string.h>
4
#include <stdio.h>
5
6
#if defined(_MSC_VER)
7
#include <BaseTsd.h>
8
typedef SSIZE_T ssize_t;
9
#endif
10
11
#include "readstat_sas_rle.h"
12
13
1.15k
#define SAS_RLE_COMMAND_COPY64          0
14
63
#define SAS_RLE_COMMAND_COPY64_PLUS_4096 1
15
357
#define SAS_RLE_COMMAND_COPY96          2
16
921
#define SAS_RLE_COMMAND_INSERT_BYTE18   4
17
834
#define SAS_RLE_COMMAND_INSERT_AT17     5
18
1.08k
#define SAS_RLE_COMMAND_INSERT_BLANK17  6
19
1.24k
#define SAS_RLE_COMMAND_INSERT_ZERO17   7
20
751
#define SAS_RLE_COMMAND_COPY1           8
21
359
#define SAS_RLE_COMMAND_COPY17          9
22
273
#define SAS_RLE_COMMAND_COPY33         10
23
318
#define SAS_RLE_COMMAND_COPY49         11
24
625
#define SAS_RLE_COMMAND_INSERT_BYTE3   12
25
844
#define SAS_RLE_COMMAND_INSERT_AT2     13
26
1.88k
#define SAS_RLE_COMMAND_INSERT_BLANK2  14
27
2.44k
#define SAS_RLE_COMMAND_INSERT_ZERO2   15
28
29
0
#define MAX_INSERT_RUN 4112 // 4095 + 17
30
0
#define MAX_COPY_RUN 4159 // 4095 + 64
31
32
static size_t command_lengths[16] = {
33
    [SAS_RLE_COMMAND_COPY64] = 1,
34
    [SAS_RLE_COMMAND_COPY64_PLUS_4096] = 1,
35
    [SAS_RLE_COMMAND_INSERT_BYTE18] = 2,
36
    [SAS_RLE_COMMAND_INSERT_AT17] = 1,
37
    [SAS_RLE_COMMAND_INSERT_BLANK17] = 1,
38
    [SAS_RLE_COMMAND_INSERT_ZERO17] = 1,
39
    [SAS_RLE_COMMAND_INSERT_BYTE3] = 1
40
};
41
42
0
ssize_t sas_rle_decompressed_len(const void *input_buf, size_t input_len) {
43
0
    return sas_rle_decompress(NULL, 0, input_buf, input_len);
44
0
}
45
46
ssize_t sas_rle_decompress(void *output_buf, size_t output_len, 
47
263
        const void *input_buf, size_t input_len) {
48
263
    unsigned char *buffer = (unsigned char *)output_buf;
49
263
    unsigned char *output = buffer;
50
263
    size_t output_written = 0;
51
52
263
    const unsigned char *input = (const unsigned char *)input_buf;
53
54
15.6k
    while (input < (const unsigned char *)input_buf + input_len) {
55
15.4k
        unsigned char control = *input++;
56
15.4k
        unsigned char command = (control & 0xF0) >> 4;
57
15.4k
        unsigned char length = (control & 0x0F);
58
15.4k
        int copy_len = 0;
59
15.4k
        int insert_len = 0;
60
15.4k
        unsigned char insert_byte = '\0';
61
15.4k
        if (input + command_lengths[command] > (const unsigned char *)input_buf + input_len) {
62
4
            return -1;
63
4
        }
64
15.4k
        switch (command) {
65
1.15k
            case SAS_RLE_COMMAND_COPY64:
66
1.15k
                copy_len = (*input++) + 64 + length * 256;
67
1.15k
                break;
68
63
            case SAS_RLE_COMMAND_COPY64_PLUS_4096:
69
63
                copy_len = (*input++) + 64 + length * 256 + 4096;
70
63
                break;
71
357
            case SAS_RLE_COMMAND_COPY96: copy_len = length + 96; break;
72
921
            case SAS_RLE_COMMAND_INSERT_BYTE18:
73
921
                insert_len = (*input++) + 18 + length * 256;
74
921
                insert_byte = *input++;
75
921
                break;
76
834
            case SAS_RLE_COMMAND_INSERT_AT17:
77
834
                insert_len = (*input++) + 17 + length * 256;
78
834
                insert_byte = '@';
79
834
                break;
80
1.08k
            case SAS_RLE_COMMAND_INSERT_BLANK17:
81
1.08k
                insert_len = (*input++) + 17 + length * 256;
82
1.08k
                insert_byte = ' ';
83
1.08k
                break;
84
1.24k
            case SAS_RLE_COMMAND_INSERT_ZERO17:
85
1.24k
                insert_len = (*input++) + 17 + length * 256;
86
1.24k
                insert_byte = '\0';
87
1.24k
                break;
88
751
            case SAS_RLE_COMMAND_COPY1:  copy_len = length + 1; break;
89
359
            case SAS_RLE_COMMAND_COPY17: copy_len = length + 17; break;
90
273
            case SAS_RLE_COMMAND_COPY33: copy_len = length + 33; break;
91
318
            case SAS_RLE_COMMAND_COPY49: copy_len = length + 49; break;
92
625
            case SAS_RLE_COMMAND_INSERT_BYTE3:
93
625
                insert_byte = *input++;
94
625
                insert_len = length + 3;
95
625
                break;
96
844
            case SAS_RLE_COMMAND_INSERT_AT2:
97
844
                insert_byte = '@';
98
844
                insert_len = length + 2;
99
844
                break;
100
1.88k
            case SAS_RLE_COMMAND_INSERT_BLANK2:
101
1.88k
                insert_byte = ' ';
102
1.88k
                insert_len = length + 2;
103
1.88k
                break;
104
2.44k
            case SAS_RLE_COMMAND_INSERT_ZERO2:
105
2.44k
                insert_byte = '\0';
106
2.44k
                insert_len = length + 2;
107
2.44k
                break;
108
2.29k
            default:
109
                /* error out here? */
110
2.29k
                break;
111
15.4k
        }
112
15.4k
        if (copy_len) {
113
3.27k
            if (output_written + copy_len > output_len) {
114
16
                return -1;
115
16
            }
116
3.25k
            if (input + copy_len > (const unsigned char *)input_buf + input_len) {
117
53
                return -1;
118
53
            }
119
3.20k
            if (output) {
120
3.20k
                memcpy(&output[output_written], input, copy_len);
121
3.20k
            }
122
3.20k
            input += copy_len;
123
3.20k
            output_written += copy_len;
124
3.20k
        }
125
15.3k
        if (insert_len) {
126
9.88k
            if (output_written + insert_len > output_len) {
127
20
                return -1;
128
20
            }
129
9.86k
            if (output) {
130
9.86k
                memset(&output[output_written], insert_byte, insert_len);
131
9.86k
            }
132
9.86k
            output_written += insert_len;
133
9.86k
        }
134
15.3k
    }
135
136
170
    return output_written;
137
263
}
138
139
0
static size_t sas_rle_measure_copy_run(size_t copy_run) {
140
0
    size_t len = 0;
141
0
    while (copy_run >= MAX_COPY_RUN) {
142
0
        len += 2 + MAX_COPY_RUN;
143
0
        copy_run -= MAX_COPY_RUN;
144
0
    }
145
0
    return len + (copy_run > 64) + (copy_run > 0) + copy_run;
146
0
}
147
148
static size_t sas_rle_copy_run(unsigned char *output_buf, size_t offset,
149
0
        const unsigned char *copy, size_t copy_run) {
150
0
    unsigned char *out = output_buf + offset;
151
0
    if (output_buf == NULL)
152
0
        return sas_rle_measure_copy_run(copy_run);
153
154
0
    while (copy_run >= MAX_COPY_RUN) {
155
0
        *out++ = (SAS_RLE_COMMAND_COPY64 << 4) + 0x0F;
156
0
        *out++ = 0xFF;
157
0
        memcpy(out, copy, MAX_COPY_RUN);
158
0
        out += MAX_COPY_RUN;
159
0
        copy += MAX_COPY_RUN;
160
0
        copy_run -= MAX_COPY_RUN;
161
0
    }
162
163
0
    if (copy_run > 64) {
164
0
        int length = (copy_run - 64) / 256;
165
0
        unsigned char rem = (copy_run - 64) % 256;
166
0
        *out++ = (SAS_RLE_COMMAND_COPY64 << 4) + (length & 0x0F);
167
0
        *out++ = rem;
168
0
    } else if (copy_run >= 49) {
169
0
        *out++ = (SAS_RLE_COMMAND_COPY49 << 4) + (copy_run - 49);
170
0
    } else if (copy_run >= 33) {
171
0
        *out++ = (SAS_RLE_COMMAND_COPY33 << 4) + (copy_run - 33);
172
0
    } else if (copy_run >= 17) {
173
0
        *out++ = (SAS_RLE_COMMAND_COPY17 << 4) + (copy_run - 17);
174
0
    } else if (copy_run >= 1) {
175
0
        *out++ = (SAS_RLE_COMMAND_COPY1 << 4) + (copy_run - 1);
176
0
    }
177
0
    memcpy(out, copy, copy_run);
178
0
    out += copy_run;
179
0
    return out - (output_buf + offset);
180
0
}
181
182
0
static int sas_rle_is_special_byte(unsigned char last_byte) {
183
0
    return (last_byte == '@' || last_byte == ' ' || last_byte == '\0');
184
0
}
185
186
0
static size_t sas_rle_measure_insert_run(unsigned char last_byte, size_t insert_run) {
187
0
    if (sas_rle_is_special_byte(last_byte))
188
0
        return insert_run > 17 ? 2 : 1;
189
190
0
    return insert_run > 18 ? 3 : 2;
191
0
}
192
193
0
static size_t sas_rle_insert_run(unsigned char *output_buf, size_t offset, unsigned char last_byte, size_t insert_run) {
194
0
    unsigned char *out = output_buf + offset;
195
0
    if (output_buf == NULL)
196
0
        return sas_rle_measure_insert_run(last_byte, insert_run);
197
198
0
    if (sas_rle_is_special_byte(last_byte)) {
199
0
        if (insert_run > 17) {
200
0
            int length = (insert_run - 17) / 256;
201
0
            unsigned char rem = (insert_run - 17) % 256;
202
0
            if (last_byte == '@') {
203
0
                *out++ = (SAS_RLE_COMMAND_INSERT_AT17 << 4) + (length & 0x0F);
204
0
            } else if (last_byte == ' ') {
205
0
                *out++ = (SAS_RLE_COMMAND_INSERT_BLANK17 << 4) + (length & 0x0F);
206
0
            } else if (last_byte == '\0') {
207
0
                *out++ = (SAS_RLE_COMMAND_INSERT_ZERO17 << 4) + (length & 0x0F);
208
0
            }
209
0
            *out++ = rem;
210
0
        } else if (insert_run >= 2) {
211
0
            if (last_byte == '@') {
212
0
                *out++ = (SAS_RLE_COMMAND_INSERT_AT2 << 4) + (insert_run - 2);
213
0
            } else if (last_byte == ' ') {
214
0
                *out++ = (SAS_RLE_COMMAND_INSERT_BLANK2 << 4) + (insert_run - 2);
215
0
            } else if (last_byte == '\0') {
216
0
                *out++ = (SAS_RLE_COMMAND_INSERT_ZERO2 << 4) + (insert_run - 2);
217
0
            }
218
0
        }
219
0
    } else if (insert_run > 18) {
220
0
        int length = (insert_run - 18) / 256;
221
0
        unsigned char rem = (insert_run - 18) % 256;
222
0
        *out++ = (SAS_RLE_COMMAND_INSERT_BYTE18 << 4) + (length & 0x0F);
223
0
        *out++ = rem;
224
0
        *out++ = last_byte;
225
0
    } else if (insert_run >= 3) {
226
0
        *out++ = (SAS_RLE_COMMAND_INSERT_BYTE3 << 4) + (insert_run - 3);
227
0
        *out++ = last_byte;
228
0
    }
229
0
    return out - (output_buf + offset);
230
0
}
231
232
0
static int sas_rle_is_insert_run(unsigned char last_byte, size_t insert_run) {
233
0
    if (sas_rle_is_special_byte(last_byte))
234
0
        return (insert_run > 1);
235
236
0
    return (insert_run > 2);
237
0
}
238
239
0
ssize_t sas_rle_compressed_len(const void *bytes, size_t len) {
240
0
    return sas_rle_compress(NULL, 0, bytes, len);
241
0
}
242
243
ssize_t sas_rle_compress(void *output_buf, size_t output_len,
244
0
        const void *input_buf, size_t input_len) {
245
    /* TODO bounds check */
246
0
    const unsigned char *p = (const unsigned char *)input_buf;
247
0
    const unsigned char *pe = p + input_len;
248
0
    const unsigned char *copy = p;
249
250
0
    unsigned char *out = (unsigned char *)output_buf;
251
252
0
    size_t insert_run = 0;
253
0
    size_t copy_run = 0;
254
0
    size_t out_written = 0;
255
0
    unsigned char last_byte = 0;
256
257
0
    while (p < pe) {
258
0
        unsigned char c = *p;
259
0
        if (insert_run == 0) {
260
0
            insert_run = 1;
261
0
        } else if (c == last_byte && insert_run < MAX_INSERT_RUN) {
262
0
            insert_run++;
263
0
        } else {
264
0
            if (sas_rle_is_insert_run(last_byte, insert_run)) {
265
0
                out_written += sas_rle_copy_run(out, out_written, copy, copy_run);
266
0
                out_written += sas_rle_insert_run(out, out_written, last_byte, insert_run);
267
0
                copy_run = 0;
268
0
                copy = p;
269
0
            } else {
270
0
                copy_run += insert_run;
271
0
            }
272
0
            insert_run = 1;
273
0
        }
274
0
        last_byte = c;
275
0
        p++;
276
0
    }
277
278
0
    if (sas_rle_is_insert_run(last_byte, insert_run)) {
279
0
        out_written += sas_rle_copy_run(out, out_written, copy, copy_run);
280
0
        out_written += sas_rle_insert_run(out, out_written, last_byte, insert_run);
281
0
    } else {
282
0
        out_written += sas_rle_copy_run(out, out_written, copy, copy_run + insert_run);
283
0
    }
284
285
0
    return out_written;
286
0
}