/src/readstat/src/spss/readstat_sav_compress.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include <string.h> |
2 | | #include <stdint.h> |
3 | | |
4 | | #include "../readstat.h" |
5 | | #include "../readstat_bits.h" |
6 | | #include "../readstat_iconv.h" |
7 | | #include "readstat_sav.h" |
8 | | #include "readstat_sav_compress.h" |
9 | | |
/*
 * Upper bound, in bytes, on the compressed form of a row that occupies
 * uncompressed_length bytes uncompressed.
 *
 * The bound is the payload itself plus one 8-byte block for every eight
 * 8-byte segments (rounded down), plus one block more.
 */
size_t sav_compressed_row_bound(size_t uncompressed_length) {
    const size_t segment_count = uncompressed_length / 8;
    const size_t control_bytes = (segment_count + 8) / 8 * 8;

    return uncompressed_length + control_bytes;
}
13 | | |
14 | | size_t sav_compress_row(void *output_row, void *input_row, size_t input_len, |
15 | 0 | readstat_writer_t *writer) { |
16 | 0 | unsigned char *output = output_row; |
17 | 0 | unsigned char *input = input_row; |
18 | 0 | off_t input_offset = 0; |
19 | |
|
20 | 0 | off_t output_offset = 8; |
21 | 0 | off_t control_offset = 0; |
22 | 0 | int i; |
23 | |
|
24 | 0 | memset(&output[control_offset], 0, 8); |
25 | |
|
26 | 0 | for (i=0; i<writer->variables_count; i++) { |
27 | 0 | readstat_variable_t *variable = readstat_get_variable(writer, i); |
28 | 0 | if (variable->type == READSTAT_TYPE_STRING) { |
29 | 0 | size_t width = variable->storage_width; |
30 | 0 | while (width > 0) { |
31 | 0 | if (memcmp(&input[input_offset], SAV_EIGHT_SPACES, 8) == 0) { |
32 | 0 | output[control_offset++] = 254; |
33 | 0 | } else { |
34 | 0 | output[control_offset++] = 253; |
35 | 0 | memcpy(&output[output_offset], &input[input_offset], 8); |
36 | 0 | output_offset += 8; |
37 | 0 | } |
38 | 0 | if (control_offset % 8 == 0) { |
39 | 0 | control_offset = output_offset; |
40 | 0 | memset(&output[control_offset], 0, 8); |
41 | 0 | output_offset += 8; |
42 | 0 | } |
43 | 0 | input_offset += 8; |
44 | 0 | width -= 8; |
45 | 0 | } |
46 | 0 | } else { |
47 | 0 | uint64_t int_value; |
48 | 0 | memcpy(&int_value, &input[input_offset], 8); |
49 | 0 | if (int_value == SAV_MISSING_DOUBLE) { |
50 | 0 | output[control_offset++] = 255; |
51 | 0 | } else { |
52 | 0 | double fp_value; |
53 | 0 | memcpy(&fp_value, &input[input_offset], 8); |
54 | 0 | if (fp_value > -100 && fp_value < 152 && (int)fp_value == fp_value) { |
55 | 0 | output[control_offset++] = (int)fp_value + 100; |
56 | 0 | } else { |
57 | 0 | output[control_offset++] = 253; |
58 | 0 | memcpy(&output[output_offset], &input[input_offset], 8); |
59 | 0 | output_offset += 8; |
60 | 0 | } |
61 | 0 | } |
62 | 0 | if (control_offset % 8 == 0) { |
63 | 0 | control_offset = output_offset; |
64 | 0 | memset(&output[control_offset], 0, 8); |
65 | 0 | output_offset += 8; |
66 | 0 | } |
67 | 0 | input_offset += 8; |
68 | 0 | } |
69 | 0 | } |
70 | |
|
71 | 0 | if (writer->current_row + 1 == writer->row_count) |
72 | 0 | output[control_offset] = 252; |
73 | |
|
74 | 0 | return output_offset; |
75 | 0 | } |
76 | | |
77 | 2.26M | void sav_decompress_row(struct sav_row_stream_s *state) { |
78 | 2.26M | double fp_value; |
79 | 2.26M | uint64_t missing_value = state->bswap ? byteswap8(state->missing_value) : state->missing_value; |
80 | 2.26M | int i = 8 - state->i; |
81 | 3.09M | while (1) { |
82 | 3.09M | if (i == 8) { |
83 | 1.10M | if (state->avail_in < 8) { |
84 | 368 | state->status = SAV_ROW_STREAM_NEED_DATA; |
85 | 368 | goto done; |
86 | 368 | } |
87 | | |
88 | 1.10M | memcpy(state->chunk, state->next_in, 8); |
89 | 1.10M | state->next_in += 8; |
90 | 1.10M | state->avail_in -= 8; |
91 | 1.10M | i = 0; |
92 | 1.10M | } |
93 | | |
94 | 9.66M | while (i<8) { |
95 | 8.83M | switch (state->chunk[i]) { |
96 | 6.24M | case 0: |
97 | 6.24M | break; |
98 | 74 | case 252: |
99 | 74 | state->status = SAV_ROW_STREAM_FINISHED_ALL; |
100 | 74 | goto done; |
101 | 99.7k | case 253: |
102 | 99.7k | if (state->avail_in < 8) { |
103 | 70 | state->status = SAV_ROW_STREAM_NEED_DATA; |
104 | 70 | goto done; |
105 | 70 | } |
106 | 99.6k | memcpy(state->next_out, state->next_in, 8); |
107 | 99.6k | state->next_out += 8; |
108 | 99.6k | state->avail_out -= 8; |
109 | 99.6k | state->next_in += 8; |
110 | 99.6k | state->avail_in -= 8; |
111 | 99.6k | break; |
112 | 9.70k | case 254: |
113 | 9.70k | memset(state->next_out, ' ', 8); |
114 | 9.70k | state->next_out += 8; |
115 | 9.70k | state->avail_out -= 8; |
116 | 9.70k | break; |
117 | 153k | case 255: |
118 | 153k | memcpy(state->next_out, &missing_value, sizeof(uint64_t)); |
119 | 153k | state->next_out += 8; |
120 | 153k | state->avail_out -= 8; |
121 | 153k | break; |
122 | 2.33M | default: |
123 | 2.33M | fp_value = state->chunk[i] - state->bias; |
124 | 2.33M | fp_value = state->bswap ? byteswap_double(fp_value) : fp_value; |
125 | 2.33M | memcpy(state->next_out, &fp_value, sizeof(double)); |
126 | 2.33M | state->next_out += 8; |
127 | 2.33M | state->avail_out -= 8; |
128 | 2.33M | break; |
129 | 8.83M | } |
130 | 8.83M | i++; |
131 | 8.83M | if (state->avail_out < 8) { |
132 | 2.26M | state->status = SAV_ROW_STREAM_FINISHED_ROW; |
133 | 2.26M | goto done; |
134 | 2.26M | } |
135 | 8.83M | } |
136 | 3.08M | } |
137 | 2.26M | done: |
138 | 2.26M | state->i = 8 - i; |
139 | 2.26M | } |