/src/gdal/frmts/mrf/Packer_RLE.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright 2016-2021 Esri |
3 | | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | you may not use this file except in compliance with the License. |
5 | | You may obtain a copy of the License at |
6 | | http://www.apache.org/licenses/LICENSE-2.0 |
7 | | Unless required by applicable law or agreed to in writing, software |
8 | | distributed under the License is distributed on an "AS IS" BASIS, |
9 | | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
10 | | See the License for the specific language governing permissions and |
11 | | limitations under the License. |
12 | | |
13 | | Contributors: Lucian Plesea |
14 | | */ |
15 | | |
16 | | // |
17 | | // Implements an RLE codec packer. The RLE uses a dedicated marker code |
18 | | // This particular packer picks the least used byte value as marker, |
19 | | // which makes the worst case data expansion more reasonable, at the expense |
20 | | // of taking two passes over the input data |
21 | | // |
22 | | |
23 | | // For memset |
24 | | #include <cstring> |
25 | | #include <vector> |
26 | | #include <algorithm> |
27 | | |
28 | | #include "marfa.h" |
29 | | #include "Packer_RLE.h" |
30 | | |
31 | | NAMESPACE_MRF_START |
32 | | |
33 | 0 | Packer::~Packer() = default; |
34 | | |
35 | | // |
36 | | // RLE yarn codec, uses a dedicated code as marker, default value is 0xC3 |
37 | | // This is the C implementation, there is also a C++ one |
38 | | // For yarnball compression, it performs better than using a double char marker |
39 | | // |
40 | | // Includes both encoder and decoder |
41 | | // Worst case input:output ratio is 1:2, when the input is 1,2 or three marker |
42 | | // codes For long input streams, the input:output ratio is between 13260.6:1 |
43 | | // (best) and 1:1.75 (worst) |
44 | | // |
45 | | // Could be a byte stream filter which needs very little local storage |
46 | | // |
47 | | |
48 | | constexpr int MAX_RUN = 768 + 0xffff; |
49 | | typedef unsigned char Byte; |
50 | | |
51 | 0 | #define UC(X) static_cast<Byte>(X) |
52 | | |
53 | | // Encode helper function |
54 | | // It returns how many times the byte at *s is repeated |
55 | | // a value between 1 and min(max_count, MAX_RUN) |
56 | | inline static int run_length(const Byte *s, int max_count) |
57 | 0 | { |
58 | 0 | if (max_count > MAX_RUN) |
59 | 0 | max_count = MAX_RUN; |
60 | 0 | const Byte c = *s++; |
61 | 0 | for (int count = 1; count < max_count; count++) |
62 | 0 | if (c != *s++) |
63 | 0 | return count; |
64 | 0 | return max_count; |
65 | 0 | } |
66 | | |
67 | | #define RET_NOW \ |
68 | 0 | do \ |
69 | 0 | { \ |
70 | 0 | return static_cast<size_t>(next - reinterpret_cast<Byte *>(obuf)); \ |
71 | 0 | } while (0) |
72 | | |
73 | | // |
74 | | // C compress function, returns compressed size |
75 | | // len is the size of the input buffer |
76 | | // caller should ensure that output buffer is at least 2 * N to be safe, |
77 | | // dropping to N * 7/4 for larger input |
78 | | // If the Code is chosen to be the least present value in input, the |
79 | | // output size requirement is bound by N / 256 + N |
80 | | // |
81 | | static size_t toYarn(const char *ibuffer, char *obuf, size_t len, |
82 | | Byte CODE = 0xC3) |
83 | 0 | { |
84 | 0 | Byte *next = reinterpret_cast<Byte *>(obuf); |
85 | |
|
86 | 0 | while (len) |
87 | 0 | { |
88 | 0 | Byte b = static_cast<Byte>(*ibuffer); |
89 | 0 | int run = run_length(reinterpret_cast<const Byte *>(ibuffer), |
90 | 0 | static_cast<int>(len)); |
91 | 0 | if (run < 4) |
92 | 0 | { // Encoded as single bytes, stored as such, CODE followed by a zero |
93 | 0 | run = 1; |
94 | 0 | *next++ = b; |
95 | 0 | if (CODE == b) |
96 | 0 | *next++ = 0; |
97 | 0 | } |
98 | 0 | else |
99 | 0 | { // Encoded as a sequence |
100 | 0 | *next++ = CODE; // Start with Marker code, always present |
101 | |
|
102 | 0 | if (run > 767) |
103 | 0 | { // long sequence |
104 | 0 | ibuffer += 768; // May be unsafe to read *ibuffer |
105 | 0 | len -= 768; |
106 | 0 | run -= 768; |
107 | 0 | *next++ = 3; |
108 | 0 | *next++ = UC(run >> 8); // Forced high count |
109 | 0 | } |
110 | 0 | else if (run > 255) |
111 | 0 | { // Between 256 and 767, medium sequence |
112 | 0 | *next++ = UC(run >> 8); // High count |
113 | 0 | } |
114 | |
|
115 | 0 | *next++ = UC(run & 0xff); // Low count, always present |
116 | 0 | *next++ = b; // End with Value, always present |
117 | 0 | } |
118 | 0 | ibuffer += run; |
119 | 0 | len -= run; |
120 | 0 | } |
121 | 0 | RET_NOW; |
122 | 0 | } |
123 | | |
124 | | // Check that another input byte can be read, return now otherwise |
125 | | // Adjusts the input length too |
126 | | #define CHECK_INPUT \ |
127 | 0 | if (ilen == 0) \ |
128 | 0 | RET_NOW |
129 | | // Reads a byte and adjust the input pointer |
130 | 0 | #define NEXT_BYTE UC(*ibuffer++) |
131 | | |
132 | | // |
133 | | // C decompress function, returns actual decompressed size |
134 | | // Stops when either olen is reached or when ilen is exhausted |
135 | | // returns the number of output bytes written |
136 | | // |
137 | | static size_t fromYarn(const char *ibuffer, size_t ilen, char *obuf, |
138 | | size_t olen, Byte CODE = 0xC3) |
139 | 0 | { |
140 | 0 | Byte *next = reinterpret_cast<Byte *>(obuf); |
141 | 0 | while (ilen > 0 && olen > 0) |
142 | 0 | { |
143 | | // It is safe to read and write one byte |
144 | 0 | Byte b = NEXT_BYTE; |
145 | 0 | ilen--; |
146 | 0 | if (b != CODE) |
147 | 0 | { // Copy single chars |
148 | 0 | *next++ = b; |
149 | 0 | olen--; |
150 | 0 | } |
151 | 0 | else |
152 | 0 | { // Marker found, which type of sequence is it? |
153 | 0 | CHECK_INPUT; |
154 | 0 | b = NEXT_BYTE; |
155 | 0 | ilen--; |
156 | 0 | if (b == 0) |
157 | 0 | { // Emit one code |
158 | 0 | *next++ = CODE; |
159 | 0 | olen--; |
160 | 0 | } |
161 | 0 | else |
162 | 0 | { // Sequence |
163 | 0 | size_t run = 0; |
164 | 0 | if (b < 4) |
165 | 0 | { |
166 | 0 | run = 256 * b; |
167 | 0 | if (3 == b) |
168 | 0 | { // Second byte of high count |
169 | 0 | CHECK_INPUT; |
170 | 0 | run += 256 * NEXT_BYTE; |
171 | 0 | ilen--; |
172 | 0 | } |
173 | 0 | CHECK_INPUT; |
174 | 0 | run += NEXT_BYTE; |
175 | 0 | ilen--; |
176 | 0 | } |
177 | 0 | else |
178 | 0 | { // Single byte count |
179 | 0 | run = b; |
180 | 0 | } |
181 | | |
182 | | // Write the sequence out, after checking |
183 | 0 | if (olen < run) |
184 | 0 | RET_NOW; |
185 | 0 | CHECK_INPUT; |
186 | 0 | b = NEXT_BYTE; |
187 | 0 | ilen--; |
188 | 0 | memset(next, b, run); |
189 | |
|
190 | 0 | next += run; |
191 | 0 | olen -= run; |
192 | 0 | } |
193 | 0 | } |
194 | 0 | } |
195 | 0 | RET_NOW; |
196 | 0 | } |
197 | | |
198 | | // Returns the least used byte value from a buffer |
199 | | static Byte getLeastUsed(const Byte *src, size_t len) |
200 | 0 | { |
201 | 0 | std::vector<unsigned int> hist(256, 0); |
202 | 0 | while (len) |
203 | 0 | { |
204 | 0 | --len; |
205 | 0 | hist[*src++]++; |
206 | 0 | } |
207 | 0 | const size_t nIdxMin = |
208 | 0 | std::min_element(hist.begin(), hist.end()) - hist.begin(); |
209 | 0 | return UC(nIdxMin); |
210 | 0 | } |
211 | | |
212 | | // Read from a packed source until the src is exhausted |
213 | | // Returns true if all output buffer was filled, 0 otherwise |
214 | | int RLEC3Packer::load(storage_manager *src, storage_manager *dst) |
215 | 0 | { |
216 | | // Use the first char in the input buffer as marker code |
217 | 0 | return dst->size == fromYarn(src->buffer + 1, src->size - 1, dst->buffer, |
218 | 0 | dst->size, *src->buffer); |
219 | 0 | } |
220 | | |
221 | | // |
222 | | // Picks the least use value as the marker code and stores it first in the data |
223 | | // This choice improves the worst case expansion, which becomes (1 + N / 256 + |
224 | | // N) : N It also improves compression in general Makes best case marginally |
225 | | // worse because the chosen code adds one byte to the output |
226 | | // |
227 | | |
228 | | int RLEC3Packer::store(storage_manager *src, storage_manager *dst) |
229 | 0 | { |
230 | 0 | size_t N = src->size; |
231 | 0 | if (dst->size < 1 + N + N / 256) |
232 | 0 | return 0; // Failed, destination might overflow |
233 | 0 | Byte c = |
234 | 0 | getLeastUsed(reinterpret_cast<const Byte *>(src->buffer), src->size); |
235 | 0 | *dst->buffer++ = static_cast<char>(c); |
236 | 0 | dst->size = 1 + toYarn(src->buffer, dst->buffer, src->size, c); |
237 | 0 | return 1; // Success, size is in dst->size |
238 | 0 | } |
239 | | |
240 | | NAMESPACE_MRF_END |