/src/gdal/frmts/mrf/Packer_RLE.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  | Copyright 2016-2021 Esri  | 
3  |  | Licensed under the Apache License, Version 2.0 (the "License");  | 
4  |  | you may not use this file except in compliance with the License.  | 
5  |  | You may obtain a copy of the License at  | 
6  |  | http://www.apache.org/licenses/LICENSE-2.0  | 
7  |  | Unless required by applicable law or agreed to in writing, software  | 
8  |  | distributed under the License is distributed on an "AS IS" BASIS,  | 
9  |  | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
10  |  | See the License for the specific language governing permissions and  | 
11  |  | limitations under the License.  | 
12  |  |  | 
13  |  | Contributors:  Lucian Plesea  | 
14  |  | */  | 
15  |  |  | 
16  |  | //  | 
17  |  | // Implements an RLE codec packer.  The RLE uses a dedicated marker code  | 
18  |  | // This particular packer picks the least used byte value as marker,  | 
19  |  | // which makes the worst case data expansion more reasonable, at the expense  | 
20  |  | // of taking two passes over the input data  | 
21  |  | //  | 
22  |  |  | 
23  |  | // For memset  | 
24  |  | #include <cstring>  | 
25  |  | #include <vector>  | 
26  |  | #include <algorithm>  | 
27  |  |  | 
28  |  | #include "marfa.h"  | 
29  |  | #include "Packer_RLE.h"  | 
30  |  |  | 
31  |  | NAMESPACE_MRF_START  | 
32  |  |  | 
33  | 0  | Packer::~Packer() = default;  | 
34  |  |  | 
35  |  | //  | 
36  |  | // RLE yarn codec, uses a dedicated code as marker, default value is 0xC3  | 
37  |  | // This is the C implementation, there is also a C++ one  | 
38  |  | // For yarnball compression, it performs better than using a double char marker  | 
39  |  | //  | 
40  |  | // Includes both encoder and decoder  | 
41  |  | // Worst case input:output ratio is 1:2, when the input is 1,2 or three marker  | 
42  |  | // codes For long input streams, the input:output ratio is between 13260.6:1  | 
43  |  | // (best) and 1:1.75 (worst)  | 
44  |  | //  | 
45  |  | // Could be a byte stream filter which needs very little local storage  | 
46  |  | //  | 
47  |  |  | 
48  |  | constexpr int MAX_RUN = 768 + 0xffff;  | 
49  |  | typedef unsigned char Byte;  | 
50  |  |  | 
51  | 0  | #define UC(X) static_cast<Byte>(X)  | 
52  |  |  | 
53  |  | // Encode helper function  | 
54  |  | // It returns how many times the byte at *s is repeated  | 
55  |  | // a value between 1 and min(max_count, MAX_RUN)  | 
56  |  | inline static int run_length(const Byte *s, int max_count)  | 
57  | 0  | { | 
58  | 0  |     if (max_count > MAX_RUN)  | 
59  | 0  |         max_count = MAX_RUN;  | 
60  | 0  |     const Byte c = *s++;  | 
61  | 0  |     for (int count = 1; count < max_count; count++)  | 
62  | 0  |         if (c != *s++)  | 
63  | 0  |             return count;  | 
64  | 0  |     return max_count;  | 
65  | 0  | }  | 
66  |  |  | 
67  |  | #define RET_NOW                                                                \  | 
68  | 0  |     do                                                                         \  | 
69  | 0  |     {                                                                          \ | 
70  | 0  |         return static_cast<size_t>(next - reinterpret_cast<Byte *>(obuf));     \  | 
71  | 0  |     } while (0)  | 
72  |  |  | 
73  |  | //  | 
74  |  | // C compress function, returns compressed size  | 
75  |  | // len is the size of the input buffer  | 
76  |  | // caller should ensure that output buffer is at least 2 * N to be safe,  | 
77  |  | // dropping to N * 7/4 for larger input  | 
78  |  | // If the Code is chosen to be the least present value in input, the  | 
79  |  | // output size requirement is bound by N / 256 + N  | 
80  |  | //  | 
81  |  | static size_t toYarn(const char *ibuffer, char *obuf, size_t len,  | 
82  |  |                      Byte CODE = 0xC3)  | 
83  | 0  | { | 
84  | 0  |     Byte *next = reinterpret_cast<Byte *>(obuf);  | 
85  |  | 
  | 
86  | 0  |     while (len)  | 
87  | 0  |     { | 
88  | 0  |         Byte b = static_cast<Byte>(*ibuffer);  | 
89  | 0  |         int run = run_length(reinterpret_cast<const Byte *>(ibuffer),  | 
90  | 0  |                              static_cast<int>(len));  | 
91  | 0  |         if (run < 4)  | 
92  | 0  |         {  // Encoded as single bytes, stored as such, CODE followed by a zero | 
93  | 0  |             run = 1;  | 
94  | 0  |             *next++ = b;  | 
95  | 0  |             if (CODE == b)  | 
96  | 0  |                 *next++ = 0;  | 
97  | 0  |         }  | 
98  | 0  |         else  | 
99  | 0  |         {                    // Encoded as a sequence | 
100  | 0  |             *next++ = CODE;  // Start with Marker code, always present  | 
101  |  | 
  | 
102  | 0  |             if (run > 767)  | 
103  | 0  |             {                    // long sequence | 
104  | 0  |                 ibuffer += 768;  // May be unsafe to read *ibuffer  | 
105  | 0  |                 len -= 768;  | 
106  | 0  |                 run -= 768;  | 
107  | 0  |                 *next++ = 3;  | 
108  | 0  |                 *next++ = UC(run >> 8);  // Forced high count  | 
109  | 0  |             }  | 
110  | 0  |             else if (run > 255)  | 
111  | 0  |             {                            // Between 256 and 767, medium sequence | 
112  | 0  |                 *next++ = UC(run >> 8);  // High count  | 
113  | 0  |             }  | 
114  |  | 
  | 
115  | 0  |             *next++ = UC(run & 0xff);  // Low count, always present  | 
116  | 0  |             *next++ = b;               // End with Value, always present  | 
117  | 0  |         }  | 
118  | 0  |         ibuffer += run;  | 
119  | 0  |         len -= run;  | 
120  | 0  |     }  | 
121  | 0  |     RET_NOW;  | 
122  | 0  | }  | 
123  |  |  | 
124  |  | // Check that another input byte can be read, return now otherwise  | 
125  |  | // Adjusts the input length too  | 
126  |  | #define CHECK_INPUT                                                            \  | 
127  | 0  |     if (ilen == 0)                                                             \  | 
128  | 0  |     RET_NOW  | 
129  |  | // Reads a byte and adjust the input pointer  | 
130  | 0  | #define NEXT_BYTE UC(*ibuffer++)  | 
131  |  |  | 
132  |  | //  | 
133  |  | // C decompress function, returns actual decompressed size  | 
134  |  | // Stops when either olen is reached or when ilen is exhausted  | 
135  |  | // returns the number of output bytes written  | 
136  |  | //  | 
137  |  | static size_t fromYarn(const char *ibuffer, size_t ilen, char *obuf,  | 
138  |  |                        size_t olen, Byte CODE = 0xC3)  | 
139  | 0  | { | 
140  | 0  |     Byte *next = reinterpret_cast<Byte *>(obuf);  | 
141  | 0  |     while (ilen > 0 && olen > 0)  | 
142  | 0  |     { | 
143  |  |         // It is safe to read and write one byte  | 
144  | 0  |         Byte b = NEXT_BYTE;  | 
145  | 0  |         ilen--;  | 
146  | 0  |         if (b != CODE)  | 
147  | 0  |         {  // Copy single chars | 
148  | 0  |             *next++ = b;  | 
149  | 0  |             olen--;  | 
150  | 0  |         }  | 
151  | 0  |         else  | 
152  | 0  |         {  // Marker found, which type of sequence is it? | 
153  | 0  |             CHECK_INPUT;  | 
154  | 0  |             b = NEXT_BYTE;  | 
155  | 0  |             ilen--;  | 
156  | 0  |             if (b == 0)  | 
157  | 0  |             {  // Emit one code | 
158  | 0  |                 *next++ = CODE;  | 
159  | 0  |                 olen--;  | 
160  | 0  |             }  | 
161  | 0  |             else  | 
162  | 0  |             {  // Sequence | 
163  | 0  |                 size_t run = 0;  | 
164  | 0  |                 if (b < 4)  | 
165  | 0  |                 { | 
166  | 0  |                     run = 256 * b;  | 
167  | 0  |                     if (3 == b)  | 
168  | 0  |                     {  // Second byte of high count | 
169  | 0  |                         CHECK_INPUT;  | 
170  | 0  |                         run += 256 * NEXT_BYTE;  | 
171  | 0  |                         ilen--;  | 
172  | 0  |                     }  | 
173  | 0  |                     CHECK_INPUT;  | 
174  | 0  |                     run += NEXT_BYTE;  | 
175  | 0  |                     ilen--;  | 
176  | 0  |                 }  | 
177  | 0  |                 else  | 
178  | 0  |                 {  // Single byte count | 
179  | 0  |                     run = b;  | 
180  | 0  |                 }  | 
181  |  |  | 
182  |  |                 // Write the sequence out, after checking  | 
183  | 0  |                 if (olen < run)  | 
184  | 0  |                     RET_NOW;  | 
185  | 0  |                 CHECK_INPUT;  | 
186  | 0  |                 b = NEXT_BYTE;  | 
187  | 0  |                 ilen--;  | 
188  | 0  |                 memset(next, b, run);  | 
189  |  | 
  | 
190  | 0  |                 next += run;  | 
191  | 0  |                 olen -= run;  | 
192  | 0  |             }  | 
193  | 0  |         }  | 
194  | 0  |     }  | 
195  | 0  |     RET_NOW;  | 
196  | 0  | }  | 
197  |  |  | 
198  |  | // Returns the least used byte value from a buffer  | 
199  |  | static Byte getLeastUsed(const Byte *src, size_t len)  | 
200  | 0  | { | 
201  | 0  |     std::vector<unsigned int> hist(256, 0);  | 
202  | 0  |     while (len)  | 
203  | 0  |     { | 
204  | 0  |         --len;  | 
205  | 0  |         hist[*src++]++;  | 
206  | 0  |     }  | 
207  | 0  |     const size_t nIdxMin =  | 
208  | 0  |         std::min_element(hist.begin(), hist.end()) - hist.begin();  | 
209  | 0  |     return UC(nIdxMin);  | 
210  | 0  | }  | 
211  |  |  | 
212  |  | // Read from a packed source until the src is exhausted  | 
213  |  | // Returns true if all output buffer was filled, 0 otherwise  | 
214  |  | int RLEC3Packer::load(storage_manager *src, storage_manager *dst)  | 
215  | 0  | { | 
216  |  |     // Use the first char in the input buffer as marker code  | 
217  | 0  |     return dst->size == fromYarn(src->buffer + 1, src->size - 1, dst->buffer,  | 
218  | 0  |                                  dst->size, *src->buffer);  | 
219  | 0  | }  | 
220  |  |  | 
221  |  | //  | 
222  |  | // Picks the least use value as the marker code and stores it first in the data  | 
223  |  | // This choice improves the worst case expansion, which becomes (1 + N / 256 +  | 
224  |  | // N) : N It also improves compression in general Makes best case marginally  | 
225  |  | // worse because the chosen code adds one byte to the output  | 
226  |  | //  | 
227  |  |  | 
228  |  | int RLEC3Packer::store(storage_manager *src, storage_manager *dst)  | 
229  | 0  | { | 
230  | 0  |     size_t N = src->size;  | 
231  | 0  |     if (dst->size < 1 + N + N / 256)  | 
232  | 0  |         return 0;  // Failed, destination might overflow  | 
233  | 0  |     Byte c =  | 
234  | 0  |         getLeastUsed(reinterpret_cast<const Byte *>(src->buffer), src->size);  | 
235  | 0  |     *dst->buffer++ = static_cast<char>(c);  | 
236  | 0  |     dst->size = 1 + toYarn(src->buffer, dst->buffer, src->size, c);  | 
237  | 0  |     return 1;  // Success, size is in dst->size  | 
238  | 0  | }  | 
239  |  |  | 
240  |  | NAMESPACE_MRF_END  |