Coverage Report

Created: 2025-06-09 07:42

/src/gdal/frmts/mrf/Packer_RLE.cpp
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright 2016-2021 Esri
3
Licensed under the Apache License, Version 2.0 (the "License");
4
you may not use this file except in compliance with the License.
5
You may obtain a copy of the License at
6
http://www.apache.org/licenses/LICENSE-2.0
7
Unless required by applicable law or agreed to in writing, software
8
distributed under the License is distributed on an "AS IS" BASIS,
9
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
See the License for the specific language governing permissions and
11
limitations under the License.
12
13
Contributors:  Lucian Plesea
14
*/
15
16
//
17
// Implements an RLE codec packer.  The RLE uses a dedicated marker code
18
// This particular packer picks the least used byte value as marker,
19
// which makes the worst case data expansion more reasonable, at the expense
20
// of taking two passes over the input data
21
//
22
23
// For memset
24
#include <cstring>
25
#include <vector>
26
#include <algorithm>
27
28
#include "marfa.h"
29
#include "Packer_RLE.h"
30
31
NAMESPACE_MRF_START
32
33
0
Packer::~Packer() = default;
34
35
//
36
// RLE yarn codec, uses a dedicated code as marker, default value is 0xC3
37
// This is the C implementation, there is also a C++ one
38
// For yarnball compression, it performs better than using a double char marker
39
//
40
// Includes both encoder and decoder
41
// Worst case input:output ratio is 1:2, when the input is 1,2 or three marker
42
// codes For long input streams, the input:output ratio is between 13260.6:1
43
// (best) and 1:1.75 (worst)
44
//
45
// Could be a byte stream filter which needs very little local storage
46
//
47
48
constexpr int MAX_RUN = 768 + 0xffff;
49
typedef unsigned char Byte;
50
51
0
#define UC(X) static_cast<Byte>(X)
52
53
// Encode helper function
54
// It returns how many times the byte at *s is repeated
55
// a value between 1 and min(max_count, MAX_RUN)
56
inline static int run_length(const Byte *s, int max_count)
57
0
{
58
0
    if (max_count > MAX_RUN)
59
0
        max_count = MAX_RUN;
60
0
    const Byte c = *s++;
61
0
    for (int count = 1; count < max_count; count++)
62
0
        if (c != *s++)
63
0
            return count;
64
0
    return max_count;
65
0
}
66
67
#define RET_NOW                                                                \
68
0
    do                                                                         \
69
0
    {                                                                          \
70
0
        return static_cast<size_t>(next - reinterpret_cast<Byte *>(obuf));     \
71
0
    } while (0)
72
73
//
74
// C compress function, returns compressed size
75
// len is the size of the input buffer
76
// caller should ensure that output buffer is at least 2 * N to be safe,
77
// dropping to N * 7/4 for larger input
78
// If the Code is chosen to be the least present value in input, the
79
// output size requirement is bound by N / 256 + N
80
//
81
static size_t toYarn(const char *ibuffer, char *obuf, size_t len,
82
                     Byte CODE = 0xC3)
83
0
{
84
0
    Byte *next = reinterpret_cast<Byte *>(obuf);
85
86
0
    while (len)
87
0
    {
88
0
        Byte b = static_cast<Byte>(*ibuffer);
89
0
        int run = run_length(reinterpret_cast<const Byte *>(ibuffer),
90
0
                             static_cast<int>(len));
91
0
        if (run < 4)
92
0
        {  // Encoded as single bytes, stored as such, CODE followed by a zero
93
0
            run = 1;
94
0
            *next++ = b;
95
0
            if (CODE == b)
96
0
                *next++ = 0;
97
0
        }
98
0
        else
99
0
        {                    // Encoded as a sequence
100
0
            *next++ = CODE;  // Start with Marker code, always present
101
102
0
            if (run > 767)
103
0
            {                    // long sequence
104
0
                ibuffer += 768;  // May be unsafe to read *ibuffer
105
0
                len -= 768;
106
0
                run -= 768;
107
0
                *next++ = 3;
108
0
                *next++ = UC(run >> 8);  // Forced high count
109
0
            }
110
0
            else if (run > 255)
111
0
            {                            // Between 256 and 767, medium sequence
112
0
                *next++ = UC(run >> 8);  // High count
113
0
            }
114
115
0
            *next++ = UC(run & 0xff);  // Low count, always present
116
0
            *next++ = b;               // End with Value, always present
117
0
        }
118
0
        ibuffer += run;
119
0
        len -= run;
120
0
    }
121
0
    RET_NOW;
122
0
}
123
124
// Check that another input byte can be read, return now otherwise
125
// Adjusts the input length too
126
#define CHECK_INPUT                                                            \
127
0
    if (ilen == 0)                                                             \
128
0
    RET_NOW
129
// Reads a byte and adjust the input pointer
130
0
#define NEXT_BYTE UC(*ibuffer++)
131
132
//
133
// C decompress function, returns actual decompressed size
134
// Stops when either olen is reached or when ilen is exhausted
135
// returns the number of output bytes written
136
//
137
static size_t fromYarn(const char *ibuffer, size_t ilen, char *obuf,
138
                       size_t olen, Byte CODE = 0xC3)
139
0
{
140
0
    Byte *next = reinterpret_cast<Byte *>(obuf);
141
0
    while (ilen > 0 && olen > 0)
142
0
    {
143
        // It is safe to read and write one byte
144
0
        Byte b = NEXT_BYTE;
145
0
        ilen--;
146
0
        if (b != CODE)
147
0
        {  // Copy single chars
148
0
            *next++ = b;
149
0
            olen--;
150
0
        }
151
0
        else
152
0
        {  // Marker found, which type of sequence is it?
153
0
            CHECK_INPUT;
154
0
            b = NEXT_BYTE;
155
0
            ilen--;
156
0
            if (b == 0)
157
0
            {  // Emit one code
158
0
                *next++ = CODE;
159
0
                olen--;
160
0
            }
161
0
            else
162
0
            {  // Sequence
163
0
                size_t run = 0;
164
0
                if (b < 4)
165
0
                {
166
0
                    run = 256 * b;
167
0
                    if (3 == b)
168
0
                    {  // Second byte of high count
169
0
                        CHECK_INPUT;
170
0
                        run += 256 * NEXT_BYTE;
171
0
                        ilen--;
172
0
                    }
173
0
                    CHECK_INPUT;
174
0
                    run += NEXT_BYTE;
175
0
                    ilen--;
176
0
                }
177
0
                else
178
0
                {  // Single byte count
179
0
                    run = b;
180
0
                }
181
182
                // Write the sequence out, after checking
183
0
                if (olen < run)
184
0
                    RET_NOW;
185
0
                CHECK_INPUT;
186
0
                b = NEXT_BYTE;
187
0
                ilen--;
188
0
                memset(next, b, run);
189
190
0
                next += run;
191
0
                olen -= run;
192
0
            }
193
0
        }
194
0
    }
195
0
    RET_NOW;
196
0
}
197
198
// Returns the least used byte value from a buffer
199
static Byte getLeastUsed(const Byte *src, size_t len)
200
0
{
201
0
    std::vector<unsigned int> hist(256, 0);
202
0
    while (len)
203
0
    {
204
0
        --len;
205
0
        hist[*src++]++;
206
0
    }
207
0
    const size_t nIdxMin =
208
0
        std::min_element(hist.begin(), hist.end()) - hist.begin();
209
0
    return UC(nIdxMin);
210
0
}
211
212
// Read from a packed source until the src is exhausted
213
// Returns true if all output buffer was filled, 0 otherwise
214
int RLEC3Packer::load(storage_manager *src, storage_manager *dst)
215
0
{
216
    // Use the first char in the input buffer as marker code
217
0
    return dst->size == fromYarn(src->buffer + 1, src->size - 1, dst->buffer,
218
0
                                 dst->size, *src->buffer);
219
0
}
220
221
//
222
// Picks the least use value as the marker code and stores it first in the data
223
// This choice improves the worst case expansion, which becomes (1 + N / 256 +
224
// N) : N It also improves compression in general Makes best case marginally
225
// worse because the chosen code adds one byte to the output
226
//
227
228
int RLEC3Packer::store(storage_manager *src, storage_manager *dst)
229
0
{
230
0
    size_t N = src->size;
231
0
    if (dst->size < 1 + N + N / 256)
232
0
        return 0;  // Failed, destination might overflow
233
0
    Byte c =
234
0
        getLeastUsed(reinterpret_cast<const Byte *>(src->buffer), src->size);
235
0
    *dst->buffer++ = static_cast<char>(c);
236
0
    dst->size = 1 + toYarn(src->buffer, dst->buffer, src->size, c);
237
0
    return 1;  // Success, size is in dst->size
238
0
}
239
240
NAMESPACE_MRF_END