/src/openjpeg/src/lib/openjp2/sparse_array.c
Line | Count | Source |
1 | | /* |
2 | | * The copyright in this software is being made available under the 2-clauses |
3 | | * BSD License, included below. This software may be subject to other third |
4 | | * party and contributor rights, including patent rights, and no such rights |
5 | | * are granted under this license. |
6 | | * |
7 | | * Copyright (c) 2017, IntoPix SA <contact@intopix.com> |
8 | | * All rights reserved. |
9 | | * |
10 | | * Redistribution and use in source and binary forms, with or without |
11 | | * modification, are permitted provided that the following conditions |
12 | | * are met: |
13 | | * 1. Redistributions of source code must retain the above copyright |
14 | | * notice, this list of conditions and the following disclaimer. |
15 | | * 2. Redistributions in binary form must reproduce the above copyright |
16 | | * notice, this list of conditions and the following disclaimer in the |
17 | | * documentation and/or other materials provided with the distribution. |
18 | | * |
19 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' |
20 | | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
21 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
22 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
23 | | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | | * POSSIBILITY OF SUCH DAMAGE. |
30 | | */ |
31 | | |
32 | | #include "opj_includes.h" |
33 | | |
34 | | |
35 | | struct opj_sparse_array_int32 { |
36 | | OPJ_UINT32 width; |
37 | | OPJ_UINT32 height; |
38 | | OPJ_UINT32 block_width; |
39 | | OPJ_UINT32 block_height; |
40 | | OPJ_UINT32 block_count_hor; |
41 | | OPJ_UINT32 block_count_ver; |
42 | | OPJ_INT32** data_blocks; |
43 | | }; |
44 | | |
45 | | opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width, |
46 | | OPJ_UINT32 height, |
47 | | OPJ_UINT32 block_width, |
48 | | OPJ_UINT32 block_height) |
49 | 90.9k | { |
50 | 90.9k | opj_sparse_array_int32_t* sa; |
51 | | |
52 | 90.9k | if (width == 0 || height == 0 || block_width == 0 || block_height == 0) { |
53 | 0 | return NULL; |
54 | 0 | } |
55 | 90.9k | if (block_width > ((OPJ_UINT32)~0U) / block_height / sizeof(OPJ_INT32)) { |
56 | 0 | return NULL; |
57 | 0 | } |
58 | | |
59 | 90.9k | sa = (opj_sparse_array_int32_t*) opj_calloc(1, |
60 | 90.9k | sizeof(opj_sparse_array_int32_t)); |
61 | 90.9k | sa->width = width; |
62 | 90.9k | sa->height = height; |
63 | 90.9k | sa->block_width = block_width; |
64 | 90.9k | sa->block_height = block_height; |
65 | 90.9k | sa->block_count_hor = opj_uint_ceildiv(width, block_width); |
66 | 90.9k | sa->block_count_ver = opj_uint_ceildiv(height, block_height); |
67 | 90.9k | if (sa->block_count_hor > ((OPJ_UINT32)~0U) / sa->block_count_ver) { |
68 | 0 | opj_free(sa); |
69 | 0 | return NULL; |
70 | 0 | } |
71 | 90.9k | sa->data_blocks = (OPJ_INT32**) opj_calloc(sizeof(OPJ_INT32*), |
72 | 90.9k | (size_t) sa->block_count_hor * sa->block_count_ver); |
73 | 90.9k | if (sa->data_blocks == NULL) { |
74 | 0 | opj_free(sa); |
75 | 0 | return NULL; |
76 | 0 | } |
77 | | |
78 | 90.9k | return sa; |
79 | 90.9k | } |
80 | | |
81 | | void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa) |
82 | 90.9k | { |
83 | 90.9k | if (sa) { |
84 | 90.9k | OPJ_UINT32 i; |
85 | 2.02M | for (i = 0; i < sa->block_count_hor * sa->block_count_ver; i++) { |
86 | 1.93M | if (sa->data_blocks[i]) { |
87 | 1.61M | opj_free(sa->data_blocks[i]); |
88 | 1.61M | } |
89 | 1.93M | } |
90 | 90.9k | opj_free(sa->data_blocks); |
91 | 90.9k | opj_free(sa); |
92 | 90.9k | } |
93 | 90.9k | } |
94 | | |
95 | | OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa, |
96 | | OPJ_UINT32 x0, |
97 | | OPJ_UINT32 y0, |
98 | | OPJ_UINT32 x1, |
99 | | OPJ_UINT32 y1) |
100 | 50.6M | { |
101 | 50.6M | return !(x0 >= sa->width || x1 <= x0 || x1 > sa->width || |
102 | 48.8M | y0 >= sa->height || y1 <= y0 || y1 > sa->height); |
103 | 50.6M | } |
104 | | |
105 | | static OPJ_BOOL opj_sparse_array_int32_read_or_write( |
106 | | const opj_sparse_array_int32_t* sa, |
107 | | OPJ_UINT32 x0, |
108 | | OPJ_UINT32 y0, |
109 | | OPJ_UINT32 x1, |
110 | | OPJ_UINT32 y1, |
111 | | OPJ_INT32* buf, |
112 | | OPJ_UINT32 buf_col_stride, |
113 | | OPJ_UINT32 buf_line_stride, |
114 | | OPJ_BOOL forgiving, |
115 | | OPJ_BOOL is_read_op) |
116 | 50.6M | { |
117 | 50.6M | OPJ_UINT32 y, block_y; |
118 | 50.6M | OPJ_UINT32 y_incr = 0; |
119 | 50.6M | const OPJ_UINT32 block_width = sa->block_width; |
120 | | |
121 | 50.6M | if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) { |
122 | 2.22M | return forgiving; |
123 | 2.22M | } |
124 | | |
125 | 48.4M | block_y = y0 / sa->block_height; |
126 | 109M | for (y = y0; y < y1; block_y ++, y += y_incr) { |
127 | 60.5M | OPJ_UINT32 x, block_x; |
128 | 60.5M | OPJ_UINT32 x_incr = 0; |
129 | 60.5M | OPJ_UINT32 block_y_offset; |
130 | 60.5M | y_incr = (y == y0) ? sa->block_height - (y0 % sa->block_height) : |
131 | 60.5M | sa->block_height; |
132 | 60.5M | block_y_offset = sa->block_height - y_incr; |
133 | 60.5M | y_incr = opj_uint_min(y_incr, y1 - y); |
134 | 60.5M | block_x = x0 / block_width; |
135 | 158M | for (x = x0; x < x1; block_x ++, x += x_incr) { |
136 | 97.6M | OPJ_UINT32 j; |
137 | 97.6M | OPJ_UINT32 block_x_offset; |
138 | 97.6M | OPJ_INT32* src_block; |
139 | 97.6M | x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width; |
140 | 97.6M | block_x_offset = block_width - x_incr; |
141 | 97.6M | x_incr = opj_uint_min(x_incr, x1 - x); |
142 | 97.6M | src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x]; |
143 | 97.6M | if (is_read_op) { |
144 | 40.2M | if (src_block == NULL) { |
145 | 5.78k | if (buf_col_stride == 1) { |
146 | 0 | OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + |
147 | 0 | (x - x0) * buf_col_stride; |
148 | 0 | for (j = 0; j < y_incr; j++) { |
149 | 0 | memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr); |
150 | 0 | dest_ptr += buf_line_stride; |
151 | 0 | } |
152 | 5.78k | } else { |
153 | 5.78k | OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride + |
154 | 5.78k | (x - x0) * buf_col_stride; |
155 | 11.5k | for (j = 0; j < y_incr; j++) { |
156 | 5.78k | OPJ_UINT32 k; |
157 | 250k | for (k = 0; k < x_incr; k++) { |
158 | 244k | dest_ptr[k * buf_col_stride] = 0; |
159 | 244k | } |
160 | 5.78k | dest_ptr += buf_line_stride; |
161 | 5.78k | } |
162 | 5.78k | } |
163 | 40.1M | } else { |
164 | 40.1M | const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset * |
165 | 40.1M | (OPJ_SIZE_T)block_width + block_x_offset; |
166 | 40.1M | if (buf_col_stride == 1) { |
167 | 8.12M | OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride |
168 | 8.12M | + |
169 | 8.12M | (x - x0) * buf_col_stride; |
170 | 8.12M | if (x_incr == 4) { |
171 | | /* Same code as general branch, but the compiler */ |
172 | | /* can have an efficient memcpy() */ |
173 | 4.84M | (void)(x_incr); /* trick to silent cppcheck duplicateBranch warning */ |
174 | 226M | for (j = 0; j < y_incr; j++) { |
175 | 221M | memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); |
176 | 221M | dest_ptr += buf_line_stride; |
177 | 221M | src_ptr += block_width; |
178 | 221M | } |
179 | 4.84M | } else { |
180 | 153M | for (j = 0; j < y_incr; j++) { |
181 | 149M | memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); |
182 | 149M | dest_ptr += buf_line_stride; |
183 | 149M | src_ptr += block_width; |
184 | 149M | } |
185 | 3.28M | } |
186 | 32.0M | } else { |
187 | 32.0M | OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride |
188 | 32.0M | + |
189 | 32.0M | (x - x0) * buf_col_stride; |
190 | 32.0M | if (x_incr == 1) { |
191 | 4.07M | for (j = 0; j < y_incr; j++) { |
192 | 2.03M | *dest_ptr = *src_ptr; |
193 | 2.03M | dest_ptr += buf_line_stride; |
194 | 2.03M | src_ptr += block_width; |
195 | 2.03M | } |
196 | 30.0M | } else if (y_incr == 1 && buf_col_stride == 2) { |
197 | 19.3M | OPJ_UINT32 k; |
198 | 238M | for (k = 0; k < (x_incr & ~3U); k += 4) { |
199 | 218M | dest_ptr[k * buf_col_stride] = src_ptr[k]; |
200 | 218M | dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1]; |
201 | 218M | dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2]; |
202 | 218M | dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3]; |
203 | 218M | } |
204 | 32.1M | for (; k < x_incr; k++) { |
205 | 12.7M | dest_ptr[k * buf_col_stride] = src_ptr[k]; |
206 | 12.7M | } |
207 | 19.3M | } else if (x_incr >= 8 && buf_col_stride == 8) { |
208 | 0 | for (j = 0; j < y_incr; j++) { |
209 | 0 | OPJ_UINT32 k; |
210 | 0 | for (k = 0; k < (x_incr & ~3U); k += 4) { |
211 | 0 | dest_ptr[k * buf_col_stride] = src_ptr[k]; |
212 | 0 | dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1]; |
213 | 0 | dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2]; |
214 | 0 | dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3]; |
215 | 0 | } |
216 | 0 | for (; k < x_incr; k++) { |
217 | 0 | dest_ptr[k * buf_col_stride] = src_ptr[k]; |
218 | 0 | } |
219 | 0 | dest_ptr += buf_line_stride; |
220 | 0 | src_ptr += block_width; |
221 | 0 | } |
222 | 10.6M | } else { |
223 | | /* General case */ |
224 | 21.3M | for (j = 0; j < y_incr; j++) { |
225 | 10.6M | OPJ_UINT32 k; |
226 | 515M | for (k = 0; k < x_incr; k++) { |
227 | 504M | dest_ptr[k * buf_col_stride] = src_ptr[k]; |
228 | 504M | } |
229 | 10.6M | dest_ptr += buf_line_stride; |
230 | 10.6M | src_ptr += block_width; |
231 | 10.6M | } |
232 | 10.6M | } |
233 | 32.0M | } |
234 | 40.1M | } |
235 | 57.4M | } else { |
236 | 57.4M | if (src_block == NULL) { |
237 | 1.61M | src_block = (OPJ_INT32*) opj_calloc(1, |
238 | 1.61M | (size_t) sa->block_width * sa->block_height * sizeof(OPJ_INT32)); |
239 | 1.61M | if (src_block == NULL) { |
240 | 0 | return OPJ_FALSE; |
241 | 0 | } |
242 | 1.61M | sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block; |
243 | 1.61M | } |
244 | | |
245 | 57.4M | if (buf_col_stride == 1) { |
246 | 56.2M | OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * |
247 | 56.2M | (OPJ_SIZE_T)block_width + block_x_offset; |
248 | 56.2M | const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * |
249 | 56.2M | (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; |
250 | 56.2M | if (x_incr == 4) { |
251 | | /* Same code as general branch, but the compiler */ |
252 | | /* can have an efficient memcpy() */ |
253 | 12.7M | (void)(x_incr); /* trick to silent cppcheck duplicateBranch warning */ |
254 | 337M | for (j = 0; j < y_incr; j++) { |
255 | 325M | memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); |
256 | 325M | dest_ptr += block_width; |
257 | 325M | src_ptr += buf_line_stride; |
258 | 325M | } |
259 | 43.5M | } else { |
260 | 445M | for (j = 0; j < y_incr; j++) { |
261 | 402M | memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr); |
262 | 402M | dest_ptr += block_width; |
263 | 402M | src_ptr += buf_line_stride; |
264 | 402M | } |
265 | 43.5M | } |
266 | 56.2M | } else { |
267 | 1.23M | OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset * |
268 | 1.23M | (OPJ_SIZE_T)block_width + block_x_offset; |
269 | 1.23M | const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) * |
270 | 1.23M | (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride; |
271 | 1.23M | if (x_incr == 1) { |
272 | 404k | for (j = 0; j < y_incr; j++) { |
273 | 340k | *dest_ptr = *src_ptr; |
274 | 340k | src_ptr += buf_line_stride; |
275 | 340k | dest_ptr += block_width; |
276 | 340k | } |
277 | 1.16M | } else if (x_incr >= 8 && buf_col_stride == 8) { |
278 | 9.75M | for (j = 0; j < y_incr; j++) { |
279 | 8.65M | OPJ_UINT32 k; |
280 | 134M | for (k = 0; k < (x_incr & ~3U); k += 4) { |
281 | 125M | dest_ptr[k] = src_ptr[k * buf_col_stride]; |
282 | 125M | dest_ptr[k + 1] = src_ptr[(k + 1) * buf_col_stride]; |
283 | 125M | dest_ptr[k + 2] = src_ptr[(k + 2) * buf_col_stride]; |
284 | 125M | dest_ptr[k + 3] = src_ptr[(k + 3) * buf_col_stride]; |
285 | 125M | } |
286 | 10.8M | for (; k < x_incr; k++) { |
287 | 2.24M | dest_ptr[k] = src_ptr[k * buf_col_stride]; |
288 | 2.24M | } |
289 | 8.65M | src_ptr += buf_line_stride; |
290 | 8.65M | dest_ptr += block_width; |
291 | 8.65M | } |
292 | 1.10M | } else { |
293 | | /* General case */ |
294 | 500k | for (j = 0; j < y_incr; j++) { |
295 | 437k | OPJ_UINT32 k; |
296 | 2.14M | for (k = 0; k < x_incr; k++) { |
297 | 1.70M | dest_ptr[k] = src_ptr[k * buf_col_stride]; |
298 | 1.70M | } |
299 | 437k | src_ptr += buf_line_stride; |
300 | 437k | dest_ptr += block_width; |
301 | 437k | } |
302 | 63.4k | } |
303 | 1.23M | } |
304 | 57.4M | } |
305 | 97.6M | } |
306 | 60.5M | } |
307 | | |
308 | 48.4M | return OPJ_TRUE; |
309 | 48.4M | } |
310 | | |
311 | | OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa, |
312 | | OPJ_UINT32 x0, |
313 | | OPJ_UINT32 y0, |
314 | | OPJ_UINT32 x1, |
315 | | OPJ_UINT32 y1, |
316 | | OPJ_INT32* dest, |
317 | | OPJ_UINT32 dest_col_stride, |
318 | | OPJ_UINT32 dest_line_stride, |
319 | | OPJ_BOOL forgiving) |
320 | 17.2M | { |
321 | 17.2M | return opj_sparse_array_int32_read_or_write( |
322 | 17.2M | (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1, |
323 | 17.2M | dest, |
324 | 17.2M | dest_col_stride, |
325 | 17.2M | dest_line_stride, |
326 | 17.2M | forgiving, |
327 | 17.2M | OPJ_TRUE); |
328 | 17.2M | } |
329 | | |
330 | | OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa, |
331 | | OPJ_UINT32 x0, |
332 | | OPJ_UINT32 y0, |
333 | | OPJ_UINT32 x1, |
334 | | OPJ_UINT32 y1, |
335 | | const OPJ_INT32* src, |
336 | | OPJ_UINT32 src_col_stride, |
337 | | OPJ_UINT32 src_line_stride, |
338 | | OPJ_BOOL forgiving) |
339 | 33.3M | { |
340 | 33.3M | return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1, |
341 | 33.3M | (OPJ_INT32*)src, |
342 | 33.3M | src_col_stride, |
343 | 33.3M | src_line_stride, |
344 | 33.3M | forgiving, |
345 | 33.3M | OPJ_FALSE); |
346 | 33.3M | } |