/src/c-blosc2/blosc/frame.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************* |
2 | | Blosc - Blocked Shuffling and Compression Library |
3 | | |
4 | | Copyright (c) 2021 Blosc Development Team <blosc@blosc.org> |
5 | | https://blosc.org |
6 | | License: BSD 3-Clause (see LICENSE.txt) |
7 | | |
8 | | See LICENSE.txt for details about copyright and rights to use. |
9 | | **********************************************************************/ |
10 | | |
11 | | #include "frame.h" |
12 | | #include "sframe.h" |
13 | | #include "context.h" |
14 | | #include "blosc-private.h" |
15 | | #include "blosc2.h" |
16 | | |
17 | | #include <sys/stat.h> |
18 | | #if defined(_WIN32) |
19 | | #include <windows.h> |
20 | | #include <malloc.h> |
21 | | // See https://github.com/Blosc/python-blosc2/issues/359#issuecomment-2625380236 |
22 | | #define stat _stat64 |
23 | | #endif /* _WIN32 */ |
24 | | |
25 | | #include <inttypes.h> |
26 | | #include <stdbool.h> |
27 | | #include <stdio.h> |
28 | | #include <stdint.h> |
29 | | #include <stdlib.h> |
30 | | #include <string.h> |
31 | | |
32 | | /* If C11 is supported, use its built-in aligned allocation. */ |
33 | | #if __STDC_VERSION__ >= 201112L |
34 | | #include <stdalign.h> |
35 | | #endif |
36 | | |
37 | | |
38 | | /* Create a new (empty) frame */ |
39 | 0 | blosc2_frame_s* frame_new(const char* urlpath) { |
40 | 0 | blosc2_frame_s* new_frame = calloc(1, sizeof(blosc2_frame_s)); |
41 | 0 | if (urlpath != NULL) { |
42 | 0 | char* new_urlpath = malloc(strlen(urlpath) + 1); // + 1 for the trailing NULL |
43 | 0 | new_frame->urlpath = strcpy(new_urlpath, urlpath); |
44 | 0 | new_frame->file_offset = 0; |
45 | 0 | } |
46 | 0 | return new_frame; |
47 | 0 | } |
48 | | |
49 | | |
50 | | /* Free memory from a frame. */ |
51 | 0 | int frame_free(blosc2_frame_s* frame) { |
52 | |
|
53 | 0 | if (frame->cframe != NULL && !frame->avoid_cframe_free) { |
54 | 0 | free(frame->cframe); |
55 | 0 | } |
56 | |
|
57 | 0 | if (frame->coffsets != NULL && frame->coffsets_needs_free) { |
58 | 0 | free(frame->coffsets); |
59 | 0 | } |
60 | |
|
61 | 0 | if (frame->urlpath != NULL) { |
62 | 0 | free(frame->urlpath); |
63 | 0 | } |
64 | |
|
65 | 0 | free(frame); |
66 | |
|
67 | 0 | return 0; |
68 | 0 | } |
69 | | |
70 | | |
71 | 0 | void *new_header_frame(blosc2_schunk *schunk, blosc2_frame_s *frame) { |
72 | 0 | if (frame == NULL) { |
73 | 0 | return NULL; |
74 | 0 | } |
75 | 0 | uint8_t* h2 = calloc(FRAME_HEADER_MINLEN, 1); |
76 | 0 | uint8_t* h2p = h2; |
77 | | |
78 | | // The msgpack header starts here |
79 | 0 | *h2p = 0x90; // fixarray... |
80 | 0 | *h2p += 14; // ...with 13 elements |
81 | 0 | h2p += 1; |
82 | | |
83 | | // Magic number |
84 | 0 | *h2p = 0xa0 + 8; // str with 8 elements |
85 | 0 | h2p += 1; |
86 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
87 | 0 | return NULL; |
88 | 0 | } |
89 | 0 | strcpy((char*)h2p, "b2frame"); |
90 | 0 | h2p += 8; |
91 | | |
92 | | // Header size |
93 | 0 | *h2p = 0xd2; // int32 |
94 | 0 | h2p += 1 + 4; |
95 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
96 | 0 | return NULL; |
97 | 0 | } |
98 | | |
99 | | // Total frame size |
100 | 0 | *h2p = 0xcf; // uint64 |
101 | | // Fill it with frame->len which is known *after* the creation of the frame (e.g. when updating the header) |
102 | 0 | int64_t flen = frame->len; |
103 | 0 | to_big(h2 + FRAME_LEN, &flen, sizeof(flen)); |
104 | 0 | h2p += 1 + 8; |
105 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
106 | 0 | return NULL; |
107 | 0 | } |
108 | | |
109 | | // Flags |
110 | 0 | *h2p = 0xa0 + 4; // str with 4 elements |
111 | 0 | h2p += 1; |
112 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
113 | 0 | return NULL; |
114 | 0 | } |
115 | | // General flags |
116 | 0 | *h2p = BLOSC2_VERSION_FRAME_FORMAT; // version |
117 | 0 | *h2p += 0x10; // 64-bit offsets. We only support this for now. |
118 | 0 | h2p += 1; |
119 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
120 | 0 | return NULL; |
121 | 0 | } |
122 | | |
123 | | // Frame type |
124 | | // We only support contiguous and sparse directories frames currently |
125 | 0 | *h2p = frame->sframe ? 1 : 0; |
126 | 0 | h2p += 1; |
127 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
128 | 0 | return NULL; |
129 | 0 | } |
130 | | |
131 | | // Codec flags |
132 | 0 | *h2p = schunk->compcode; |
133 | 0 | if (schunk->compcode >= BLOSC_LAST_CODEC) { |
134 | 0 | *h2p = BLOSC_UDCODEC_FORMAT; |
135 | 0 | } |
136 | 0 | *h2p += (schunk->clevel) << 4u; // clevel |
137 | 0 | h2p += 1; |
138 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
139 | 0 | return NULL; |
140 | 0 | } |
141 | | |
142 | | // Other flags |
143 | 0 | *h2p = schunk->splitmode - 1; |
144 | 0 | h2p += 1; |
145 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
146 | 0 | return NULL; |
147 | 0 | } |
148 | | |
149 | | // Uncompressed size |
150 | 0 | *h2p = 0xd3; // int64 |
151 | 0 | h2p += 1; |
152 | 0 | int64_t nbytes = schunk->nbytes; |
153 | 0 | to_big(h2p, &nbytes, sizeof(nbytes)); |
154 | 0 | h2p += 8; |
155 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
156 | 0 | return NULL; |
157 | 0 | } |
158 | | |
159 | | // Compressed size |
160 | 0 | *h2p = 0xd3; // int64 |
161 | 0 | h2p += 1; |
162 | 0 | int64_t cbytes = schunk->cbytes; |
163 | 0 | to_big(h2p, &cbytes, sizeof(cbytes)); |
164 | 0 | h2p += 8; |
165 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
166 | 0 | return NULL; |
167 | 0 | } |
168 | | |
169 | | // Type size |
170 | 0 | *h2p = 0xd2; // int32 |
171 | 0 | h2p += 1; |
172 | 0 | int32_t typesize = schunk->typesize; |
173 | 0 | to_big(h2p, &typesize, sizeof(typesize)); |
174 | 0 | h2p += 4; |
175 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
176 | 0 | return NULL; |
177 | 0 | } |
178 | | |
179 | | // Block size |
180 | 0 | *h2p = 0xd2; // int32 |
181 | 0 | h2p += 1; |
182 | 0 | int32_t blocksize = schunk->blocksize; |
183 | 0 | to_big(h2p, &blocksize, sizeof(blocksize)); |
184 | 0 | h2p += 4; |
185 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
186 | 0 | return NULL; |
187 | 0 | } |
188 | | |
189 | | // Chunk size |
190 | 0 | *h2p = 0xd2; // int32 |
191 | 0 | h2p += 1; |
192 | 0 | int32_t chunksize = schunk->chunksize; |
193 | 0 | to_big(h2p, &chunksize, sizeof(chunksize)); |
194 | 0 | h2p += 4; |
195 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
196 | 0 | return NULL; |
197 | 0 | } |
198 | | |
199 | | // Number of threads for compression |
200 | 0 | *h2p = 0xd1; // int16 |
201 | 0 | h2p += 1; |
202 | 0 | int16_t nthreads = (int16_t)schunk->cctx->nthreads; |
203 | 0 | to_big(h2p, &nthreads, sizeof(nthreads)); |
204 | 0 | h2p += 2; |
205 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
206 | 0 | return NULL; |
207 | 0 | } |
208 | | |
209 | | // Number of threads for decompression |
210 | 0 | *h2p = 0xd1; // int16 |
211 | 0 | h2p += 1; |
212 | 0 | nthreads = (int16_t)schunk->dctx->nthreads; |
213 | 0 | to_big(h2p, &nthreads, sizeof(nthreads)); |
214 | 0 | h2p += 2; |
215 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
216 | 0 | return NULL; |
217 | 0 | } |
218 | | |
219 | | // The boolean for variable-length metalayers |
220 | 0 | *h2p = (schunk->nvlmetalayers > 0) ? (uint8_t)0xc3 : (uint8_t)0xc2; |
221 | 0 | h2p += 1; |
222 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
223 | 0 | return NULL; |
224 | 0 | } |
225 | | |
226 | | // The space for FRAME_FILTER_PIPELINE |
227 | 0 | *h2p = 0xd8; // fixext 16 |
228 | 0 | h2p += 1; |
229 | 0 | if (BLOSC2_MAX_FILTERS > FRAME_FILTER_PIPELINE_MAX) { |
230 | 0 | return NULL; |
231 | 0 | } |
232 | | // Store the filter pipeline in header |
233 | 0 | uint8_t* mp_filters = h2 + FRAME_FILTER_PIPELINE + 1; |
234 | 0 | uint8_t* mp_meta = h2 + FRAME_FILTER_PIPELINE + 1 + FRAME_FILTER_PIPELINE_MAX; |
235 | 0 | for (int i = 0; i < BLOSC2_MAX_FILTERS; i++) { |
236 | 0 | mp_filters[i] = schunk->filters[i]; |
237 | 0 | mp_meta[i] = schunk->filters_meta[i]; |
238 | 0 | } |
239 | 0 | *h2p = (uint8_t) BLOSC2_MAX_FILTERS; |
240 | 0 | h2p += 1; |
241 | 0 | h2p += 16; |
242 | | |
243 | | // User-defined codec and codec metadata |
244 | 0 | uint8_t* udcodec = h2 + FRAME_UDCODEC; |
245 | 0 | *udcodec = schunk->compcode; |
246 | 0 | uint8_t* codec_meta = h2 + FRAME_CODEC_META; |
247 | 0 | *codec_meta = schunk->compcode_meta; |
248 | |
|
249 | 0 | if (h2p - h2 != FRAME_HEADER_MINLEN) { |
250 | 0 | return NULL; |
251 | 0 | } |
252 | | |
253 | 0 | int32_t hsize = FRAME_HEADER_MINLEN; |
254 | | |
255 | | // Now, deal with metalayers |
256 | 0 | uint16_t nmetalayers = schunk->nmetalayers; |
257 | 0 | if (nmetalayers > BLOSC2_MAX_METALAYERS) { |
258 | 0 | return NULL; |
259 | 0 | } |
260 | | |
261 | | // Make space for the header of metalayers (array marker, size, map of offsets) |
262 | 0 | h2 = realloc(h2, (size_t)hsize + 1 + 1 + 2 + 1 + 2); |
263 | 0 | h2p = h2 + hsize; |
264 | | |
265 | | // The msgpack header for the metalayers (array_marker, size, map of offsets, list of metalayers) |
266 | 0 | *h2p = 0x90 + 3; // array with 3 elements |
267 | 0 | h2p += 1; |
268 | | |
269 | | // Size for the map (index) of offsets, including this uint16 size (to be filled out later on) |
270 | 0 | *h2p = 0xcd; // uint16 |
271 | 0 | h2p += 1 + 2; |
272 | | |
273 | | // Map (index) of offsets for optional metalayers |
274 | 0 | *h2p = 0xde; // map 16 with N keys |
275 | 0 | h2p += 1; |
276 | 0 | to_big(h2p, &nmetalayers, sizeof(nmetalayers)); |
277 | 0 | h2p += sizeof(nmetalayers); |
278 | 0 | int32_t current_header_len = (int32_t)(h2p - h2); |
279 | 0 | int32_t *offtooff = malloc(nmetalayers * sizeof(int32_t)); |
280 | 0 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
281 | 0 | if (frame == NULL) { |
282 | 0 | return NULL; |
283 | 0 | } |
284 | 0 | blosc2_metalayer *metalayer = schunk->metalayers[nmetalayer]; |
285 | 0 | uint8_t namelen = (uint8_t) strlen(metalayer->name); |
286 | 0 | h2 = realloc(h2, (size_t)current_header_len + 1 + namelen + 1 + 4); |
287 | 0 | h2p = h2 + current_header_len; |
288 | | // Store the metalayer |
289 | 0 | if (namelen >= (1U << 5U)) { // metalayer strings cannot be longer than 32 bytes |
290 | 0 | free(offtooff); |
291 | 0 | return NULL; |
292 | 0 | } |
293 | 0 | *h2p = (uint8_t)0xa0 + namelen; // str |
294 | 0 | h2p += 1; |
295 | 0 | memcpy(h2p, metalayer->name, namelen); |
296 | 0 | h2p += namelen; |
297 | | // Space for storing the offset for the value of this metalayer |
298 | 0 | *h2p = 0xd2; // int32 |
299 | 0 | h2p += 1; |
300 | 0 | offtooff[nmetalayer] = (int32_t)(h2p - h2); |
301 | 0 | h2p += 4; |
302 | 0 | current_header_len += 1 + namelen + 1 + 4; |
303 | 0 | } |
304 | 0 | int32_t hsize2 = (int32_t)(h2p - h2); |
305 | 0 | if (hsize2 != current_header_len) { // sanity check |
306 | 0 | return NULL; |
307 | 0 | } |
308 | | |
309 | | // Map size + int16 size |
310 | 0 | if ((uint32_t) (hsize2 - hsize) >= (1U << 16U)) { |
311 | 0 | return NULL; |
312 | 0 | } |
313 | 0 | uint16_t map_size = (uint16_t) (hsize2 - hsize); |
314 | 0 | to_big(h2 + FRAME_IDX_SIZE, &map_size, sizeof(map_size)); |
315 | | |
316 | | // Make space for an (empty) array |
317 | 0 | hsize = (int32_t)(h2p - h2); |
318 | 0 | h2 = realloc(h2, (size_t)hsize + 2 + 1 + 2); |
319 | 0 | h2p = h2 + hsize; |
320 | | |
321 | | // Now, store the values in an array |
322 | 0 | *h2p = 0xdc; // array 16 with N elements |
323 | 0 | h2p += 1; |
324 | 0 | to_big(h2p, &nmetalayers, sizeof(nmetalayers)); |
325 | 0 | h2p += sizeof(nmetalayers); |
326 | 0 | current_header_len = (int32_t)(h2p - h2); |
327 | 0 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
328 | 0 | if (frame == NULL) { |
329 | 0 | return NULL; |
330 | 0 | } |
331 | 0 | blosc2_metalayer *metalayer = schunk->metalayers[nmetalayer]; |
332 | 0 | h2 = realloc(h2, (size_t)current_header_len + 1 + 4 + metalayer->content_len); |
333 | 0 | h2p = h2 + current_header_len; |
334 | | // Store the serialized contents for this metalayer |
335 | 0 | *h2p = 0xc6; // bin32 |
336 | 0 | h2p += 1; |
337 | 0 | to_big(h2p, &(metalayer->content_len), sizeof(metalayer->content_len)); |
338 | 0 | h2p += 4; |
339 | 0 | memcpy(h2p, metalayer->content, metalayer->content_len); // buffer, no need to swap |
340 | 0 | h2p += metalayer->content_len; |
341 | | // Update the offset now that we know it |
342 | 0 | to_big(h2 + offtooff[nmetalayer], ¤t_header_len, sizeof(current_header_len)); |
343 | 0 | current_header_len += 1 + 4 + metalayer->content_len; |
344 | 0 | } |
345 | 0 | free(offtooff); |
346 | 0 | hsize = (int32_t)(h2p - h2); |
347 | 0 | if (hsize != current_header_len) { // sanity check |
348 | 0 | return NULL; |
349 | 0 | } |
350 | | |
351 | | // Set the length of the whole header now that we know it |
352 | 0 | to_big(h2 + FRAME_HEADER_LEN, &hsize, sizeof(hsize)); |
353 | |
|
354 | 0 | return h2; |
355 | 0 | } |
356 | | |
357 | | |
358 | | int get_header_info(blosc2_frame_s *frame, int32_t *header_len, int64_t *frame_len, int64_t *nbytes, int64_t *cbytes, |
359 | | int32_t *blocksize, int32_t *chunksize, int64_t *nchunks, int32_t *typesize, uint8_t *compcode, |
360 | | uint8_t *compcode_meta, uint8_t *clevel, uint8_t *filters, uint8_t *filters_meta, |
361 | 0 | uint8_t *splitmode, const blosc2_io *io) { |
362 | 0 | uint8_t* framep = frame->cframe; |
363 | 0 | uint8_t* header_ptr; |
364 | 0 | uint8_t header[FRAME_HEADER_MINLEN]; |
365 | |
|
366 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(io->id); |
367 | 0 | if (io_cb == NULL) { |
368 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
369 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
370 | 0 | } |
371 | | |
372 | 0 | if (frame->len <= 0) { |
373 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
374 | 0 | } |
375 | | |
376 | 0 | if (frame->cframe == NULL) { |
377 | 0 | int64_t rbytes = 0; |
378 | 0 | void* fp = NULL; |
379 | 0 | int64_t io_pos = 0; |
380 | 0 | if (frame->sframe) { |
381 | 0 | fp = sframe_open_index(frame->urlpath, "rb", io); |
382 | 0 | if (fp == NULL) { |
383 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
384 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
385 | 0 | } |
386 | 0 | } |
387 | 0 | else { |
388 | 0 | fp = io_cb->open(frame->urlpath, "rb", io->params); |
389 | 0 | if (fp == NULL) { |
390 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
391 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
392 | 0 | } |
393 | 0 | io_pos = frame->file_offset; |
394 | 0 | } |
395 | 0 | if (io_cb->is_allocation_necessary) |
396 | 0 | header_ptr = header; |
397 | 0 | rbytes = io_cb->read((void**)&header_ptr, 1, FRAME_HEADER_MINLEN, io_pos, fp); |
398 | 0 | io_cb->close(fp); |
399 | 0 | if (rbytes != FRAME_HEADER_MINLEN) { |
400 | 0 | return BLOSC2_ERROR_FILE_READ; |
401 | 0 | } |
402 | 0 | framep = header_ptr; |
403 | 0 | } |
404 | | |
405 | | // Consistency check for frame type |
406 | 0 | uint8_t frame_type = framep[FRAME_TYPE]; |
407 | 0 | if (frame->sframe) { |
408 | 0 | if (frame_type != FRAME_DIRECTORY_TYPE) { |
409 | 0 | return BLOSC2_ERROR_FRAME_TYPE; |
410 | 0 | } |
411 | 0 | } else { |
412 | 0 | if (frame_type != FRAME_CONTIGUOUS_TYPE) { |
413 | 0 | return BLOSC2_ERROR_FRAME_TYPE; |
414 | 0 | } |
415 | 0 | } |
416 | | |
417 | | // Fetch some internal lengths |
418 | 0 | from_big(header_len, framep + FRAME_HEADER_LEN, sizeof(*header_len)); |
419 | 0 | if (*header_len < FRAME_HEADER_MINLEN) { |
420 | 0 | BLOSC_TRACE_ERROR("Header length is zero or smaller than min allowed."); |
421 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
422 | 0 | } |
423 | 0 | from_big(frame_len, framep + FRAME_LEN, sizeof(*frame_len)); |
424 | 0 | if (*header_len > *frame_len) { |
425 | 0 | BLOSC_TRACE_ERROR("Header length exceeds length of the frame."); |
426 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
427 | 0 | } |
428 | 0 | from_big(nbytes, framep + FRAME_NBYTES, sizeof(*nbytes)); |
429 | 0 | from_big(cbytes, framep + FRAME_CBYTES, sizeof(*cbytes)); |
430 | 0 | from_big(blocksize, framep + FRAME_BLOCKSIZE, sizeof(*blocksize)); |
431 | 0 | if (chunksize != NULL) { |
432 | 0 | from_big(chunksize, framep + FRAME_CHUNKSIZE, sizeof(*chunksize)); |
433 | 0 | } |
434 | 0 | if (typesize != NULL) { |
435 | 0 | from_big(typesize, framep + FRAME_TYPESIZE, sizeof(*typesize)); |
436 | 0 | if (*typesize <= 0) { |
437 | 0 | BLOSC_TRACE_ERROR("`typesize` cannot be zero or negative."); |
438 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
439 | 0 | } |
440 | 0 | } |
441 | | |
442 | | // Codecs |
443 | 0 | uint8_t frame_codecs = framep[FRAME_CODECS]; |
444 | 0 | if (clevel != NULL) { |
445 | 0 | *clevel = frame_codecs >> 4u; |
446 | 0 | } |
447 | 0 | if (compcode != NULL) { |
448 | 0 | *compcode = frame_codecs & 0xFu; |
449 | 0 | if (*compcode == BLOSC_UDCODEC_FORMAT) { |
450 | 0 | from_big(compcode, framep + FRAME_UDCODEC, sizeof(*compcode)); |
451 | 0 | } |
452 | 0 | } |
453 | | |
454 | | // Other flags |
455 | 0 | uint8_t other_flags = framep[FRAME_OTHER_FLAGS]; |
456 | 0 | if (splitmode != NULL) { |
457 | 0 | *splitmode = other_flags & 0x4u; |
458 | 0 | from_big(splitmode, framep + FRAME_OTHER_FLAGS, sizeof(*splitmode)); |
459 | 0 | *splitmode += 1; |
460 | 0 | } |
461 | |
|
462 | 0 | if (compcode_meta != NULL) { |
463 | 0 | from_big(compcode_meta, framep + FRAME_CODEC_META, sizeof(*compcode_meta)); |
464 | 0 | } |
465 | | |
466 | | // Filters |
467 | 0 | if (filters != NULL && filters_meta != NULL) { |
468 | 0 | uint8_t nfilters = framep[FRAME_FILTER_PIPELINE]; |
469 | 0 | if (nfilters > BLOSC2_MAX_FILTERS) { |
470 | 0 | BLOSC_TRACE_ERROR("The number of filters in frame header are too large for Blosc2."); |
471 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
472 | 0 | } |
473 | 0 | uint8_t *filters_ = framep + FRAME_FILTER_PIPELINE + 1; |
474 | 0 | uint8_t *filters_meta_ = framep + FRAME_FILTER_PIPELINE + 1 + FRAME_FILTER_PIPELINE_MAX; |
475 | 0 | for (int i = 0; i < nfilters; i++) { |
476 | 0 | filters[i] = filters_[i]; |
477 | 0 | filters_meta[i] = filters_meta_[i]; |
478 | 0 | } |
479 | 0 | } |
480 | | |
481 | 0 | if (*nbytes > 0 && *chunksize > 0) { |
482 | | // We can compute the number of chunks only when the frame has actual data |
483 | 0 | *nchunks = *nbytes / *chunksize; |
484 | 0 | if (*nbytes % *chunksize > 0) { |
485 | 0 | if (*nchunks == INT32_MAX) { |
486 | 0 | BLOSC_TRACE_ERROR("Number of chunks exceeds maximum allowed."); |
487 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
488 | 0 | } |
489 | 0 | *nchunks += 1; |
490 | 0 | } |
491 | | |
492 | | // Sanity check for compressed sizes |
493 | 0 | if ((*cbytes < 0) || ((int64_t)*nchunks * *chunksize < *nbytes)) { |
494 | 0 | BLOSC_TRACE_ERROR("Invalid compressed size in frame header."); |
495 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
496 | 0 | } |
497 | 0 | } else { |
498 | 0 | *nchunks = 0; |
499 | 0 | } |
500 | | |
501 | 0 | return 0; |
502 | 0 | } |
503 | | |
504 | | |
505 | 0 | int64_t get_trailer_offset(blosc2_frame_s *frame, int32_t header_len, bool has_coffsets) { |
506 | 0 | if (!has_coffsets) { |
507 | | // No data chunks yet |
508 | 0 | return header_len; |
509 | 0 | } |
510 | 0 | return frame->len - frame->trailer_len; |
511 | 0 | } |
512 | | |
513 | | |
514 | | // Update the length in the header |
515 | 0 | int update_frame_len(blosc2_frame_s* frame, int64_t len) { |
516 | 0 | int rc = 1; |
517 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
518 | 0 | if (io_cb == NULL) { |
519 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
520 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
521 | 0 | } |
522 | | |
523 | 0 | if (frame->cframe != NULL) { |
524 | 0 | to_big(frame->cframe + FRAME_LEN, &len, sizeof(int64_t)); |
525 | 0 | } |
526 | 0 | else { |
527 | 0 | void* fp = NULL; |
528 | 0 | if (frame->sframe) { |
529 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
530 | 0 | frame->schunk->storage->io); |
531 | 0 | } |
532 | 0 | else { |
533 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
534 | 0 | } |
535 | 0 | if (fp == NULL) { |
536 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
537 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
538 | 0 | } |
539 | 0 | int64_t io_pos = frame->file_offset + FRAME_LEN; |
540 | 0 | int64_t swap_len; |
541 | 0 | to_big(&swap_len, &len, sizeof(int64_t)); |
542 | 0 | int64_t wbytes = io_cb->write(&swap_len, 1, sizeof(int64_t), io_pos, fp); |
543 | 0 | io_cb->close(fp); |
544 | 0 | if (wbytes != sizeof(int64_t)) { |
545 | 0 | BLOSC_TRACE_ERROR("Cannot write the frame length in header."); |
546 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
547 | 0 | } |
548 | 0 | } |
549 | 0 | return rc; |
550 | 0 | } |
551 | | |
552 | | |
553 | 0 | int frame_update_trailer(blosc2_frame_s* frame, blosc2_schunk* schunk) { |
554 | 0 | if (frame != NULL && frame->len == 0) { |
555 | 0 | BLOSC_TRACE_ERROR("The trailer cannot be updated on empty frames."); |
556 | 0 | } |
557 | | |
558 | | // Create the trailer in msgpack (see the frame format document) |
559 | 0 | uint32_t trailer_len = FRAME_TRAILER_MINLEN; |
560 | 0 | uint8_t* trailer = (uint8_t*)calloc((size_t)trailer_len, 1); |
561 | 0 | uint8_t* ptrailer = trailer; |
562 | 0 | *ptrailer = 0x90 + 4; // fixarray with 4 elements |
563 | 0 | ptrailer += 1; |
564 | | // Trailer format version |
565 | 0 | *ptrailer = FRAME_TRAILER_VERSION; |
566 | 0 | ptrailer += 1; |
567 | |
|
568 | 0 | int32_t current_trailer_len = (int32_t)(ptrailer - trailer); |
569 | | |
570 | | // Now, deal with variable-length metalayers |
571 | 0 | int16_t nvlmetalayers = schunk->nvlmetalayers; |
572 | 0 | if (nvlmetalayers < 0 || nvlmetalayers > BLOSC2_MAX_METALAYERS) { |
573 | 0 | return -1; |
574 | 0 | } |
575 | | |
576 | | // Make space for the header of metalayers (array marker, size, map of offsets) |
577 | 0 | trailer = realloc(trailer, (size_t) current_trailer_len + 1 + 1 + 2 + 1 + 2); |
578 | 0 | ptrailer = trailer + current_trailer_len; |
579 | | |
580 | | // The msgpack header for the metalayers (array_marker, size, map of offsets, list of metalayers) |
581 | 0 | *ptrailer = 0x90 + 3; // array with 3 elements |
582 | 0 | ptrailer += 1; |
583 | |
|
584 | 0 | int32_t tsize = (int32_t)(ptrailer - trailer); |
585 | | |
586 | | // Size for the map (index) of metalayer offsets, including this uint16 size (to be filled out later on) |
587 | 0 | *ptrailer = 0xcd; // uint16 |
588 | 0 | ptrailer += 1 + 2; |
589 | | |
590 | | // Map (index) of offsets for optional metalayers |
591 | 0 | *ptrailer = 0xde; // map 16 with N keys |
592 | 0 | ptrailer += 1; |
593 | 0 | to_big(ptrailer, &nvlmetalayers, sizeof(nvlmetalayers)); |
594 | 0 | ptrailer += sizeof(nvlmetalayers); |
595 | 0 | current_trailer_len = (int32_t)(ptrailer - trailer); |
596 | 0 | int32_t *offtodata = malloc(nvlmetalayers * sizeof(int32_t)); |
597 | 0 | for (int nvlmetalayer = 0; nvlmetalayer < nvlmetalayers; nvlmetalayer++) { |
598 | 0 | if (frame == NULL) { |
599 | 0 | return -1; |
600 | 0 | } |
601 | 0 | blosc2_metalayer *vlmetalayer = schunk->vlmetalayers[nvlmetalayer]; |
602 | 0 | uint8_t name_len = (uint8_t) strlen(vlmetalayer->name); |
603 | 0 | trailer = realloc(trailer, (size_t)current_trailer_len + 1 + name_len + 1 + 4); |
604 | 0 | ptrailer = trailer + current_trailer_len; |
605 | | // Store the vlmetalayer |
606 | 0 | if (name_len >= (1U << 5U)) { // metalayer strings cannot be longer than 32 bytes |
607 | 0 | free(offtodata); |
608 | 0 | return -1; |
609 | 0 | } |
610 | 0 | *ptrailer = (uint8_t)0xa0 + name_len; // str |
611 | 0 | ptrailer += 1; |
612 | 0 | memcpy(ptrailer, vlmetalayer->name, name_len); |
613 | 0 | ptrailer += name_len; |
614 | | // Space for storing the offset for the value of this vlmetalayer |
615 | 0 | *ptrailer = 0xd2; // int32 |
616 | 0 | ptrailer += 1; |
617 | 0 | offtodata[nvlmetalayer] = (int32_t)(ptrailer - trailer); |
618 | 0 | ptrailer += 4; |
619 | 0 | current_trailer_len += 1 + name_len + 1 + 4; |
620 | 0 | } |
621 | 0 | int32_t tsize2 = (int32_t)(ptrailer - trailer); |
622 | 0 | if (tsize2 != current_trailer_len) { // sanity check |
623 | 0 | return -1; |
624 | 0 | } |
625 | | |
626 | | // Map size + int16 size |
627 | 0 | if ((uint32_t) (tsize2 - tsize) >= (1U << 16U)) { |
628 | 0 | return -1; |
629 | 0 | } |
630 | 0 | uint16_t map_size = (uint16_t) (tsize2 - tsize); |
631 | 0 | to_big(trailer + 4, &map_size, sizeof(map_size)); |
632 | | |
633 | | // Make space for an (empty) array |
634 | 0 | tsize = (int32_t)(ptrailer - trailer); |
635 | 0 | trailer = realloc(trailer, (size_t) tsize + 2 + 1 + 2); |
636 | 0 | ptrailer = trailer + tsize; |
637 | | |
638 | | // Now, store the values in an array |
639 | 0 | *ptrailer = 0xdc; // array 16 with N elements |
640 | 0 | ptrailer += 1; |
641 | 0 | to_big(ptrailer, &nvlmetalayers, sizeof(nvlmetalayers)); |
642 | 0 | ptrailer += sizeof(nvlmetalayers); |
643 | 0 | current_trailer_len = (int32_t)(ptrailer - trailer); |
644 | 0 | for (int nvlmetalayer = 0; nvlmetalayer < nvlmetalayers; nvlmetalayer++) { |
645 | 0 | if (frame == NULL) { |
646 | 0 | return -1; |
647 | 0 | } |
648 | 0 | blosc2_metalayer *vlmetalayer = schunk->vlmetalayers[nvlmetalayer]; |
649 | 0 | trailer = realloc(trailer, (size_t)current_trailer_len + 1 + 4 + vlmetalayer->content_len); |
650 | 0 | ptrailer = trailer + current_trailer_len; |
651 | | // Store the serialized contents for this vlmetalayer |
652 | 0 | *ptrailer = 0xc6; // bin32 |
653 | 0 | ptrailer += 1; |
654 | 0 | to_big(ptrailer, &(vlmetalayer->content_len), sizeof(vlmetalayer->content_len)); |
655 | 0 | ptrailer += 4; |
656 | 0 | memcpy(ptrailer, vlmetalayer->content, vlmetalayer->content_len); // buffer, no need to swap |
657 | 0 | ptrailer += vlmetalayer->content_len; |
658 | | // Update the offset now that we know it |
659 | 0 | to_big(trailer + offtodata[nvlmetalayer], ¤t_trailer_len, sizeof(current_trailer_len)); |
660 | 0 | current_trailer_len += 1 + 4 + vlmetalayer->content_len; |
661 | 0 | } |
662 | 0 | free(offtodata); |
663 | 0 | tsize = (int32_t)(ptrailer - trailer); |
664 | 0 | if (tsize != current_trailer_len) { // sanity check |
665 | 0 | return -1; |
666 | 0 | } |
667 | | |
668 | 0 | trailer = realloc(trailer, (size_t)current_trailer_len + 23); |
669 | 0 | ptrailer = trailer + current_trailer_len; |
670 | 0 | trailer_len = (ptrailer - trailer) + 23; |
671 | | |
672 | | // Trailer length |
673 | 0 | *ptrailer = 0xce; // uint32 |
674 | 0 | ptrailer += 1; |
675 | 0 | to_big(ptrailer, &trailer_len, sizeof(uint32_t)); |
676 | 0 | ptrailer += sizeof(uint32_t); |
677 | | // Up to 16 bytes for frame fingerprint (using XXH3 included in https://github.com/Cyan4973/xxHash) |
678 | | // Maybe someone would need 256-bit in the future, but for the time being 128-bit seems like a good tradeoff |
679 | 0 | *ptrailer = 0xd8; // fixext 16 |
680 | 0 | ptrailer += 1; |
681 | 0 | *ptrailer = 0; // fingerprint type: 0 -> no fp; 1 -> 32-bit; 2 -> 64-bit; 3 -> 128-bit |
682 | 0 | ptrailer += 1; |
683 | | |
684 | | // Remove call to memset when we compute an actual fingerprint |
685 | 0 | memset(ptrailer, 0, 16); |
686 | | // Uncomment call to memcpy when we compute an actual fingerprint |
687 | | // memcpy(ptrailer, xxh3_fingerprint, sizeof(xxh3_fingerprint)); |
688 | 0 | ptrailer += 16; |
689 | | |
690 | | // Sanity check |
691 | 0 | if (ptrailer - trailer != trailer_len) { |
692 | 0 | return BLOSC2_ERROR_DATA; |
693 | 0 | } |
694 | | |
695 | 0 | int32_t header_len; |
696 | 0 | int64_t frame_len; |
697 | 0 | int64_t nbytes; |
698 | 0 | int64_t cbytes; |
699 | 0 | int32_t blocksize; |
700 | 0 | int32_t chunksize; |
701 | 0 | int64_t nchunks; |
702 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
703 | 0 | &blocksize, &chunksize, &nchunks, |
704 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
705 | 0 | frame->schunk->storage->io); |
706 | 0 | if (ret < 0) { |
707 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
708 | 0 | return ret; |
709 | 0 | } |
710 | | |
711 | 0 | int64_t trailer_offset = get_trailer_offset(frame, header_len, nbytes > 0); |
712 | |
|
713 | 0 | if (trailer_offset < BLOSC_EXTENDED_HEADER_LENGTH) { |
714 | 0 | BLOSC_TRACE_ERROR("Unable to get trailer offset in frame."); |
715 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
716 | 0 | } |
717 | | |
718 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
719 | 0 | if (io_cb == NULL) { |
720 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
721 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
722 | 0 | } |
723 | | // Update the trailer. As there are no internal offsets to the trailer section, |
724 | | // and it is always at the end of the frame, we can just write (or overwrite) it |
725 | | // at the end of the frame. |
726 | 0 | if (frame->cframe != NULL) { |
727 | 0 | frame->cframe = realloc(frame->cframe, (size_t)(trailer_offset + trailer_len)); |
728 | 0 | if (frame->cframe == NULL) { |
729 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
730 | 0 | return BLOSC2_ERROR_MEMORY_ALLOC; |
731 | 0 | } |
732 | 0 | memcpy(frame->cframe + trailer_offset, trailer, trailer_len); |
733 | 0 | } |
734 | 0 | else { |
735 | 0 | void* fp = NULL; |
736 | 0 | if (frame->sframe) { |
737 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
738 | 0 | frame->schunk->storage->io); |
739 | 0 | } |
740 | 0 | else { |
741 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
742 | 0 | } |
743 | 0 | if (fp == NULL) { |
744 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
745 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
746 | 0 | } |
747 | 0 | int64_t io_pos = frame->file_offset + trailer_offset; |
748 | 0 | int64_t wbytes = io_cb->write(trailer, 1, trailer_len, io_pos, fp); |
749 | 0 | if (wbytes != trailer_len) { |
750 | 0 | BLOSC_TRACE_ERROR("Cannot write the trailer length in trailer."); |
751 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
752 | 0 | } |
753 | 0 | if (io_cb->truncate(fp, trailer_offset + trailer_len) != 0) { |
754 | 0 | BLOSC_TRACE_ERROR("Cannot truncate the frame."); |
755 | 0 | return BLOSC2_ERROR_FILE_TRUNCATE; |
756 | 0 | } |
757 | 0 | io_cb->close(fp); |
758 | |
|
759 | 0 | } |
760 | 0 | free(trailer); |
761 | |
|
762 | 0 | int rc = update_frame_len(frame, trailer_offset + trailer_len); |
763 | 0 | if (rc < 0) { |
764 | 0 | return rc; |
765 | 0 | } |
766 | 0 | frame->len = trailer_offset + trailer_len; |
767 | 0 | frame->trailer_len = trailer_len; |
768 | |
|
769 | 0 | return 1; |
770 | 0 | } |
771 | | |
772 | | |
// Remove a file:/// prefix
// This is a temporary workaround for allowing to use proper URLs for local files/dirs
//
// Returns a pointer *inside* `urlpath` (no copy is made): right after the
// prefix when `urlpath` starts with "file:///", or `urlpath` itself otherwise.
// A NULL `urlpath` yields NULL.
static char* normalize_urlpath(const char* urlpath) {
  static const char prefix[] = "file:///";
  const size_t prefix_len = sizeof(prefix) - 1;

  if (urlpath == NULL) {
    return NULL;
  }
  // Only the start of the string matters, so compare just the prefix
  if (strncmp(urlpath, prefix, prefix_len) == 0) {
    // There is a file:/// prefix.  Get rid of it.
    return (char*)urlpath + prefix_len;
  }
  return (char*)urlpath;
}
786 | | |
787 | | |
788 | | /* Initialize a frame out of a file */ |
789 | 0 | blosc2_frame_s* frame_from_file_offset(const char* urlpath, const blosc2_io *io, int64_t offset) { |
790 | | // Get the length of the frame |
791 | 0 | uint8_t* header_ptr; |
792 | 0 | uint8_t header[FRAME_HEADER_MINLEN]; |
793 | 0 | uint8_t* trailer_ptr; |
794 | 0 | uint8_t trailer[FRAME_TRAILER_MINLEN]; |
795 | |
|
796 | 0 | void* fp = NULL; |
797 | 0 | bool sframe = false; |
798 | 0 | struct stat path_stat; |
799 | |
|
800 | 0 | urlpath = normalize_urlpath(urlpath); |
801 | |
|
802 | 0 | if(stat(urlpath, &path_stat) < 0) { |
803 | 0 | BLOSC_TRACE_ERROR("Cannot get information about the path %s.", urlpath); |
804 | 0 | return NULL; |
805 | 0 | } |
806 | | |
807 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(io->id); |
808 | 0 | if (io_cb == NULL) { |
809 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
810 | 0 | return NULL; |
811 | 0 | } |
812 | | |
813 | 0 | char* urlpath_cpy; |
814 | 0 | if (path_stat.st_mode & S_IFDIR) { |
815 | 0 | urlpath_cpy = malloc(strlen(urlpath) + 1); |
816 | 0 | strcpy(urlpath_cpy, urlpath); |
817 | 0 | char last_char = urlpath[strlen(urlpath) - 1]; |
818 | 0 | if (last_char == '\\' || last_char == '/') { |
819 | 0 | urlpath_cpy[strlen(urlpath) - 1] = '\0'; |
820 | 0 | } |
821 | 0 | else { |
822 | 0 | } |
823 | 0 | fp = sframe_open_index(urlpath_cpy, "rb", io); |
824 | 0 | sframe = true; |
825 | 0 | } |
826 | 0 | else { |
827 | 0 | urlpath_cpy = malloc(strlen(urlpath) + 1); |
828 | 0 | strcpy(urlpath_cpy, urlpath); |
829 | 0 | fp = io_cb->open(urlpath, "rb", io->params); |
830 | 0 | } |
831 | 0 | if (fp == NULL) { |
832 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", urlpath); |
833 | 0 | return NULL; |
834 | 0 | } |
835 | | |
836 | 0 | if (io_cb->is_allocation_necessary) |
837 | 0 | header_ptr = header; |
838 | 0 | int64_t io_pos = offset; |
839 | 0 | int64_t rbytes = io_cb->read((void**)&header_ptr, 1, FRAME_HEADER_MINLEN, io_pos, fp); |
840 | 0 | if (rbytes != FRAME_HEADER_MINLEN) { |
841 | 0 | BLOSC_TRACE_ERROR("Cannot read from file '%s'.", urlpath); |
842 | 0 | io_cb->close(fp); |
843 | 0 | free(urlpath_cpy); |
844 | 0 | return NULL; |
845 | 0 | } |
846 | 0 | int64_t frame_len; |
847 | 0 | to_big(&frame_len, header_ptr + FRAME_LEN, sizeof(frame_len)); |
848 | |
|
849 | 0 | blosc2_frame_s* frame = calloc(1, sizeof(blosc2_frame_s)); |
850 | 0 | frame->urlpath = urlpath_cpy; |
851 | 0 | frame->len = frame_len; |
852 | 0 | frame->sframe = sframe; |
853 | 0 | frame->file_offset = offset; |
854 | | |
855 | | // Now, the trailer length |
856 | 0 | if (io_cb->is_allocation_necessary) |
857 | 0 | trailer_ptr = trailer; |
858 | 0 | io_pos = offset + frame_len - FRAME_TRAILER_MINLEN; |
859 | 0 | rbytes = io_cb->read((void**)&trailer_ptr, 1, FRAME_TRAILER_MINLEN, io_pos, fp); |
860 | 0 | io_cb->close(fp); |
861 | 0 | if (rbytes != FRAME_TRAILER_MINLEN) { |
862 | 0 | BLOSC_TRACE_ERROR("Cannot read from file '%s'.", urlpath); |
863 | 0 | free(urlpath_cpy); |
864 | 0 | free(frame); |
865 | 0 | return NULL; |
866 | 0 | } |
867 | 0 | int trailer_offset = FRAME_TRAILER_MINLEN - FRAME_TRAILER_LEN_OFFSET; |
868 | 0 | if (trailer_ptr[trailer_offset - 1] != 0xce) { |
869 | 0 | free(urlpath_cpy); |
870 | 0 | free(frame); |
871 | 0 | return NULL; |
872 | 0 | } |
873 | 0 | uint32_t trailer_len; |
874 | 0 | to_big(&trailer_len, trailer_ptr + trailer_offset, sizeof(trailer_len)); |
875 | 0 | frame->trailer_len = trailer_len; |
876 | |
|
877 | 0 | return frame; |
878 | 0 | } |
879 | | |
880 | | |
881 | | /* Initialize a frame out of a contiguous frame buffer */ |
882 | 0 | blosc2_frame_s* frame_from_cframe(uint8_t *cframe, int64_t len, bool copy) { |
883 | | // Get the length of the frame |
884 | 0 | const uint8_t* header = cframe; |
885 | 0 | int64_t frame_len; |
886 | 0 | if (len < FRAME_HEADER_MINLEN) { |
887 | 0 | return NULL; |
888 | 0 | } |
889 | | |
890 | 0 | from_big(&frame_len, header + FRAME_LEN, sizeof(frame_len)); |
891 | 0 | if (frame_len != len) { // sanity check |
892 | 0 | return NULL; |
893 | 0 | } |
894 | | |
895 | 0 | blosc2_frame_s* frame = calloc(1, sizeof(blosc2_frame_s)); |
896 | 0 | frame->len = frame_len; |
897 | 0 | frame->file_offset = 0; |
898 | | |
899 | | // Now, the trailer length |
900 | 0 | const uint8_t* trailer = cframe + frame_len - FRAME_TRAILER_MINLEN; |
901 | 0 | int trailer_offset = FRAME_TRAILER_MINLEN - FRAME_TRAILER_LEN_OFFSET; |
902 | 0 | if (trailer[trailer_offset - 1] != 0xce) { |
903 | 0 | free(frame); |
904 | 0 | return NULL; |
905 | 0 | } |
906 | 0 | uint32_t trailer_len; |
907 | 0 | from_big(&trailer_len, trailer + trailer_offset, sizeof(trailer_len)); |
908 | 0 | frame->trailer_len = trailer_len; |
909 | |
|
910 | 0 | if (copy) { |
911 | 0 | frame->cframe = malloc((size_t)len); |
912 | 0 | memcpy(frame->cframe, cframe, (size_t)len); |
913 | 0 | } |
914 | 0 | else { |
915 | 0 | frame->cframe = cframe; |
916 | 0 | frame->avoid_cframe_free = true; |
917 | 0 | } |
918 | |
|
919 | 0 | return frame; |
920 | 0 | } |
921 | | |
922 | | |
923 | | /* Create a frame out of a super-chunk. */ |
924 | 0 | int64_t frame_from_schunk(blosc2_schunk *schunk, blosc2_frame_s *frame) { |
925 | 0 | frame->file_offset = 0; |
926 | 0 | int64_t nchunks = schunk->nchunks; |
927 | 0 | int64_t cbytes = schunk->cbytes; |
928 | 0 | int32_t chunk_cbytes; |
929 | 0 | int32_t chunk_nbytes; |
930 | 0 | void* fp = NULL; |
931 | 0 | int rc; |
932 | |
|
933 | 0 | uint8_t* h2 = new_header_frame(schunk, frame); |
934 | 0 | if (h2 == NULL) { |
935 | 0 | return BLOSC2_ERROR_DATA; |
936 | 0 | } |
937 | 0 | uint32_t h2len; |
938 | 0 | from_big(&h2len, h2 + FRAME_HEADER_LEN, sizeof(h2len)); |
939 | | // Build the offsets chunk |
940 | 0 | int32_t chunksize = -1; |
941 | 0 | int32_t off_cbytes = 0; |
942 | 0 | uint64_t coffset = 0; |
943 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
944 | 0 | uint64_t* data_tmp = malloc(off_nbytes); |
945 | 0 | bool needs_free = false; |
946 | 0 | for (int i = 0; i < nchunks; i++) { |
947 | 0 | uint8_t* data_chunk; |
948 | 0 | data_chunk = schunk->data[i]; |
949 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, &chunk_nbytes, &chunk_cbytes, NULL); |
950 | 0 | if (rc < 0) { |
951 | 0 | return rc; |
952 | 0 | } |
953 | 0 | data_tmp[i] = coffset; |
954 | 0 | coffset += chunk_cbytes; |
955 | 0 | int32_t chunksize_ = chunk_nbytes; |
956 | 0 | if (i == 0) { |
957 | 0 | chunksize = chunksize_; |
958 | 0 | } |
959 | 0 | else if (chunksize != chunksize_) { |
960 | | // Variable size // TODO: update flags for this (or do not use them at all) |
961 | 0 | chunksize = 0; |
962 | 0 | } |
963 | 0 | if (needs_free) { |
964 | 0 | free(data_chunk); |
965 | 0 | } |
966 | 0 | } |
967 | 0 | if ((int64_t)coffset != cbytes) { |
968 | 0 | free(data_tmp); |
969 | 0 | return BLOSC2_ERROR_DATA; |
970 | 0 | } |
971 | 0 | uint8_t *off_chunk = NULL; |
972 | 0 | if (nchunks > 0) { |
973 | | // Compress the chunk of offsets |
974 | 0 | off_chunk = malloc(off_nbytes + BLOSC2_MAX_OVERHEAD); |
975 | 0 | blosc2_context *cctx = blosc2_create_cctx(BLOSC2_CPARAMS_DEFAULTS); |
976 | 0 | if (cctx == NULL) { |
977 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
978 | 0 | return BLOSC2_ERROR_NULL_POINTER; |
979 | 0 | } |
980 | 0 | cctx->typesize = sizeof(int64_t); |
981 | 0 | off_cbytes = blosc2_compress_ctx(cctx, data_tmp, off_nbytes, off_chunk, |
982 | 0 | off_nbytes + BLOSC2_MAX_OVERHEAD); |
983 | 0 | blosc2_free_ctx(cctx); |
984 | 0 | if (off_cbytes < 0) { |
985 | 0 | free(off_chunk); |
986 | 0 | free(h2); |
987 | 0 | return off_cbytes; |
988 | 0 | } |
989 | 0 | } |
990 | 0 | else { |
991 | 0 | off_cbytes = 0; |
992 | 0 | } |
993 | 0 | free(data_tmp); |
994 | | |
995 | | // Now that we know them, fill the chunksize and frame length in header |
996 | 0 | to_big(h2 + FRAME_CHUNKSIZE, &chunksize, sizeof(chunksize)); |
997 | 0 | frame->len = h2len + cbytes + off_cbytes + FRAME_TRAILER_MINLEN; |
998 | 0 | if (frame->sframe) { |
999 | 0 | frame->len = h2len + off_cbytes + FRAME_TRAILER_MINLEN; |
1000 | 0 | } |
1001 | 0 | int64_t tbytes = frame->len; |
1002 | 0 | to_big(h2 + FRAME_LEN, &tbytes, sizeof(tbytes)); |
1003 | |
|
1004 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1005 | 0 | if (io_cb == NULL) { |
1006 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1007 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
1008 | 0 | } |
1009 | | |
1010 | | // Create the frame and put the header at the beginning |
1011 | 0 | int64_t io_pos = 0; |
1012 | 0 | if (frame->urlpath == NULL) { |
1013 | 0 | frame->cframe = malloc((size_t)frame->len); |
1014 | 0 | memcpy(frame->cframe, h2, h2len); |
1015 | 0 | } |
1016 | 0 | else { |
1017 | 0 | if (frame->sframe) { |
1018 | 0 | fp = sframe_open_index(frame->urlpath, "wb", |
1019 | 0 | frame->schunk->storage->io); |
1020 | 0 | } |
1021 | 0 | else { |
1022 | 0 | fp = io_cb->open(frame->urlpath, "wb", frame->schunk->storage->io->params); |
1023 | 0 | } |
1024 | 0 | if (fp == NULL) { |
1025 | 0 | BLOSC_TRACE_ERROR("Error creating file in: %s", frame->urlpath); |
1026 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1027 | 0 | } |
1028 | 0 | io_cb->write(h2, h2len, 1, io_pos, fp); |
1029 | 0 | io_pos += h2len; |
1030 | 0 | } |
1031 | 0 | free(h2); |
1032 | | |
1033 | | // Fill the frame with the actual data chunks |
1034 | 0 | if (!frame->sframe) { |
1035 | 0 | coffset = 0; |
1036 | 0 | for (int i = 0; i < nchunks; i++) { |
1037 | 0 | uint8_t* data_chunk = schunk->data[i]; |
1038 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, NULL, &chunk_cbytes, NULL); |
1039 | 0 | if (rc < 0) { |
1040 | 0 | return rc; |
1041 | 0 | } |
1042 | 0 | if (frame->urlpath == NULL) { |
1043 | 0 | memcpy(frame->cframe + h2len + coffset, data_chunk, (size_t)chunk_cbytes); |
1044 | 0 | } else { |
1045 | 0 | io_cb->write(data_chunk, chunk_cbytes, 1, io_pos, fp); |
1046 | 0 | io_pos += chunk_cbytes; |
1047 | 0 | } |
1048 | 0 | coffset += chunk_cbytes; |
1049 | 0 | } |
1050 | 0 | if ((int64_t)coffset != cbytes) { |
1051 | 0 | return BLOSC2_ERROR_FAILURE; |
1052 | 0 | } |
1053 | 0 | } |
1054 | | |
1055 | | // Copy the offsets chunk at the end of the frame |
1056 | 0 | if (frame->urlpath == NULL) { |
1057 | 0 | memcpy(frame->cframe + h2len + cbytes, off_chunk, off_cbytes); |
1058 | 0 | } |
1059 | 0 | else { |
1060 | 0 | io_cb->write(off_chunk, off_cbytes, 1, io_pos, fp); |
1061 | 0 | io_cb->close(fp); |
1062 | 0 | } |
1063 | 0 | free(off_chunk); |
1064 | 0 | rc = frame_update_trailer(frame, schunk); |
1065 | 0 | if (rc < 0) { |
1066 | 0 | return rc; |
1067 | 0 | } |
1068 | | |
1069 | 0 | return frame->len; |
1070 | 0 | } |
1071 | | |
1072 | | |
1073 | | // Get the compressed data offsets |
1074 | | uint8_t* get_coffsets(blosc2_frame_s *frame, int32_t header_len, int64_t cbytes, |
1075 | 0 | int64_t nchunks, int32_t *off_cbytes) { |
1076 | 0 | int32_t chunk_cbytes; |
1077 | 0 | int rc; |
1078 | |
|
1079 | 0 | if (frame->coffsets != NULL) { |
1080 | 0 | if (off_cbytes != NULL) { |
1081 | 0 | rc = blosc2_cbuffer_sizes(frame->coffsets, NULL, &chunk_cbytes, NULL); |
1082 | 0 | if (rc < 0) { |
1083 | 0 | return NULL; |
1084 | 0 | } |
1085 | 0 | *off_cbytes = (int32_t)chunk_cbytes; |
1086 | 0 | } |
1087 | 0 | return frame->coffsets; |
1088 | 0 | } |
1089 | 0 | if (frame->cframe != NULL) { |
1090 | 0 | int64_t off_pos = header_len; |
1091 | 0 | if (cbytes < INT64_MAX - header_len) { |
1092 | 0 | off_pos += cbytes; |
1093 | 0 | } |
1094 | | // Check that there is enough room to read Blosc header |
1095 | 0 | if (off_pos < 0 || off_pos > INT64_MAX - BLOSC_EXTENDED_HEADER_LENGTH || |
1096 | 0 | off_pos + BLOSC_EXTENDED_HEADER_LENGTH > frame->len) { |
1097 | 0 | BLOSC_TRACE_ERROR("Cannot read the offsets outside of frame boundary."); |
1098 | 0 | return NULL; |
1099 | 0 | } |
1100 | | // For in-memory frames, the coffset is just one pointer away |
1101 | 0 | uint8_t* off_start = frame->cframe + off_pos; |
1102 | 0 | if (off_cbytes != NULL) { |
1103 | 0 | int32_t chunk_nbytes; |
1104 | 0 | int32_t chunk_blocksize; |
1105 | 0 | rc = blosc2_cbuffer_sizes(off_start, &chunk_nbytes, &chunk_cbytes, &chunk_blocksize); |
1106 | 0 | if (rc < 0) { |
1107 | 0 | return NULL; |
1108 | 0 | } |
1109 | 0 | *off_cbytes = (int32_t)chunk_cbytes; |
1110 | 0 | if (*off_cbytes < 0 || off_pos + *off_cbytes > frame->len) { |
1111 | 0 | BLOSC_TRACE_ERROR("Cannot read the cbytes outside of frame boundary."); |
1112 | 0 | return NULL; |
1113 | 0 | } |
1114 | 0 | if ((uint64_t)chunk_nbytes != nchunks * sizeof(int64_t)) { |
1115 | 0 | BLOSC_TRACE_ERROR("The number of chunks in offset idx " |
1116 | 0 | "does not match the ones in the header frame."); |
1117 | 0 | return NULL; |
1118 | 0 | } |
1119 | |
|
1120 | 0 | } |
1121 | 0 | return off_start; |
1122 | 0 | } |
1123 | | |
1124 | 0 | int64_t trailer_offset = get_trailer_offset(frame, header_len, true); |
1125 | |
|
1126 | 0 | if (trailer_offset < BLOSC_EXTENDED_HEADER_LENGTH || trailer_offset + FRAME_TRAILER_MINLEN > frame->len) { |
1127 | 0 | BLOSC_TRACE_ERROR("Cannot read the trailer out of the frame."); |
1128 | 0 | return NULL; |
1129 | 0 | } |
1130 | | |
1131 | 0 | int32_t coffsets_cbytes; |
1132 | 0 | if (frame->sframe) { |
1133 | 0 | coffsets_cbytes = (int32_t)(trailer_offset - (header_len + 0)); |
1134 | 0 | } |
1135 | 0 | else { |
1136 | 0 | coffsets_cbytes = (int32_t)(trailer_offset - (header_len + cbytes)); |
1137 | 0 | } |
1138 | |
|
1139 | 0 | if (off_cbytes != NULL) { |
1140 | 0 | *off_cbytes = coffsets_cbytes; |
1141 | 0 | } |
1142 | |
|
1143 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1144 | 0 | if (io_cb == NULL) { |
1145 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1146 | 0 | return NULL; |
1147 | 0 | } |
1148 | | |
1149 | 0 | void* fp = NULL; |
1150 | 0 | uint8_t* coffsets; |
1151 | 0 | if (io_cb->is_allocation_necessary) { |
1152 | 0 | coffsets = malloc((size_t)coffsets_cbytes); |
1153 | 0 | frame->coffsets_needs_free = true; |
1154 | 0 | } |
1155 | 0 | else { |
1156 | 0 | frame->coffsets_needs_free = false; |
1157 | 0 | } |
1158 | | |
1159 | 0 | int64_t io_pos = 0; |
1160 | 0 | if (frame->sframe) { |
1161 | 0 | fp = sframe_open_index(frame->urlpath, "rb", |
1162 | 0 | frame->schunk->storage->io); |
1163 | 0 | if (fp == NULL) { |
1164 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1165 | 0 | return NULL; |
1166 | 0 | } |
1167 | 0 | io_pos = header_len + 0; |
1168 | 0 | } |
1169 | 0 | else { |
1170 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1171 | 0 | if (fp == NULL) { |
1172 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1173 | 0 | return NULL; |
1174 | 0 | } |
1175 | 0 | io_pos = frame->file_offset + header_len + cbytes; |
1176 | 0 | } |
1177 | 0 | int64_t rbytes = io_cb->read((void**)&coffsets, 1, coffsets_cbytes, io_pos, fp); |
1178 | 0 | io_cb->close(fp); |
1179 | 0 | if (rbytes != coffsets_cbytes) { |
1180 | 0 | BLOSC_TRACE_ERROR("Cannot read the offsets out of the frame."); |
1181 | 0 | if (frame->coffsets_needs_free) |
1182 | 0 | free(coffsets); |
1183 | 0 | return NULL; |
1184 | 0 | } |
1185 | 0 | frame->coffsets = coffsets; |
1186 | 0 | return coffsets; |
1187 | 0 | } |
1188 | | |
1189 | | |
1190 | | // Get the data offsets from a frame |
1191 | 0 | int64_t* blosc2_frame_get_offsets(blosc2_schunk *schunk) { |
1192 | 0 | if (schunk->frame == NULL) { |
1193 | 0 | BLOSC_TRACE_ERROR("This function needs a frame."); |
1194 | 0 | return NULL; |
1195 | 0 | } |
1196 | 0 | blosc2_frame_s* frame = (blosc2_frame_s*)schunk->frame; |
1197 | | |
1198 | | // Get header info |
1199 | 0 | int32_t header_len; |
1200 | 0 | int64_t frame_len; |
1201 | 0 | int64_t nbytes; |
1202 | 0 | int64_t cbytes; |
1203 | 0 | int32_t blocksize; |
1204 | 0 | int32_t chunksize; |
1205 | 0 | int64_t nchunks; |
1206 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
1207 | 0 | &blocksize, &chunksize, &nchunks, |
1208 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1209 | 0 | frame->schunk->storage->io); |
1210 | 0 | if (ret < 0) { |
1211 | 0 | BLOSC_TRACE_ERROR("Cannot get the header info for the frame."); |
1212 | 0 | return NULL; |
1213 | 0 | } |
1214 | | |
1215 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
1216 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
1217 | |
|
1218 | 0 | int32_t coffsets_cbytes = 0; |
1219 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
1220 | | // Decompress offsets |
1221 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
1222 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
1223 | 0 | if (dctx == NULL) { |
1224 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
1225 | 0 | return NULL; |
1226 | 0 | } |
1227 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, |
1228 | 0 | offsets, off_nbytes); |
1229 | 0 | blosc2_free_ctx(dctx); |
1230 | 0 | if (prev_nbytes < 0) { |
1231 | 0 | free(offsets); |
1232 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
1233 | 0 | return NULL; |
1234 | 0 | } |
1235 | 0 | return offsets; |
1236 | 0 | } |
1237 | | |
1238 | | |
1239 | 0 | int frame_update_header(blosc2_frame_s* frame, blosc2_schunk* schunk, bool new) { |
1240 | 0 | uint8_t* framep = frame->cframe; |
1241 | 0 | uint8_t* header_ptr; |
1242 | 0 | uint8_t header[FRAME_HEADER_MINLEN]; |
1243 | |
|
1244 | 0 | if (frame->len <= 0) { |
1245 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
1246 | 0 | } |
1247 | | |
1248 | 0 | if (new && schunk->cbytes > 0) { |
1249 | 0 | BLOSC_TRACE_ERROR("New metalayers cannot be added after actual data " |
1250 | 0 | "has been appended."); |
1251 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
1252 | 0 | } |
1253 | | |
1254 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1255 | 0 | if (io_cb == NULL) { |
1256 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1257 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
1258 | 0 | } |
1259 | | |
1260 | 0 | if (frame->cframe == NULL) { |
1261 | 0 | int64_t rbytes = 0; |
1262 | 0 | void* fp = NULL; |
1263 | 0 | int64_t io_pos = 0; |
1264 | 0 | if (frame->sframe) { |
1265 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
1266 | 0 | frame->schunk->storage->io); |
1267 | 0 | if (fp == NULL) { |
1268 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1269 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1270 | 0 | } |
1271 | 0 | } |
1272 | 0 | else { |
1273 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1274 | 0 | if (fp == NULL) { |
1275 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1276 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1277 | 0 | } |
1278 | 0 | io_pos = frame->file_offset; |
1279 | 0 | } |
1280 | 0 | if (fp != NULL) { |
1281 | 0 | if (io_cb->is_allocation_necessary) |
1282 | 0 | header_ptr = header; |
1283 | 0 | rbytes = io_cb->read((void**)&header_ptr, 1, FRAME_HEADER_MINLEN, io_pos, fp); |
1284 | 0 | io_cb->close(fp); |
1285 | 0 | } |
1286 | 0 | (void) rbytes; |
1287 | 0 | if (rbytes != FRAME_HEADER_MINLEN) { |
1288 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
1289 | 0 | } |
1290 | 0 | framep = header_ptr; |
1291 | 0 | } |
1292 | 0 | uint32_t prev_h2len; |
1293 | 0 | from_big(&prev_h2len, framep + FRAME_HEADER_LEN, sizeof(prev_h2len)); |
1294 | | |
1295 | | // Build a new header |
1296 | 0 | uint8_t* h2 = new_header_frame(schunk, frame); |
1297 | 0 | uint32_t h2len; |
1298 | 0 | from_big(&h2len, h2 + FRAME_HEADER_LEN, sizeof(h2len)); |
1299 | | |
1300 | | // The frame length is outdated when adding a new metalayer, so update it |
1301 | 0 | if (new) { |
1302 | 0 | int64_t frame_len = h2len; // at adding time, we only have to worry of the header for now |
1303 | 0 | to_big(h2 + FRAME_LEN, &frame_len, sizeof(frame_len)); |
1304 | 0 | frame->len = frame_len; |
1305 | 0 | } |
1306 | |
|
1307 | 0 | if (!new && prev_h2len != h2len) { |
1308 | 0 | BLOSC_TRACE_ERROR("The new metalayer sizes should be equal the existing ones."); |
1309 | 0 | return BLOSC2_ERROR_DATA; |
1310 | 0 | } |
1311 | | |
1312 | 0 | void* fp = NULL; |
1313 | 0 | if (frame->cframe == NULL) { |
1314 | | // Write updated header down to file |
1315 | 0 | if (frame->sframe) { |
1316 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
1317 | 0 | frame->schunk->storage->io); |
1318 | 0 | } |
1319 | 0 | else { |
1320 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
1321 | 0 | } |
1322 | 0 | if (fp == NULL) { |
1323 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1324 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1325 | 0 | } |
1326 | 0 | int64_t io_pos = frame->file_offset; |
1327 | 0 | io_cb->write(h2, h2len, 1, io_pos, fp); |
1328 | 0 | io_cb->close(fp); |
1329 | 0 | } |
1330 | 0 | else { |
1331 | 0 | if (new) { |
1332 | 0 | frame->cframe = realloc(frame->cframe, h2len); |
1333 | 0 | } |
1334 | 0 | memcpy(frame->cframe, h2, h2len); |
1335 | 0 | } |
1336 | 0 | free(h2); |
1337 | |
|
1338 | 0 | return 1; |
1339 | 0 | } |
1340 | | |
1341 | | |
1342 | | static int get_meta_from_header(blosc2_frame_s* frame, blosc2_schunk* schunk, uint8_t* header, |
1343 | 0 | int32_t header_len) { |
1344 | 0 | BLOSC_UNUSED_PARAM(frame); |
1345 | 0 | int64_t header_pos = FRAME_IDX_SIZE; |
1346 | | |
1347 | | // Get the size for the index of metalayers |
1348 | 0 | uint16_t idx_size; |
1349 | 0 | header_pos += sizeof(idx_size); |
1350 | 0 | if (header_len < header_pos) { |
1351 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1352 | 0 | } |
1353 | 0 | from_big(&idx_size, header + FRAME_IDX_SIZE, sizeof(idx_size)); |
1354 | | |
1355 | | // Get the actual index of metalayers |
1356 | 0 | uint8_t* metalayers_idx = header + FRAME_IDX_SIZE + 2; |
1357 | 0 | header_pos += 1; |
1358 | 0 | if (header_len < header_pos) { |
1359 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1360 | 0 | } |
1361 | 0 | if (metalayers_idx[0] != 0xde) { // sanity check |
1362 | 0 | return BLOSC2_ERROR_DATA; |
1363 | 0 | } |
1364 | 0 | uint8_t* idxp = metalayers_idx + 1; |
1365 | 0 | uint16_t nmetalayers; |
1366 | 0 | header_pos += sizeof(nmetalayers); |
1367 | 0 | if (header_len < header_pos) { |
1368 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1369 | 0 | } |
1370 | 0 | from_big(&nmetalayers, idxp, sizeof(uint16_t)); |
1371 | 0 | idxp += 2; |
1372 | 0 | if (nmetalayers > BLOSC2_MAX_METALAYERS) { |
1373 | 0 | return BLOSC2_ERROR_DATA; |
1374 | 0 | } |
1375 | 0 | schunk->nmetalayers = nmetalayers; |
1376 | | |
1377 | | // Populate the metalayers and its serialized values |
1378 | 0 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
1379 | 0 | header_pos += 1; |
1380 | 0 | if (header_len < header_pos) { |
1381 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1382 | 0 | } |
1383 | 0 | if ((*idxp & 0xe0u) != 0xa0u) { // sanity check |
1384 | 0 | return BLOSC2_ERROR_DATA; |
1385 | 0 | } |
1386 | 0 | blosc2_metalayer* metalayer = calloc(1, sizeof(blosc2_metalayer)); |
1387 | 0 | schunk->metalayers[nmetalayer] = metalayer; |
1388 | | |
1389 | | // Populate the metalayer string |
1390 | 0 | uint8_t nslen = *idxp & (uint8_t)0x1F; |
1391 | 0 | idxp += 1; |
1392 | 0 | header_pos += nslen; |
1393 | 0 | if (header_len < header_pos) { |
1394 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1395 | 0 | } |
1396 | 0 | char* ns = malloc((size_t)nslen + 1); |
1397 | 0 | memcpy(ns, idxp, nslen); |
1398 | 0 | ns[nslen] = '\0'; |
1399 | 0 | idxp += nslen; |
1400 | 0 | metalayer->name = ns; |
1401 | | |
1402 | | // Populate the serialized value for this metalayer |
1403 | | // Get the offset |
1404 | 0 | header_pos += 1; |
1405 | 0 | if (header_len < header_pos) { |
1406 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1407 | 0 | } |
1408 | 0 | if ((*idxp & 0xffu) != 0xd2u) { // sanity check |
1409 | 0 | return BLOSC2_ERROR_DATA; |
1410 | 0 | } |
1411 | 0 | idxp += 1; |
1412 | 0 | int32_t offset; |
1413 | 0 | header_pos += sizeof(offset); |
1414 | 0 | if (header_len < header_pos) { |
1415 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1416 | 0 | } |
1417 | 0 | from_big(&offset, idxp, sizeof(offset)); |
1418 | 0 | idxp += 4; |
1419 | 0 | if (offset < 0 || offset >= header_len) { |
1420 | | // Offset is less than zero or exceeds header length |
1421 | 0 | return BLOSC2_ERROR_DATA; |
1422 | 0 | } |
1423 | | // Go to offset and see if we have the correct marker |
1424 | 0 | uint8_t* content_marker = header + offset; |
1425 | 0 | if (header_len < offset + 1 + 4) { |
1426 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1427 | 0 | } |
1428 | 0 | if (*content_marker != 0xc6) { |
1429 | 0 | return BLOSC2_ERROR_DATA; |
1430 | 0 | } |
1431 | | |
1432 | | // Read the size of the content |
1433 | 0 | int32_t content_len; |
1434 | 0 | from_big(&content_len, content_marker + 1, sizeof(content_len)); |
1435 | 0 | if (content_len < 0) { |
1436 | 0 | return BLOSC2_ERROR_DATA; |
1437 | 0 | } |
1438 | 0 | metalayer->content_len = content_len; |
1439 | | |
1440 | | // Finally, read the content |
1441 | 0 | if (header_len < offset + 1 + 4 + content_len) { |
1442 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1443 | 0 | } |
1444 | 0 | char* content = malloc((size_t)content_len); |
1445 | 0 | memcpy(content, content_marker + 1 + 4, (size_t)content_len); |
1446 | 0 | metalayer->content = (uint8_t*)content; |
1447 | 0 | } |
1448 | | |
1449 | 0 | return 1; |
1450 | 0 | } |
1451 | | |
1452 | 0 | int frame_get_metalayers(blosc2_frame_s* frame, blosc2_schunk* schunk) { |
1453 | 0 | int32_t header_len; |
1454 | 0 | int64_t frame_len; |
1455 | 0 | int64_t nbytes; |
1456 | 0 | int64_t cbytes; |
1457 | 0 | int32_t blocksize; |
1458 | 0 | int32_t chunksize; |
1459 | 0 | int64_t nchunks; |
1460 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
1461 | 0 | &blocksize, &chunksize, &nchunks, |
1462 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1463 | 0 | schunk->storage->io); |
1464 | 0 | if (ret < 0) { |
1465 | 0 | BLOSC_TRACE_ERROR("Unable to get the header info from frame."); |
1466 | 0 | return ret; |
1467 | 0 | } |
1468 | | |
1469 | | // Get the header |
1470 | 0 | uint8_t* header = NULL; |
1471 | 0 | bool needs_free = false; |
1472 | 0 | if (frame->cframe != NULL) { |
1473 | 0 | header = frame->cframe; |
1474 | 0 | } else { |
1475 | 0 | int64_t rbytes = 0; |
1476 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1477 | 0 | if (io_cb == NULL) { |
1478 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1479 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
1480 | 0 | } |
1481 | | |
1482 | 0 | if (io_cb->is_allocation_necessary) { |
1483 | 0 | header = malloc(header_len); |
1484 | 0 | needs_free = true; |
1485 | 0 | } |
1486 | 0 | else { |
1487 | 0 | needs_free = false; |
1488 | 0 | } |
1489 | |
|
1490 | 0 | void* fp = NULL; |
1491 | 0 | int64_t io_pos = 0; |
1492 | 0 | if (frame->sframe) { |
1493 | 0 | fp = sframe_open_index(frame->urlpath, "rb", |
1494 | 0 | frame->schunk->storage->io); |
1495 | 0 | if (fp == NULL) { |
1496 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1497 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1498 | 0 | } |
1499 | 0 | } |
1500 | 0 | else { |
1501 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1502 | 0 | if (fp == NULL) { |
1503 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1504 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1505 | 0 | } |
1506 | 0 | io_pos = frame->file_offset; |
1507 | 0 | } |
1508 | 0 | if (fp != NULL) { |
1509 | 0 | rbytes = io_cb->read((void**)&header, 1, header_len, io_pos, fp); |
1510 | 0 | io_cb->close(fp); |
1511 | 0 | } |
1512 | 0 | if (rbytes != header_len) { |
1513 | 0 | BLOSC_TRACE_ERROR("Cannot access the header out of the frame."); |
1514 | 0 | if (needs_free) |
1515 | 0 | free(header); |
1516 | 0 | return BLOSC2_ERROR_FILE_READ; |
1517 | 0 | } |
1518 | 0 | } |
1519 | | |
1520 | 0 | ret = get_meta_from_header(frame, schunk, header, header_len); |
1521 | |
|
1522 | 0 | if (frame->cframe == NULL && needs_free) { |
1523 | 0 | free(header); |
1524 | 0 | } |
1525 | |
|
1526 | 0 | return ret; |
1527 | 0 | } |
1528 | | |
1529 | | static int get_vlmeta_from_trailer(blosc2_frame_s* frame, blosc2_schunk* schunk, uint8_t* trailer, |
1530 | 0 | int32_t trailer_len) { |
1531 | |
|
1532 | 0 | BLOSC_UNUSED_PARAM(frame); |
1533 | 0 | int64_t trailer_pos = FRAME_TRAILER_VLMETALAYERS + 2; |
1534 | 0 | uint8_t* idxp = trailer + trailer_pos; |
1535 | | |
1536 | | // Get the size for the index of metalayers |
1537 | 0 | trailer_pos += 2; |
1538 | 0 | if (trailer_len < trailer_pos) { |
1539 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1540 | 0 | } |
1541 | 0 | uint16_t idx_size; |
1542 | 0 | from_big(&idx_size, idxp, sizeof(idx_size)); |
1543 | 0 | idxp += 2; |
1544 | |
|
1545 | 0 | trailer_pos += 1; |
1546 | | // Get the actual index of metalayers |
1547 | 0 | if (trailer_len < trailer_pos) { |
1548 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1549 | 0 | } |
1550 | 0 | if (idxp[0] != 0xde) { // sanity check |
1551 | 0 | return BLOSC2_ERROR_DATA; |
1552 | 0 | } |
1553 | 0 | idxp += 1; |
1554 | |
|
1555 | 0 | int16_t nmetalayers; |
1556 | 0 | trailer_pos += sizeof(nmetalayers); |
1557 | 0 | if (trailer_len < trailer_pos) { |
1558 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1559 | 0 | } |
1560 | 0 | from_big(&nmetalayers, idxp, sizeof(uint16_t)); |
1561 | 0 | idxp += 2; |
1562 | 0 | if (nmetalayers > BLOSC2_MAX_VLMETALAYERS) { |
1563 | 0 | return BLOSC2_ERROR_DATA; |
1564 | 0 | } |
1565 | 0 | schunk->nvlmetalayers = nmetalayers; |
1566 | | |
1567 | | // Populate the metalayers and its serialized values |
1568 | 0 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
1569 | 0 | trailer_pos += 1; |
1570 | 0 | if (trailer_len < trailer_pos) { |
1571 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1572 | 0 | } |
1573 | 0 | if ((*idxp & 0xe0u) != 0xa0u) { // sanity check |
1574 | 0 | return BLOSC2_ERROR_DATA; |
1575 | 0 | } |
1576 | 0 | blosc2_metalayer* metalayer = calloc(1, sizeof(blosc2_metalayer)); |
1577 | 0 | schunk->vlmetalayers[nmetalayer] = metalayer; |
1578 | | |
1579 | | // Populate the metalayer string |
1580 | 0 | uint8_t nslen = *idxp & (uint8_t)0x1F; |
1581 | 0 | idxp += 1; |
1582 | 0 | trailer_pos += nslen; |
1583 | 0 | if (trailer_len < trailer_pos) { |
1584 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1585 | 0 | } |
1586 | 0 | char* ns = malloc((size_t)nslen + 1); |
1587 | 0 | memcpy(ns, idxp, nslen); |
1588 | 0 | ns[nslen] = '\0'; |
1589 | 0 | idxp += nslen; |
1590 | 0 | metalayer->name = ns; |
1591 | | |
1592 | | // Populate the serialized value for this metalayer |
1593 | | // Get the offset |
1594 | 0 | trailer_pos += 1; |
1595 | 0 | if (trailer_len < trailer_pos) { |
1596 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1597 | 0 | } |
1598 | 0 | if ((*idxp & 0xffu) != 0xd2u) { // sanity check |
1599 | 0 | return BLOSC2_ERROR_DATA; |
1600 | 0 | } |
1601 | 0 | idxp += 1; |
1602 | 0 | int32_t offset; |
1603 | 0 | trailer_pos += sizeof(offset); |
1604 | 0 | if (trailer_len < trailer_pos) { |
1605 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1606 | 0 | } |
1607 | 0 | from_big(&offset, idxp, sizeof(offset)); |
1608 | 0 | idxp += 4; |
1609 | 0 | if (offset < 0 || offset >= trailer_len) { |
1610 | | // Offset is less than zero or exceeds trailer length |
1611 | 0 | return BLOSC2_ERROR_DATA; |
1612 | 0 | } |
1613 | | // Go to offset and see if we have the correct marker |
1614 | 0 | uint8_t* content_marker = trailer + offset; |
1615 | 0 | if (trailer_len < offset + 1 + 4) { |
1616 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1617 | 0 | } |
1618 | 0 | if (*content_marker != 0xc6) { |
1619 | 0 | return BLOSC2_ERROR_DATA; |
1620 | 0 | } |
1621 | | |
1622 | | // Read the size of the content |
1623 | 0 | int32_t content_len; |
1624 | 0 | from_big(&content_len, content_marker + 1, sizeof(content_len)); |
1625 | 0 | if (content_len < 0) { |
1626 | 0 | return BLOSC2_ERROR_DATA; |
1627 | 0 | } |
1628 | 0 | metalayer->content_len = content_len; |
1629 | | |
1630 | | // Finally, read the content |
1631 | 0 | if (trailer_len < offset + 1 + 4 + content_len) { |
1632 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1633 | 0 | } |
1634 | 0 | char* content = malloc((size_t)content_len); |
1635 | 0 | memcpy(content, content_marker + 1 + 4, (size_t)content_len); |
1636 | 0 | metalayer->content = (uint8_t*)content; |
1637 | 0 | } |
1638 | 0 | return 1; |
1639 | 0 | } |
1640 | | |
1641 | 0 | int frame_get_vlmetalayers(blosc2_frame_s* frame, blosc2_schunk* schunk) { |
1642 | 0 | int32_t header_len; |
1643 | 0 | int64_t frame_len; |
1644 | 0 | int64_t nbytes; |
1645 | 0 | int64_t cbytes; |
1646 | 0 | int32_t blocksize; |
1647 | 0 | int32_t chunksize; |
1648 | 0 | int64_t nchunks; |
1649 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
1650 | 0 | &blocksize, &chunksize, &nchunks, |
1651 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1652 | 0 | schunk->storage->io); |
1653 | 0 | if (ret < 0) { |
1654 | 0 | BLOSC_TRACE_ERROR("Unable to get the trailer info from frame."); |
1655 | 0 | return ret; |
1656 | 0 | } |
1657 | | |
1658 | 0 | int64_t trailer_offset = get_trailer_offset(frame, header_len, nbytes > 0); |
1659 | 0 | int32_t trailer_len = (int32_t) frame->trailer_len; |
1660 | |
|
1661 | 0 | if (trailer_offset < BLOSC_EXTENDED_HEADER_LENGTH || trailer_offset + trailer_len > frame->len) { |
1662 | 0 | BLOSC_TRACE_ERROR("Cannot access the trailer out of the frame."); |
1663 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1664 | 0 | } |
1665 | | |
1666 | | // Get the trailer |
1667 | 0 | uint8_t* trailer = NULL; |
1668 | 0 | bool needs_free = false; |
1669 | 0 | if (frame->cframe != NULL) { |
1670 | 0 | trailer = frame->cframe + trailer_offset; |
1671 | 0 | } else { |
1672 | 0 | int64_t rbytes = 0; |
1673 | |
|
1674 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1675 | 0 | if (io_cb == NULL) { |
1676 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1677 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
1678 | 0 | } |
1679 | | |
1680 | 0 | if (io_cb->is_allocation_necessary) { |
1681 | 0 | trailer = malloc(trailer_len); |
1682 | 0 | needs_free = true; |
1683 | 0 | } |
1684 | 0 | else { |
1685 | 0 | needs_free = false; |
1686 | 0 | } |
1687 | |
|
1688 | 0 | void* fp = NULL; |
1689 | 0 | int64_t io_pos = 0; |
1690 | 0 | if (frame->sframe) { |
1691 | 0 | char* eframe_name = malloc(strlen(frame->urlpath) + strlen("/chunks.b2frame") + 1); |
1692 | 0 | sprintf(eframe_name, "%s/chunks.b2frame", frame->urlpath); |
1693 | 0 | fp = io_cb->open(eframe_name, "rb", frame->schunk->storage->io->params); |
1694 | 0 | if (fp == NULL) { |
1695 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", eframe_name); |
1696 | 0 | free(eframe_name); |
1697 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1698 | 0 | } |
1699 | 0 | free(eframe_name); |
1700 | 0 | io_pos = trailer_offset; |
1701 | 0 | } |
1702 | 0 | else { |
1703 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1704 | 0 | if (fp == NULL) { |
1705 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1706 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1707 | 0 | } |
1708 | 0 | io_pos = frame->file_offset + trailer_offset; |
1709 | 0 | } |
1710 | 0 | if (fp != NULL) { |
1711 | 0 | rbytes = io_cb->read((void**)&trailer, 1, trailer_len, io_pos, fp); |
1712 | 0 | io_cb->close(fp); |
1713 | 0 | } |
1714 | 0 | if (rbytes != trailer_len) { |
1715 | 0 | BLOSC_TRACE_ERROR("Cannot access the trailer out of the fileframe."); |
1716 | 0 | if (needs_free) |
1717 | 0 | free(trailer); |
1718 | 0 | return BLOSC2_ERROR_FILE_READ; |
1719 | 0 | } |
1720 | 0 | } |
1721 | | |
1722 | 0 | ret = get_vlmeta_from_trailer(frame, schunk, trailer, trailer_len); |
1723 | |
|
1724 | 0 | if (frame->cframe == NULL && needs_free) { |
1725 | 0 | free(trailer); |
1726 | 0 | } |
1727 | |
|
1728 | 0 | return ret; |
1729 | 0 | } |
1730 | | |
1731 | | |
1732 | | blosc2_storage* get_new_storage(const blosc2_storage* storage, |
1733 | | const blosc2_cparams* cdefaults, |
1734 | | const blosc2_dparams* ddefaults, |
1735 | 0 | const blosc2_io* iodefaults) { |
1736 | |
|
1737 | 0 | blosc2_storage* new_storage = (blosc2_storage*)calloc(1, sizeof(blosc2_storage)); |
1738 | 0 | memcpy(new_storage, storage, sizeof(blosc2_storage)); |
1739 | 0 | if (storage->urlpath != NULL) { |
1740 | 0 | char* urlpath = normalize_urlpath(storage->urlpath); |
1741 | 0 | new_storage->urlpath = malloc(strlen(urlpath) + 1); |
1742 | 0 | strcpy(new_storage->urlpath, urlpath); |
1743 | 0 | } |
1744 | | |
1745 | | // cparams |
1746 | 0 | blosc2_cparams* cparams = malloc(sizeof(blosc2_cparams)); |
1747 | 0 | if (storage->cparams != NULL) { |
1748 | 0 | memcpy(cparams, storage->cparams, sizeof(blosc2_cparams)); |
1749 | 0 | } else { |
1750 | 0 | memcpy(cparams, cdefaults, sizeof(blosc2_cparams)); |
1751 | 0 | } |
1752 | 0 | new_storage->cparams = cparams; |
1753 | | |
1754 | | // dparams |
1755 | 0 | blosc2_dparams* dparams = malloc(sizeof(blosc2_dparams)); |
1756 | 0 | if (storage->dparams != NULL) { |
1757 | 0 | memcpy(dparams, storage->dparams, sizeof(blosc2_dparams)); |
1758 | 0 | } |
1759 | 0 | else { |
1760 | 0 | memcpy(dparams, ddefaults, sizeof(blosc2_dparams)); |
1761 | 0 | } |
1762 | 0 | new_storage->dparams = dparams; |
1763 | | |
1764 | | // iodefaults |
1765 | 0 | blosc2_io* udio = malloc(sizeof(blosc2_io)); |
1766 | 0 | if (storage->io != NULL) { |
1767 | 0 | memcpy(udio, storage->io, sizeof(blosc2_io)); |
1768 | 0 | } |
1769 | 0 | else { |
1770 | 0 | memcpy(udio, iodefaults, sizeof(blosc2_io)); |
1771 | 0 | } |
1772 | 0 | new_storage->io = udio; |
1773 | |
|
1774 | 0 | return new_storage; |
1775 | 0 | } |
1776 | | |
1777 | | |
1778 | | /* Get a super-chunk out of a frame */ |
1779 | 0 | blosc2_schunk* frame_to_schunk(blosc2_frame_s* frame, bool copy, const blosc2_io *udio) { |
1780 | 0 | int32_t header_len; |
1781 | 0 | int64_t frame_len; |
1782 | 0 | int rc; |
1783 | 0 | blosc2_schunk* schunk = calloc(1, sizeof(blosc2_schunk)); |
1784 | 0 | schunk->frame = (blosc2_frame*)frame; |
1785 | 0 | frame->schunk = schunk; |
1786 | |
|
1787 | 0 | rc = get_header_info(frame, &header_len, &frame_len, &schunk->nbytes, |
1788 | 0 | &schunk->cbytes, &schunk->blocksize, |
1789 | 0 | &schunk->chunksize, &schunk->nchunks, &schunk->typesize, |
1790 | 0 | &schunk->compcode, &schunk->compcode_meta, &schunk->clevel, schunk->filters, |
1791 | 0 | schunk->filters_meta, &schunk->splitmode, udio); |
1792 | 0 | if (rc < 0) { |
1793 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
1794 | 0 | blosc2_schunk_free(schunk); |
1795 | 0 | return NULL; |
1796 | 0 | } |
1797 | 0 | int64_t nchunks = schunk->nchunks; |
1798 | 0 | int64_t nbytes = schunk->nbytes; |
1799 | 0 | (void) nbytes; |
1800 | 0 | int64_t cbytes = schunk->cbytes; |
1801 | | |
1802 | | // Compression and decompression contexts |
1803 | 0 | blosc2_cparams *cparams; |
1804 | 0 | blosc2_schunk_get_cparams(schunk, &cparams); |
1805 | 0 | schunk->cctx = blosc2_create_cctx(*cparams); |
1806 | 0 | if (schunk->cctx == NULL) { |
1807 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
1808 | 0 | return NULL; |
1809 | 0 | } |
1810 | 0 | blosc2_dparams *dparams; |
1811 | 0 | blosc2_schunk_get_dparams(schunk, &dparams); |
1812 | 0 | schunk->dctx = blosc2_create_dctx(*dparams); |
1813 | 0 | if (schunk->dctx == NULL) { |
1814 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
1815 | 0 | return NULL; |
1816 | 0 | } |
1817 | 0 | blosc2_storage storage = {.contiguous = copy ? false : true}; |
1818 | 0 | schunk->storage = get_new_storage(&storage, cparams, dparams, udio); |
1819 | 0 | free(cparams); |
1820 | 0 | free(dparams); |
1821 | 0 | if (!copy) { |
1822 | 0 | goto out; |
1823 | 0 | } |
1824 | | |
1825 | | // We are not attached to a frame anymore |
1826 | 0 | schunk->frame = NULL; |
1827 | |
|
1828 | 0 | if (nchunks == 0) { |
1829 | 0 | frame->schunk = NULL; |
1830 | 0 | goto out; |
1831 | 0 | } |
1832 | | |
1833 | | // Get the compressed offsets |
1834 | 0 | int32_t coffsets_cbytes = 0; |
1835 | 0 | uint8_t* coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
1836 | 0 | if (coffsets == NULL) { |
1837 | 0 | blosc2_schunk_free(schunk); |
1838 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
1839 | 0 | return NULL; |
1840 | 0 | } |
1841 | | |
1842 | | // Decompress offsets |
1843 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
1844 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
1845 | 0 | if (dctx == NULL) { |
1846 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
1847 | 0 | return NULL; |
1848 | 0 | } |
1849 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)nchunks * sizeof(int64_t)); |
1850 | 0 | int32_t off_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, |
1851 | 0 | offsets, (int32_t)(nchunks * sizeof(int64_t))); |
1852 | 0 | blosc2_free_ctx(dctx); |
1853 | 0 | if (off_nbytes < 0) { |
1854 | 0 | free(offsets); |
1855 | 0 | blosc2_schunk_free(schunk); |
1856 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
1857 | 0 | return NULL; |
1858 | 0 | } |
1859 | | |
1860 | | // We want the contiguous schunk, so create the actual data chunks (and, while doing this, |
1861 | | // get a guess at the blocksize used in this frame) |
1862 | 0 | int64_t acc_nbytes = 0; |
1863 | 0 | int64_t acc_cbytes = 0; |
1864 | 0 | int32_t blocksize = 0; |
1865 | 0 | int32_t chunk_nbytes; |
1866 | 0 | int32_t chunk_cbytes; |
1867 | 0 | int32_t chunk_blocksize; |
1868 | 0 | size_t prev_alloc = BLOSC_EXTENDED_HEADER_LENGTH; |
1869 | 0 | uint8_t* data_chunk = NULL; |
1870 | 0 | bool needs_free = false; |
1871 | 0 | const blosc2_io_cb *io_cb = blosc2_get_io_cb(udio->id); |
1872 | 0 | if (io_cb == NULL) { |
1873 | 0 | blosc2_schunk_free(schunk); |
1874 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1875 | 0 | return NULL; |
1876 | 0 | } |
1877 | | |
1878 | 0 | void* fp = NULL; |
1879 | 0 | if (frame->cframe == NULL) { |
1880 | 0 | if (io_cb->is_allocation_necessary) { |
1881 | 0 | data_chunk = malloc((size_t)prev_alloc); |
1882 | 0 | needs_free = true; |
1883 | 0 | } |
1884 | 0 | else { |
1885 | 0 | needs_free = false; |
1886 | 0 | } |
1887 | | |
1888 | 0 | if (!frame->sframe) { |
1889 | | // If not the chunks won't be in the frame |
1890 | 0 | fp = io_cb->open(frame->urlpath, "rb", udio->params); |
1891 | 0 | if (fp == NULL) { |
1892 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1893 | 0 | rc = BLOSC2_ERROR_FILE_OPEN; |
1894 | 0 | goto end; |
1895 | 0 | } |
1896 | 0 | } |
1897 | 0 | } |
1898 | 0 | schunk->data = malloc(nchunks * sizeof(void*)); |
1899 | 0 | for (int i = 0; i < nchunks; i++) { |
1900 | 0 | if (frame->cframe != NULL) { |
1901 | 0 | if (needs_free) { |
1902 | 0 | free(data_chunk); |
1903 | 0 | } |
1904 | 0 | if (offsets[i] < 0) { |
1905 | 0 | int64_t rbytes = frame_get_chunk(frame, i, &data_chunk, &needs_free); |
1906 | 0 | if (rbytes < 0) { |
1907 | 0 | break; |
1908 | 0 | } |
1909 | 0 | } |
1910 | 0 | else { |
1911 | 0 | data_chunk = frame->cframe + header_len + offsets[i]; |
1912 | 0 | } |
1913 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, NULL, &chunk_cbytes, NULL); |
1914 | 0 | if (rc < 0) { |
1915 | 0 | break; |
1916 | 0 | } |
1917 | 0 | } |
1918 | 0 | else { |
1919 | 0 | int64_t rbytes; |
1920 | 0 | if (frame->sframe) { |
1921 | 0 | if (needs_free) { |
1922 | 0 | free(data_chunk); |
1923 | 0 | } |
1924 | 0 | rbytes = frame_get_chunk(frame, i, &data_chunk, &needs_free); |
1925 | 0 | if (rbytes < 0) { |
1926 | 0 | break; |
1927 | 0 | } |
1928 | 0 | } |
1929 | 0 | else { |
1930 | 0 | int64_t io_pos = frame->file_offset + header_len + offsets[i]; |
1931 | 0 | rbytes = io_cb->read((void**)&data_chunk, 1, BLOSC_EXTENDED_HEADER_LENGTH, io_pos, fp); |
1932 | 0 | } |
1933 | 0 | if (rbytes != BLOSC_EXTENDED_HEADER_LENGTH) { |
1934 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
1935 | 0 | break; |
1936 | 0 | } |
1937 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, NULL, &chunk_cbytes, NULL); |
1938 | 0 | if (rc < 0) { |
1939 | 0 | break; |
1940 | 0 | } |
1941 | 0 | if (chunk_cbytes > (int32_t)prev_alloc) { |
1942 | 0 | if (io_cb->is_allocation_necessary) |
1943 | 0 | data_chunk = realloc(data_chunk, chunk_cbytes); |
1944 | 0 | if (data_chunk == NULL) { |
1945 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the data_chunk."); |
1946 | 0 | rc = BLOSC2_ERROR_MEMORY_ALLOC; |
1947 | 0 | break; |
1948 | 0 | } |
1949 | 0 | prev_alloc = chunk_cbytes; |
1950 | 0 | } |
1951 | 0 | if (!frame->sframe) { |
1952 | 0 | int64_t io_pos = frame->file_offset + header_len + offsets[i]; |
1953 | 0 | rbytes = io_cb->read((void**)&data_chunk, 1, chunk_cbytes, io_pos, fp); |
1954 | 0 | if (rbytes != chunk_cbytes) { |
1955 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
1956 | 0 | break; |
1957 | 0 | } |
1958 | 0 | } |
1959 | 0 | } |
1960 | 0 | uint8_t* new_chunk = malloc(chunk_cbytes); |
1961 | 0 | memcpy(new_chunk, data_chunk, chunk_cbytes); |
1962 | 0 | schunk->data[i] = new_chunk; |
1963 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, &chunk_nbytes, NULL, &chunk_blocksize); |
1964 | 0 | if (rc < 0) { |
1965 | 0 | break; |
1966 | 0 | } |
1967 | 0 | acc_nbytes += chunk_nbytes; |
1968 | 0 | acc_cbytes += chunk_cbytes; |
1969 | 0 | if (i == 0) { |
1970 | 0 | blocksize = chunk_blocksize; |
1971 | 0 | } |
1972 | 0 | else if (blocksize != chunk_blocksize) { |
1973 | | // Blocksize varies |
1974 | 0 | blocksize = 0; |
1975 | 0 | } |
1976 | 0 | } |
1977 | | |
1978 | | // We are not attached to a schunk anymore |
1979 | 0 | frame->schunk = NULL; |
1980 | |
|
1981 | 0 | end: |
1982 | 0 | if (needs_free) { |
1983 | 0 | free(data_chunk); |
1984 | 0 | } |
1985 | 0 | if (frame->cframe == NULL) { |
1986 | 0 | if (!frame->sframe) { |
1987 | 0 | io_cb->close(fp); |
1988 | 0 | } |
1989 | 0 | } |
1990 | 0 | free(offsets); |
1991 | | |
1992 | | // cframes and sframes have different ways to store chunks with special values: |
1993 | | // 1) cframes represent special chunks as negative offsets |
1994 | | // 2) sframes does not have the concept of offsets, but rather of data pointers (.data) |
1995 | | // so they always have a pointer to a special chunk |
1996 | | // This is why cframes and sframes have different cbytes and hence, we cannot enforce acc_bytes == schunk->cbytes |
1997 | | // In the future, maybe we could provide special meanings for .data[i] > 0x7FFFFFFF, but not there yet |
1998 | | // if (rc < 0 || acc_nbytes != nbytes || acc_cbytes != cbytes) { |
1999 | 0 | if (rc < 0 || acc_nbytes != nbytes) { |
2000 | 0 | blosc2_schunk_free(schunk); |
2001 | 0 | return NULL; |
2002 | 0 | } |
2003 | | // Update counters |
2004 | 0 | schunk->cbytes = acc_cbytes; |
2005 | 0 | schunk->blocksize = blocksize; |
2006 | |
|
2007 | 0 | out: |
2008 | 0 | rc = frame_get_metalayers(frame, schunk); |
2009 | 0 | if (rc < 0) { |
2010 | 0 | blosc2_schunk_free(schunk); |
2011 | 0 | BLOSC_TRACE_ERROR("Cannot access the metalayers."); |
2012 | 0 | return NULL; |
2013 | 0 | } |
2014 | | |
2015 | 0 | rc = frame_get_vlmetalayers(frame, schunk); |
2016 | 0 | if (rc < 0) { |
2017 | 0 | blosc2_schunk_free(schunk); |
2018 | 0 | BLOSC_TRACE_ERROR("Cannot access the vlmetalayers."); |
2019 | 0 | return NULL; |
2020 | 0 | } |
2021 | | |
2022 | 0 | return schunk; |
2023 | 0 | } |
2024 | | |
2025 | | |
2026 | 0 | void frame_avoid_cframe_free(blosc2_frame_s* frame, bool avoid_cframe_free) { |
2027 | 0 | frame->avoid_cframe_free = avoid_cframe_free; |
2028 | 0 | } |
2029 | | |
2030 | | |
/* Pairs a block start offset (`val`) with the position (`idx`) it had before sorting. */
struct csize_idx {
    int32_t val;
    int32_t idx;
};

/* qsort() comparator that orders `struct csize_idx` elements by ascending `val`.
 *
 * Returns a negative, zero or positive value when `a` sorts before, equal to or
 * after `b`.  The result is built from comparisons rather than `a - b`: the values
 * come from chunk data, and a subtraction could overflow (undefined behavior and a
 * wrong ordering) when they are far apart.
 */
int sort_offset(const void* a, const void* b) {
  const struct csize_idx* lhs = a;
  const struct csize_idx* rhs = b;
  return (lhs->val > rhs->val) - (lhs->val < rhs->val);
}
2042 | | |
2043 | | |
2044 | | int get_coffset(blosc2_frame_s* frame, int32_t header_len, int64_t cbytes, |
2045 | 0 | int64_t nchunk, int64_t nchunks, int64_t *offset) { |
2046 | 0 | int32_t off_cbytes; |
2047 | | // Get the offset to nchunk |
2048 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &off_cbytes); |
2049 | 0 | if (coffsets == NULL) { |
2050 | 0 | BLOSC_TRACE_ERROR("Cannot get the offset for chunk %" PRId64 " for the frame.", nchunk); |
2051 | 0 | return BLOSC2_ERROR_DATA; |
2052 | 0 | } |
2053 | | |
2054 | | // Get the 64-bit offset |
2055 | 0 | int rc = blosc2_getitem(coffsets, off_cbytes, (int32_t)nchunk, 1, offset, (int32_t)sizeof(int64_t)); |
2056 | 0 | if (rc < 0) { |
2057 | 0 | BLOSC_TRACE_ERROR("Problems retrieving a chunk offset."); |
2058 | 0 | } else if (!frame->sframe && *offset > frame->len) { |
2059 | 0 | BLOSC_TRACE_ERROR("Cannot read chunk %" PRId64 " outside of frame boundary.", nchunk); |
2060 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
2061 | 0 | } |
2062 | | |
2063 | 0 | return rc; |
2064 | 0 | } |
2065 | | |
2066 | | |
2067 | | // Detect and return a chunk with special values in offsets (only zeros, NaNs and non initialized) |
2068 | | int frame_special_chunk(int64_t special_value, int32_t nbytes, int32_t typesize, int32_t blocksize, |
2069 | 0 | uint8_t** chunk, int32_t cbytes, bool *needs_free) { |
2070 | 0 | int rc = 0; |
2071 | 0 | *chunk = malloc(cbytes); |
2072 | 0 | *needs_free = true; |
2073 | | |
2074 | | // Detect the kind of special value |
2075 | 0 | uint64_t zeros_mask = (uint64_t) BLOSC2_SPECIAL_ZERO << (8 * 7); // chunk of zeros |
2076 | 0 | uint64_t nans_mask = (uint64_t) BLOSC2_SPECIAL_NAN << (8 * 7); // chunk of NaNs |
2077 | 0 | uint64_t uninit_mask = (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); // chunk of uninit values |
2078 | |
|
2079 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
2080 | 0 | cparams.typesize = typesize; |
2081 | 0 | cparams.blocksize = blocksize; |
2082 | 0 | if (special_value & zeros_mask) { |
2083 | 0 | rc = blosc2_chunk_zeros(cparams, nbytes, *chunk, cbytes); |
2084 | 0 | if (rc < 0) { |
2085 | 0 | BLOSC_TRACE_ERROR("Error creating a zero chunk"); |
2086 | 0 | } |
2087 | 0 | } |
2088 | 0 | else if (special_value & uninit_mask) { |
2089 | 0 | rc = blosc2_chunk_uninit(cparams, nbytes, *chunk, cbytes); |
2090 | 0 | if (rc < 0) { |
2091 | 0 | BLOSC_TRACE_ERROR("Error creating a non initialized chunk"); |
2092 | 0 | } |
2093 | 0 | } |
2094 | 0 | else if (special_value & nans_mask) { |
2095 | 0 | rc = blosc2_chunk_nans(cparams, nbytes, *chunk, cbytes); |
2096 | 0 | if (rc < 0) { |
2097 | 0 | BLOSC_TRACE_ERROR("Error creating a nan chunk"); |
2098 | 0 | } |
2099 | 0 | } |
2100 | 0 | else { |
2101 | 0 | BLOSC_TRACE_ERROR("Special value not recognized: %" PRId64 "", special_value); |
2102 | 0 | rc = BLOSC2_ERROR_DATA; |
2103 | 0 | } |
2104 | | |
2105 | 0 | if (rc < 0) { |
2106 | 0 | free(*chunk); |
2107 | 0 | *needs_free = false; |
2108 | 0 | *chunk = NULL; |
2109 | 0 | } |
2110 | |
|
2111 | 0 | return rc; |
2112 | 0 | } |
2113 | | |
2114 | | |
2115 | | /* Return a compressed chunk that is part of a frame in the `chunk` parameter. |
2116 | | * If the frame is disk-based, a buffer is allocated for the (compressed) chunk, |
2117 | | * and hence a free is needed. You can check if the chunk requires a free with the `needs_free` |
2118 | | * parameter. |
2119 | | * If the chunk does not need a free, it means that a pointer to the location in frame is returned |
2120 | | * in the `chunk` parameter. |
2121 | | * |
2122 | | * The size of the (compressed) chunk is returned. If some problem is detected, a negative code |
2123 | | * is returned instead. |
2124 | | */ |
2125 | 0 | int frame_get_chunk(blosc2_frame_s *frame, int64_t nchunk, uint8_t **chunk, bool *needs_free) { |
2126 | 0 | int32_t header_len; |
2127 | 0 | int64_t frame_len; |
2128 | 0 | int64_t nbytes; |
2129 | 0 | int64_t cbytes; |
2130 | 0 | int32_t blocksize; |
2131 | 0 | int32_t chunksize; |
2132 | 0 | int64_t nchunks; |
2133 | 0 | int32_t typesize; |
2134 | 0 | int64_t offset; |
2135 | 0 | int32_t chunk_cbytes; |
2136 | 0 | int rc; |
2137 | |
|
2138 | 0 | *chunk = NULL; |
2139 | 0 | *needs_free = false; |
2140 | 0 | rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
2141 | 0 | &blocksize, &chunksize, &nchunks, |
2142 | 0 | &typesize, NULL, NULL, NULL, NULL, NULL, NULL, |
2143 | 0 | frame->schunk->storage->io); |
2144 | 0 | if (rc < 0) { |
2145 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2146 | 0 | return rc; |
2147 | 0 | } |
2148 | | |
2149 | 0 | if ((nchunks > 0) && (nchunk >= nchunks)) { |
2150 | 0 | BLOSC_TRACE_ERROR("nchunk ('%" PRId64 "') exceeds the number of chunks " |
2151 | 0 | "('%" PRId64 "') in frame.", nchunk, nchunks); |
2152 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
2153 | 0 | } |
2154 | | |
2155 | | // Get the offset to nchunk |
2156 | 0 | rc = get_coffset(frame, header_len, cbytes, nchunk, nchunks, &offset); |
2157 | 0 | if (rc < 0) { |
2158 | 0 | BLOSC_TRACE_ERROR("Unable to get offset to chunk %" PRId64 ".", nchunk); |
2159 | 0 | return rc; |
2160 | 0 | } |
2161 | | |
2162 | 0 | if (offset < 0) { |
2163 | | // Special value |
2164 | 0 | chunk_cbytes = BLOSC_EXTENDED_HEADER_LENGTH; |
2165 | 0 | int32_t chunksize_ = chunksize; |
2166 | 0 | if ((nchunk == nchunks - 1) && (nbytes % chunksize)) { |
2167 | | // Last chunk is incomplete. Compute its actual size. |
2168 | 0 | chunksize_ = (int32_t) (nbytes % chunksize); |
2169 | 0 | } |
2170 | 0 | rc = frame_special_chunk(offset, chunksize_, typesize, blocksize, chunk, chunk_cbytes, needs_free); |
2171 | 0 | if (rc < 0) { |
2172 | 0 | return rc; |
2173 | 0 | } |
2174 | 0 | goto end; |
2175 | 0 | } |
2176 | | |
2177 | 0 | if (frame->sframe) { |
2178 | | // Sparse on-disk |
2179 | 0 | nchunk = offset; |
2180 | 0 | return sframe_get_chunk(frame, nchunk, chunk, needs_free); |
2181 | 0 | } |
2182 | | |
2183 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2184 | 0 | if (io_cb == NULL) { |
2185 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2186 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
2187 | 0 | } |
2188 | | |
2189 | 0 | if (frame->cframe == NULL) { |
2190 | 0 | uint8_t* header_ptr; |
2191 | 0 | uint8_t header[BLOSC_EXTENDED_HEADER_LENGTH]; |
2192 | 0 | void* fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
2193 | 0 | if (fp == NULL) { |
2194 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2195 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2196 | 0 | } |
2197 | 0 | if (io_cb->is_allocation_necessary) |
2198 | 0 | header_ptr = header; |
2199 | 0 | int64_t io_pos = frame->file_offset + header_len + offset; |
2200 | 0 | int64_t rbytes = io_cb->read((void**)&header_ptr, 1, sizeof(header), io_pos, fp); |
2201 | 0 | if (rbytes != BLOSC_EXTENDED_HEADER_LENGTH) { |
2202 | 0 | BLOSC_TRACE_ERROR("Cannot read the cbytes for chunk in the frame."); |
2203 | 0 | io_cb->close(fp); |
2204 | 0 | return BLOSC2_ERROR_FILE_READ; |
2205 | 0 | } |
2206 | 0 | rc = blosc2_cbuffer_sizes(header_ptr, NULL, &chunk_cbytes, NULL); |
2207 | 0 | if (rc < 0) { |
2208 | 0 | BLOSC_TRACE_ERROR("Cannot read the cbytes for chunk in the frame."); |
2209 | 0 | io_cb->close(fp); |
2210 | 0 | return rc; |
2211 | 0 | } |
2212 | 0 | if (io_cb->is_allocation_necessary) { |
2213 | 0 | *chunk = malloc(chunk_cbytes); |
2214 | 0 | *needs_free = true; |
2215 | 0 | } |
2216 | 0 | else { |
2217 | 0 | *needs_free = false; |
2218 | 0 | } |
2219 | | |
2220 | 0 | io_pos = frame->file_offset + header_len + offset; |
2221 | 0 | rbytes = io_cb->read((void**)chunk, 1, chunk_cbytes, io_pos, fp); |
2222 | 0 | io_cb->close(fp); |
2223 | 0 | if (rbytes != chunk_cbytes) { |
2224 | 0 | BLOSC_TRACE_ERROR("Cannot read the chunk out of the frame."); |
2225 | 0 | return BLOSC2_ERROR_FILE_READ; |
2226 | 0 | } |
2227 | |
|
2228 | 0 | } else { |
2229 | | // The chunk is in memory and just one pointer away |
2230 | 0 | *chunk = frame->cframe + header_len + offset; |
2231 | 0 | rc = blosc2_cbuffer_sizes(*chunk, NULL, &chunk_cbytes, NULL); |
2232 | 0 | if (rc < 0) { |
2233 | 0 | return rc; |
2234 | 0 | } |
2235 | 0 | } |
2236 | | |
2237 | 0 | end: |
2238 | 0 | return (int32_t)chunk_cbytes; |
2239 | 0 | } |
2240 | | |
2241 | | |
2242 | | /* Return a compressed chunk that is part of a frame in the `chunk` parameter. |
2243 | | * If the frame is disk-based, a buffer is allocated for the (lazy) chunk, |
2244 | | * and hence a free is needed. You can check if the chunk requires a free with the `needs_free` |
2245 | | * parameter. |
2246 | | * If the chunk does not need a free, it means that the frame is in memory and that just a |
2247 | | * pointer to the location of the chunk in memory is returned. |
2248 | | * |
2249 | | * The size of the (compressed, potentially lazy) chunk is returned. If some problem is detected, |
2250 | | * a negative code is returned instead. |
2251 | | */ |
2252 | 0 | int frame_get_lazychunk(blosc2_frame_s *frame, int64_t nchunk, uint8_t **chunk, bool *needs_free) { |
2253 | 0 | int32_t header_len; |
2254 | 0 | int64_t frame_len; |
2255 | 0 | int64_t nbytes; |
2256 | 0 | int64_t cbytes; |
2257 | 0 | int32_t blocksize; |
2258 | 0 | int32_t chunksize; |
2259 | 0 | int64_t nchunks; |
2260 | 0 | int32_t typesize; |
2261 | 0 | int32_t lazychunk_cbytes; |
2262 | 0 | int64_t offset; |
2263 | 0 | void* fp = NULL; |
2264 | |
|
2265 | 0 | *chunk = NULL; |
2266 | 0 | *needs_free = false; |
2267 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
2268 | 0 | &blocksize, &chunksize, &nchunks, |
2269 | 0 | &typesize, NULL, NULL, NULL, NULL, NULL, NULL, |
2270 | 0 | frame->schunk->storage->io); |
2271 | 0 | if (rc < 0) { |
2272 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2273 | 0 | return rc; |
2274 | 0 | } |
2275 | | |
2276 | 0 | if (nchunk >= nchunks) { |
2277 | 0 | BLOSC_TRACE_ERROR("nchunk ('%" PRId64 "') exceeds the number of chunks " |
2278 | 0 | "('%" PRId64 "') in frame.", nchunk, nchunks); |
2279 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
2280 | 0 | } |
2281 | | |
2282 | | // Get the offset to nchunk |
2283 | 0 | rc = get_coffset(frame, header_len, cbytes, nchunk, nchunks, &offset); |
2284 | 0 | if (rc < 0) { |
2285 | 0 | BLOSC_TRACE_ERROR("Unable to get offset to chunk %" PRId64 ".", nchunk); |
2286 | 0 | return rc; |
2287 | 0 | } |
2288 | | |
2289 | 0 | if (offset < 0) { |
2290 | | // Special value |
2291 | 0 | lazychunk_cbytes = BLOSC_EXTENDED_HEADER_LENGTH; |
2292 | 0 | int32_t chunksize_ = chunksize; |
2293 | 0 | if ((nchunk == nchunks - 1) && (nbytes % chunksize)) { |
2294 | | // Last chunk is incomplete. Compute its actual size. |
2295 | 0 | chunksize_ = (int32_t) (nbytes % chunksize); |
2296 | 0 | } |
2297 | 0 | rc = frame_special_chunk(offset, chunksize_, typesize, blocksize, chunk, |
2298 | 0 | (int32_t)lazychunk_cbytes, needs_free); |
2299 | 0 | goto end; |
2300 | 0 | } |
2301 | | |
2302 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2303 | 0 | if (io_cb == NULL) { |
2304 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2305 | 0 | rc = BLOSC2_ERROR_PLUGIN_IO; |
2306 | 0 | goto end; |
2307 | 0 | } |
2308 | | |
2309 | 0 | if (frame->cframe == NULL) { |
2310 | | // TODO: make this portable across different endianness |
2311 | | // Get info for building a lazy chunk |
2312 | 0 | int32_t chunk_nbytes; |
2313 | 0 | int32_t chunk_cbytes; |
2314 | 0 | int32_t chunk_blocksize; |
2315 | 0 | uint8_t* header_ptr; |
2316 | 0 | uint8_t header[BLOSC_EXTENDED_HEADER_LENGTH]; |
2317 | 0 | int64_t io_pos = 0; |
2318 | 0 | if (frame->sframe) { |
2319 | | // The chunk is not in the frame |
2320 | 0 | fp = sframe_open_chunk(frame->urlpath, offset, "rb", |
2321 | 0 | frame->schunk->storage->io); |
2322 | 0 | if (fp == NULL) { |
2323 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2324 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2325 | 0 | } |
2326 | 0 | } |
2327 | 0 | else { |
2328 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
2329 | 0 | if (fp == NULL) { |
2330 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2331 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2332 | 0 | } |
2333 | 0 | io_pos = frame->file_offset + header_len + offset; |
2334 | 0 | } |
2335 | 0 | if (io_cb->is_allocation_necessary) |
2336 | 0 | header_ptr = header; |
2337 | 0 | int64_t rbytes = io_cb->read((void**)&header_ptr, 1, BLOSC_EXTENDED_HEADER_LENGTH, io_pos, fp); |
2338 | 0 | if (rbytes != BLOSC_EXTENDED_HEADER_LENGTH) { |
2339 | 0 | BLOSC_TRACE_ERROR("Cannot read the header for chunk in the frame."); |
2340 | 0 | rc = BLOSC2_ERROR_FILE_READ; |
2341 | 0 | goto end; |
2342 | 0 | } |
2343 | 0 | rc = blosc2_cbuffer_sizes(header_ptr, &chunk_nbytes, &chunk_cbytes, &chunk_blocksize); |
2344 | 0 | if (rc < 0) { |
2345 | 0 | goto end; |
2346 | 0 | } |
2347 | 0 | size_t nblocks = chunk_nbytes / chunk_blocksize; |
2348 | 0 | size_t leftover_block = chunk_nbytes % chunk_blocksize; |
2349 | 0 | nblocks = leftover_block ? nblocks + 1 : nblocks; |
2350 | | // Allocate space for the lazy chunk |
2351 | 0 | int32_t trailer_len; |
2352 | 0 | int32_t special_type = (header_ptr[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
2353 | 0 | int memcpyed = header_ptr[BLOSC2_CHUNK_FLAGS] & (uint8_t) BLOSC_MEMCPYED; |
2354 | |
|
2355 | 0 | int32_t trailer_offset = BLOSC_EXTENDED_HEADER_LENGTH; |
2356 | 0 | size_t streams_offset = BLOSC_EXTENDED_HEADER_LENGTH; |
2357 | 0 | if (special_type == 0) { |
2358 | | // Regular values have offsets for blocks |
2359 | 0 | trailer_offset += (int32_t) (nblocks * sizeof(int32_t)); |
2360 | 0 | if (memcpyed) { |
2361 | 0 | streams_offset += 0; |
2362 | 0 | } else { |
2363 | 0 | streams_offset += nblocks * sizeof(int32_t); |
2364 | 0 | } |
2365 | 0 | trailer_len = (int32_t) (sizeof(int32_t) + sizeof(int64_t) + nblocks * sizeof(int32_t)); |
2366 | 0 | lazychunk_cbytes = trailer_offset + trailer_len; |
2367 | 0 | } |
2368 | 0 | else if (special_type == BLOSC2_SPECIAL_VALUE) { |
2369 | 0 | trailer_offset += typesize; |
2370 | 0 | streams_offset += typesize; |
2371 | 0 | trailer_len = 0; |
2372 | 0 | lazychunk_cbytes = trailer_offset + trailer_len; |
2373 | 0 | } |
2374 | 0 | else { |
2375 | 0 | rc = BLOSC2_ERROR_INVALID_HEADER; |
2376 | 0 | goto end; |
2377 | 0 | } |
2378 | | |
2379 | | // Read just the full header and bstarts section too (lazy partial length) |
2380 | 0 | if (frame->sframe) { |
2381 | 0 | io_pos = 0; |
2382 | 0 | } |
2383 | 0 | else { |
2384 | 0 | io_pos = frame->file_offset + header_len + offset; |
2385 | 0 | } |
2386 | | |
2387 | | // The case here is a bit special because more memory is allocated than read from the file |
2388 | | // and the chunk is modified after reading. Due to the modification, we cannot directly use |
2389 | | // the memory provided by the io |
2390 | 0 | *chunk = malloc(lazychunk_cbytes); |
2391 | 0 | *needs_free = true; |
2392 | |
|
2393 | 0 | if (io_cb->is_allocation_necessary) { |
2394 | 0 | rbytes = io_cb->read((void**)chunk, 1, (int64_t)streams_offset, io_pos, fp); |
2395 | 0 | } |
2396 | 0 | else { |
2397 | 0 | uint8_t* chunk_ptr; |
2398 | 0 | rbytes = io_cb->read((void**)&chunk_ptr, 1, (int64_t)streams_offset, io_pos, fp); |
2399 | 0 | memcpy(*chunk, chunk_ptr, streams_offset); |
2400 | 0 | } |
2401 | | |
2402 | 0 | if (rbytes != (int64_t)streams_offset) { |
2403 | 0 | BLOSC_TRACE_ERROR("Cannot read the (lazy) chunk out of the frame."); |
2404 | 0 | rc = BLOSC2_ERROR_FILE_READ; |
2405 | 0 | goto end; |
2406 | 0 | } |
2407 | 0 | if (special_type == BLOSC2_SPECIAL_VALUE) { |
2408 | | // Value runlen is not returning a lazy chunk. We are done. |
2409 | 0 | goto end; |
2410 | 0 | } |
2411 | | |
2412 | | // Mark chunk as lazy |
2413 | 0 | uint8_t* blosc2_flags = *chunk + BLOSC2_CHUNK_BLOSC2_FLAGS; |
2414 | 0 | *blosc2_flags |= 0x08U; |
2415 | | |
2416 | | // Add the trailer (currently, nchunk + offset + block_csizes) |
2417 | 0 | if (frame->sframe) { |
2418 | 0 | *(int32_t*)(*chunk + trailer_offset) = (int32_t)offset; // offset is nchunk for sframes |
2419 | 0 | *(int64_t*)(*chunk + trailer_offset + sizeof(int32_t)) = offset; |
2420 | 0 | } |
2421 | 0 | else { |
2422 | 0 | *(int32_t*)(*chunk + trailer_offset) = (int32_t)nchunk; |
2423 | 0 | *(int64_t*)(*chunk + trailer_offset + sizeof(int32_t)) = header_len + offset; |
2424 | 0 | } |
2425 | |
|
2426 | 0 | int32_t* block_csizes = malloc(nblocks * sizeof(int32_t)); |
2427 | |
|
2428 | 0 | if (memcpyed) { |
2429 | | // When memcpyed the blocksizes are trivial to compute |
2430 | 0 | for (int i = 0; i < (int)nblocks - 1; i++) { |
2431 | 0 | block_csizes[i] = (int)chunk_blocksize; |
2432 | 0 | } |
2433 | | // The last block could be incomplete, mainly due to the fact that the block size is not divisible |
2434 | | // by the typesize |
2435 | 0 | block_csizes[nblocks - 1] = (int32_t)leftover_block ? (int32_t)leftover_block : chunk_blocksize; |
2436 | 0 | } |
2437 | 0 | else { |
2438 | | // In regular, compressed chunks, we need to sort the bstarts (they can be out |
2439 | | // of order because of multi-threading), and get a reverse index too. |
2440 | 0 | memcpy(block_csizes, *chunk + BLOSC_EXTENDED_HEADER_LENGTH, nblocks * sizeof(int32_t)); |
2441 | | // Helper structure to keep track of original indexes |
2442 | 0 | struct csize_idx *csize_idx = malloc(nblocks * sizeof(struct csize_idx)); |
2443 | 0 | for (int n = 0; n < (int)nblocks; n++) { |
2444 | 0 | csize_idx[n].val = block_csizes[n]; |
2445 | 0 | csize_idx[n].idx = n; |
2446 | 0 | } |
2447 | 0 | qsort(csize_idx, nblocks, sizeof(struct csize_idx), &sort_offset); |
2448 | | // Compute the actual csizes |
2449 | 0 | int idx; |
2450 | 0 | for (int n = 0; n < (int)nblocks - 1; n++) { |
2451 | 0 | idx = csize_idx[n].idx; |
2452 | 0 | block_csizes[idx] = csize_idx[n + 1].val - csize_idx[n].val; |
2453 | 0 | } |
2454 | 0 | idx = csize_idx[nblocks - 1].idx; |
2455 | 0 | block_csizes[idx] = (int)chunk_cbytes - csize_idx[nblocks - 1].val; |
2456 | 0 | free(csize_idx); |
2457 | 0 | } |
2458 | | // Copy the csizes at the end of the trailer |
2459 | 0 | void *trailer_csizes = *chunk + lazychunk_cbytes - nblocks * sizeof(int32_t); |
2460 | 0 | memcpy(trailer_csizes, block_csizes, nblocks * sizeof(int32_t)); |
2461 | 0 | free(block_csizes); |
2462 | 0 | } else { |
2463 | | // The chunk is in memory and just one pointer away |
2464 | 0 | int64_t chunk_header_offset = header_len + offset; |
2465 | 0 | int64_t chunk_cbytes_offset = chunk_header_offset + BLOSC_MIN_HEADER_LENGTH; |
2466 | |
|
2467 | 0 | *chunk = frame->cframe + chunk_header_offset; |
2468 | |
|
2469 | 0 | if (chunk_cbytes_offset > frame->len) { |
2470 | 0 | BLOSC_TRACE_ERROR("Cannot read the header for chunk in the (contiguous) frame."); |
2471 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
2472 | 0 | } else { |
2473 | 0 | rc = blosc2_cbuffer_sizes(*chunk, NULL, &lazychunk_cbytes, NULL); |
2474 | 0 | if (rc && chunk_cbytes_offset + lazychunk_cbytes > frame_len) { |
2475 | 0 | BLOSC_TRACE_ERROR("Compressed bytes exceed beyond frame length."); |
2476 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
2477 | 0 | } |
2478 | 0 | } |
2479 | 0 | } |
2480 | | |
2481 | 0 | end: |
2482 | 0 | if (fp != NULL) { |
2483 | 0 | io_cb->close(fp); |
2484 | 0 | } |
2485 | 0 | if (rc < 0) { |
2486 | 0 | if (*needs_free) { |
2487 | 0 | free(*chunk); |
2488 | 0 | *chunk = NULL; |
2489 | 0 | *needs_free = false; |
2490 | 0 | } |
2491 | 0 | return rc; |
2492 | 0 | } |
2493 | | |
2494 | 0 | return (int)lazychunk_cbytes; |
2495 | 0 | } |
2496 | | |
2497 | | |
2498 | | /* Fill an empty frame with special values (fast path). */ |
2499 | | int64_t frame_fill_special(blosc2_frame_s* frame, int64_t nitems, int special_value, |
2500 | 0 | int32_t chunksize, blosc2_schunk* schunk) { |
2501 | 0 | int32_t header_len; |
2502 | 0 | int64_t frame_len; |
2503 | 0 | int64_t nbytes; |
2504 | 0 | int64_t cbytes; |
2505 | 0 | int32_t blocksize; |
2506 | 0 | int32_t typesize; |
2507 | 0 | int64_t nchunks; |
2508 | |
|
2509 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, &blocksize, NULL, |
2510 | 0 | &nchunks, &typesize, NULL, NULL, NULL, NULL, NULL, NULL, |
2511 | 0 | schunk->storage->io); |
2512 | 0 | if (rc < 0) { |
2513 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2514 | 0 | return BLOSC2_ERROR_DATA; |
2515 | 0 | } |
2516 | | |
2517 | 0 | if (nitems == 0) { |
2518 | 0 | return frame_len; |
2519 | 0 | } |
2520 | | |
2521 | 0 | if ((nitems / chunksize) > INT_MAX) { |
2522 | 0 | BLOSC_TRACE_ERROR("nitems is too large. Try increasing the chunksize."); |
2523 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2524 | 0 | } |
2525 | | |
2526 | 0 | if ((nbytes > 0) || (cbytes > 0)) { |
2527 | 0 | BLOSC_TRACE_ERROR("Filling with special values only works on empty frames"); |
2528 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2529 | 0 | } |
2530 | | |
2531 | | // Compute the number of chunks and the length of the offsets chunk |
2532 | 0 | int32_t chunkitems = chunksize / typesize; |
2533 | 0 | nchunks = nitems / chunkitems; |
2534 | 0 | int32_t leftover_items = (int32_t)(nitems % chunkitems); |
2535 | 0 | if (leftover_items) { |
2536 | 0 | nchunks += 1; |
2537 | 0 | } |
2538 | |
|
2539 | 0 | blosc2_cparams* cparams; |
2540 | 0 | blosc2_schunk_get_cparams(schunk, &cparams); |
2541 | | |
2542 | | // Build the offsets with a special chunk |
2543 | 0 | int new_off_cbytes = BLOSC_EXTENDED_HEADER_LENGTH + sizeof(int64_t); |
2544 | 0 | uint8_t* off_chunk = malloc(new_off_cbytes); |
2545 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
2546 | 0 | uint8_t* sample_chunk = malloc(BLOSC_EXTENDED_HEADER_LENGTH); |
2547 | 0 | int csize; |
2548 | 0 | switch (special_value) { |
2549 | 0 | case BLOSC2_SPECIAL_ZERO: |
2550 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_ZERO << (8 * 7); |
2551 | 0 | csize = blosc2_chunk_zeros(*cparams, chunksize, sample_chunk, BLOSC_EXTENDED_HEADER_LENGTH); |
2552 | 0 | break; |
2553 | 0 | case BLOSC2_SPECIAL_UNINIT: |
2554 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); |
2555 | 0 | csize = blosc2_chunk_uninit(*cparams, chunksize, sample_chunk, BLOSC_EXTENDED_HEADER_LENGTH); |
2556 | 0 | break; |
2557 | 0 | case BLOSC2_SPECIAL_NAN: |
2558 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); |
2559 | 0 | csize = blosc2_chunk_nans(*cparams, chunksize, sample_chunk, BLOSC_EXTENDED_HEADER_LENGTH); |
2560 | 0 | break; |
2561 | 0 | default: |
2562 | 0 | BLOSC_TRACE_ERROR("Only zeros, NaNs or non-initialized values are supported."); |
2563 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2564 | 0 | } |
2565 | 0 | if (csize < 0) { |
2566 | 0 | BLOSC_TRACE_ERROR("Error creating sample chunk"); |
2567 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2568 | 0 | } |
2569 | 0 | cparams->typesize = sizeof(int64_t); // change it to offsets typesize |
2570 | | // cparams->blocksize = 0; // automatic blocksize |
2571 | 0 | cparams->blocksize = 8 * 2 * 1024; // based on experiments with create_frame.c bench |
2572 | 0 | cparams->clevel = 5; |
2573 | 0 | cparams->compcode = BLOSC_BLOSCLZ; |
2574 | 0 | int32_t special_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
2575 | 0 | rc = blosc2_chunk_repeatval(*cparams, special_nbytes, off_chunk, new_off_cbytes, &offset_value); |
2576 | 0 | free(cparams); |
2577 | 0 | if (rc < 0) { |
2578 | 0 | BLOSC_TRACE_ERROR("Error creating a special offsets chunk"); |
2579 | 0 | return BLOSC2_ERROR_DATA; |
2580 | 0 | } |
2581 | | |
2582 | | // Get the blocksize associated to the sample chunk |
2583 | 0 | blosc2_cbuffer_sizes(sample_chunk, NULL, NULL, &blocksize); |
2584 | 0 | free(sample_chunk); |
2585 | | // and use it for the super-chunk |
2586 | 0 | schunk->blocksize = blocksize; |
2587 | | // schunk->blocksize = 0; // for experimenting with automatic blocksize |
2588 | | |
2589 | | // We have the new offsets; update the frame. |
2590 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2591 | 0 | if (io_cb == NULL) { |
2592 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2593 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
2594 | 0 | } |
2595 | | |
2596 | 0 | int64_t new_frame_len = header_len + new_off_cbytes + frame->trailer_len; |
2597 | 0 | void* fp = NULL; |
2598 | 0 | if (frame->cframe != NULL) { |
2599 | 0 | uint8_t* framep = frame->cframe; |
2600 | | /* Make space for the new chunk and copy it */ |
2601 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
2602 | 0 | if (framep == NULL) { |
2603 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
2604 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2605 | 0 | } |
2606 | | /* Copy the offsets */ |
2607 | 0 | memcpy(framep + header_len, off_chunk, (size_t)new_off_cbytes); |
2608 | 0 | } |
2609 | 0 | else { |
2610 | 0 | size_t wbytes; |
2611 | 0 | int64_t io_pos = 0; |
2612 | 0 | if (frame->sframe) { |
2613 | | // Update the offsets chunk in the chunks frame |
2614 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", frame->schunk->storage->io); |
2615 | 0 | if (fp == NULL) { |
2616 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2617 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2618 | 0 | } |
2619 | 0 | io_pos = frame->file_offset + header_len; |
2620 | 0 | } |
2621 | 0 | else { |
2622 | | // Regular frame |
2623 | 0 | fp = io_cb->open(frame->urlpath, "rb+", schunk->storage->io->params); |
2624 | 0 | if (fp == NULL) { |
2625 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2626 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2627 | 0 | } |
2628 | 0 | io_pos = frame->file_offset + header_len + cbytes; |
2629 | 0 | } |
2630 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, io_pos, fp); // the new offsets |
2631 | 0 | io_cb->close(fp); |
2632 | 0 | if (wbytes != (size_t)new_off_cbytes) { |
2633 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
2634 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2635 | 0 | } |
2636 | 0 | } |
2637 | | |
2638 | | // Invalidate the cache for chunk offsets |
2639 | 0 | if (frame->coffsets != NULL) { |
2640 | 0 | if (frame->coffsets_needs_free) |
2641 | 0 | free(frame->coffsets); |
2642 | 0 | frame->coffsets = NULL; |
2643 | 0 | } |
2644 | 0 | free(off_chunk); |
2645 | |
|
2646 | 0 | frame->len = new_frame_len; |
2647 | 0 | rc = frame_update_header(frame, schunk, false); |
2648 | 0 | if (rc < 0) { |
2649 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2650 | 0 | } |
2651 | | |
2652 | 0 | rc = frame_update_trailer(frame, schunk); |
2653 | 0 | if (rc < 0) { |
2654 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2655 | 0 | } |
2656 | | |
2657 | 0 | return frame->len; |
2658 | 0 | } |
2659 | | |
2660 | | |
2661 | | /* Append an existing chunk into a frame. */ |
2662 | 0 | void* frame_append_chunk(blosc2_frame_s* frame, void* chunk, blosc2_schunk* schunk) { |
2663 | 0 | int8_t* chunk_ = chunk; |
2664 | 0 | int32_t header_len; |
2665 | 0 | int64_t frame_len; |
2666 | 0 | int64_t nbytes; |
2667 | 0 | int64_t cbytes; |
2668 | 0 | int32_t blocksize; |
2669 | 0 | int32_t chunksize; |
2670 | 0 | int64_t nchunks; |
2671 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, &blocksize, &chunksize, |
2672 | 0 | &nchunks, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2673 | 0 | frame->schunk->storage->io); |
2674 | 0 | if (rc < 0) { |
2675 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2676 | 0 | return NULL; |
2677 | 0 | } |
2678 | | |
2679 | | /* The uncompressed and compressed sizes start at byte 4 and 12 */ |
2680 | 0 | int32_t chunk_nbytes; |
2681 | 0 | int32_t chunk_cbytes; |
2682 | 0 | rc = blosc2_cbuffer_sizes(chunk, &chunk_nbytes, &chunk_cbytes, NULL); |
2683 | 0 | if (rc < 0) { |
2684 | 0 | return NULL; |
2685 | 0 | } |
2686 | | |
2687 | 0 | if ((nchunks > 0) && (chunk_nbytes > chunksize)) { |
2688 | 0 | BLOSC_TRACE_ERROR("Appending chunks with a larger chunksize than frame is " |
2689 | 0 | "not allowed yet %d != %d.", chunk_nbytes, chunksize); |
2690 | 0 | return NULL; |
2691 | 0 | } |
2692 | | |
2693 | | // Check that we are not appending a small chunk after another small chunk |
2694 | 0 | int32_t chunk_nbytes_last; |
2695 | 0 | if (chunksize == 0 && (nchunks > 0) && (chunk_nbytes < chunksize)) { |
2696 | 0 | uint8_t* last_chunk; |
2697 | 0 | bool needs_free; |
2698 | 0 | rc = frame_get_lazychunk(frame, nchunks - 1, &last_chunk, &needs_free); |
2699 | 0 | if (rc < 0) { |
2700 | 0 | BLOSC_TRACE_ERROR("Cannot get the last chunk (in position %" PRId64 ").", nchunks - 1); |
2701 | 0 | } else { |
2702 | 0 | rc = blosc2_cbuffer_sizes(last_chunk, &chunk_nbytes_last, NULL, NULL); |
2703 | 0 | } |
2704 | 0 | if (needs_free) { |
2705 | 0 | free(last_chunk); |
2706 | 0 | } |
2707 | 0 | if (rc < 0) { |
2708 | 0 | return NULL; |
2709 | 0 | } |
2710 | 0 | if ((chunk_nbytes_last < chunksize) && (nbytes < chunksize)) { |
2711 | 0 | BLOSC_TRACE_ERROR("Appending two consecutive chunks with a chunksize smaller " |
2712 | 0 | "than the frame chunksize is not allowed yet: %d != %d.", |
2713 | 0 | chunk_nbytes, chunksize); |
2714 | 0 | return NULL; |
2715 | 0 | } |
2716 | 0 | } |
2717 | | |
2718 | | // Get the current offsets and add one more |
2719 | 0 | int32_t off_nbytes = (int32_t) ((nchunks + 1) * sizeof(int64_t)); |
2720 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
2721 | 0 | if (nchunks > 0) { |
2722 | 0 | int32_t coffsets_cbytes; |
2723 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
2724 | 0 | if (coffsets == NULL) { |
2725 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
2726 | 0 | free(offsets); |
2727 | 0 | return NULL; |
2728 | 0 | } |
2729 | 0 | if (coffsets_cbytes == 0) { |
2730 | 0 | coffsets_cbytes = (int32_t)cbytes; |
2731 | 0 | } |
2732 | | |
2733 | | // Decompress offsets |
2734 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
2735 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
2736 | 0 | if (dctx == NULL) { |
2737 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
2738 | 0 | return NULL; |
2739 | 0 | } |
2740 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, |
2741 | 0 | off_nbytes); |
2742 | 0 | blosc2_free_ctx(dctx); |
2743 | 0 | if (prev_nbytes < 0) { |
2744 | 0 | free(offsets); |
2745 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
2746 | 0 | return NULL; |
2747 | 0 | } |
2748 | 0 | } |
2749 | | |
2750 | | // Add the new offset |
2751 | 0 | int64_t sframe_chunk_id = -1; |
2752 | 0 | int special_value = (chunk_[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
2753 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
2754 | 0 | switch (special_value) { |
2755 | 0 | case BLOSC2_SPECIAL_ZERO: |
2756 | | // Zero chunk. Code it in a special way. |
2757 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_ZERO << (8 * 7); // chunk of zeros |
2758 | 0 | to_little(offsets + nchunks, &offset_value, sizeof(uint64_t)); |
2759 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2760 | 0 | break; |
2761 | 0 | case BLOSC2_SPECIAL_UNINIT: |
2762 | | // Non initizalized values chunk. Code it in a special way. |
2763 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); // chunk of uninit values |
2764 | 0 | to_little(offsets + nchunks, &offset_value, sizeof(uint64_t)); |
2765 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2766 | 0 | break; |
2767 | 0 | case BLOSC2_SPECIAL_NAN: |
2768 | | // NaN chunk. Code it in a special way. |
2769 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); // chunk of NANs |
2770 | 0 | to_little(offsets + nchunks, &offset_value, sizeof(uint64_t)); |
2771 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2772 | 0 | break; |
2773 | 0 | default: |
2774 | 0 | if (frame->sframe) { |
2775 | | // Compute the sframe_chunk_id value |
2776 | 0 | for (int i = 0; i < nchunks; ++i) { |
2777 | 0 | if (offsets[i] > sframe_chunk_id) { |
2778 | 0 | sframe_chunk_id = offsets[i]; |
2779 | 0 | } |
2780 | 0 | } |
2781 | 0 | offsets[nchunks] = ++sframe_chunk_id; |
2782 | 0 | } |
2783 | 0 | else { |
2784 | 0 | offsets[nchunks] = cbytes; |
2785 | 0 | } |
2786 | 0 | } |
2787 | | |
2788 | | // Re-compress the offsets again |
2789 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
2790 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
2791 | 0 | cparams.typesize = sizeof(int64_t); |
2792 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
2793 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
2794 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
2795 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
2796 | 0 | if (cctx == NULL) { |
2797 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
2798 | 0 | return NULL; |
2799 | 0 | } |
2800 | 0 | cctx->typesize = sizeof(int64_t); // override a possible BLOSC_TYPESIZE env variable (or chaos may appear) |
2801 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
2802 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
2803 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
2804 | 0 | blosc2_free_ctx(cctx); |
2805 | 0 | free(offsets); |
2806 | 0 | if (new_off_cbytes < 0) { |
2807 | 0 | free(off_chunk); |
2808 | 0 | return NULL; |
2809 | 0 | } |
2810 | | // printf("%f\n", (double) off_nbytes / new_off_cbytes); |
2811 | | |
2812 | 0 | int64_t new_cbytes = cbytes + chunk_cbytes; |
2813 | 0 | int64_t new_frame_len; |
2814 | 0 | if (frame->sframe) { |
2815 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
2816 | 0 | } |
2817 | 0 | else { |
2818 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
2819 | 0 | } |
2820 | |
|
2821 | 0 | void* fp = NULL; |
2822 | 0 | if (frame->cframe != NULL) { |
2823 | 0 | uint8_t* framep = frame->cframe; |
2824 | | /* Make space for the new chunk and copy it */ |
2825 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
2826 | 0 | if (framep == NULL) { |
2827 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
2828 | 0 | return NULL; |
2829 | 0 | } |
2830 | | /* Copy the chunk */ |
2831 | 0 | memcpy(framep + header_len + cbytes, chunk, (size_t)chunk_cbytes); |
2832 | | /* Copy the offsets */ |
2833 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
2834 | 0 | } |
2835 | 0 | else { |
2836 | 0 | int64_t wbytes; |
2837 | 0 | int64_t io_pos = 0; |
2838 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2839 | 0 | if (io_cb == NULL) { |
2840 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2841 | 0 | return NULL; |
2842 | 0 | } |
2843 | | |
2844 | 0 | if (frame->sframe) { |
2845 | | // Update the offsets chunk in the chunks frame |
2846 | 0 | if (chunk_cbytes != 0) { |
2847 | 0 | if (sframe_chunk_id < 0) { |
2848 | 0 | BLOSC_TRACE_ERROR("The chunk id (%" PRId64 ") is not correct", sframe_chunk_id); |
2849 | 0 | return NULL; |
2850 | 0 | } |
2851 | 0 | if (sframe_create_chunk(frame, chunk, sframe_chunk_id, chunk_cbytes) == NULL) { |
2852 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk."); |
2853 | 0 | return NULL; |
2854 | 0 | } |
2855 | 0 | } |
2856 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
2857 | 0 | frame->schunk->storage->io); |
2858 | 0 | if (fp == NULL) { |
2859 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2860 | 0 | return NULL; |
2861 | 0 | } |
2862 | 0 | io_pos = frame->file_offset + header_len; |
2863 | 0 | } |
2864 | 0 | else { |
2865 | | // Regular frame |
2866 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
2867 | 0 | if (fp == NULL) { |
2868 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2869 | 0 | return NULL; |
2870 | 0 | } |
2871 | 0 | io_pos = frame->file_offset + header_len + cbytes; |
2872 | 0 | wbytes = io_cb->write(chunk, 1, chunk_cbytes, io_pos, fp); // the new chunk |
2873 | 0 | io_pos += chunk_cbytes; |
2874 | 0 | if (wbytes != chunk_cbytes) { |
2875 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk to frame."); |
2876 | 0 | io_cb->close(fp); |
2877 | 0 | return NULL; |
2878 | 0 | } |
2879 | 0 | } |
2880 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, io_pos, fp); // the new offsets |
2881 | 0 | io_cb->close(fp); |
2882 | 0 | if (wbytes != new_off_cbytes) { |
2883 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
2884 | 0 | return NULL; |
2885 | 0 | } |
2886 | 0 | } |
2887 | | // Invalidate the cache for chunk offsets |
2888 | 0 | if (frame->coffsets != NULL) { |
2889 | 0 | if (frame->coffsets_needs_free) |
2890 | 0 | free(frame->coffsets); |
2891 | 0 | frame->coffsets = NULL; |
2892 | 0 | } |
2893 | 0 | free(chunk); // chunk has always to be a copy when reaching here... |
2894 | 0 | free(off_chunk); |
2895 | |
|
2896 | 0 | frame->len = new_frame_len; |
2897 | 0 | rc = frame_update_header(frame, schunk, false); |
2898 | 0 | if (rc < 0) { |
2899 | 0 | return NULL; |
2900 | 0 | } |
2901 | | |
2902 | 0 | rc = frame_update_trailer(frame, schunk); |
2903 | 0 | if (rc < 0) { |
2904 | 0 | return NULL; |
2905 | 0 | } |
2906 | | |
2907 | 0 | return frame; |
2908 | 0 | } |
2909 | | |
2910 | | |
2911 | 0 | void* frame_insert_chunk(blosc2_frame_s* frame, int64_t nchunk, void* chunk, blosc2_schunk* schunk) { |
2912 | 0 | uint8_t* chunk_ = chunk; |
2913 | 0 | int32_t header_len; |
2914 | 0 | int64_t frame_len; |
2915 | 0 | int64_t nbytes; |
2916 | 0 | int64_t cbytes; |
2917 | 0 | int32_t blocksize; |
2918 | 0 | int32_t chunksize; |
2919 | 0 | int64_t nchunks; |
2920 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
2921 | 0 | &blocksize, &chunksize, &nchunks, |
2922 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2923 | 0 | frame->schunk->storage->io); |
2924 | 0 | if (rc < 0) { |
2925 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2926 | 0 | return NULL; |
2927 | 0 | } |
2928 | 0 | int32_t chunk_cbytes; |
2929 | 0 | rc = blosc2_cbuffer_sizes(chunk_, NULL, &chunk_cbytes, NULL); |
2930 | 0 | if (rc < 0) { |
2931 | 0 | return NULL; |
2932 | 0 | } |
2933 | | |
2934 | | // Get the current offsets |
2935 | 0 | int32_t off_nbytes = (int32_t) ((nchunks + 1) * sizeof(int64_t)); |
2936 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
2937 | 0 | if (nchunks > 0) { |
2938 | 0 | int32_t coffsets_cbytes = 0; |
2939 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
2940 | 0 | if (coffsets == NULL) { |
2941 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
2942 | 0 | return NULL; |
2943 | 0 | } |
2944 | 0 | if (coffsets_cbytes == 0) { |
2945 | 0 | coffsets_cbytes = (int32_t)cbytes; |
2946 | 0 | } |
2947 | | |
2948 | | // Decompress offsets |
2949 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
2950 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
2951 | 0 | if (dctx == NULL) { |
2952 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
2953 | 0 | return NULL; |
2954 | 0 | } |
2955 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, off_nbytes); |
2956 | 0 | blosc2_free_ctx(dctx); |
2957 | 0 | if (prev_nbytes < 0) { |
2958 | 0 | free(offsets); |
2959 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
2960 | 0 | return NULL; |
2961 | 0 | } |
2962 | 0 | } |
2963 | | |
2964 | | // TODO: Improvement: Check if new chunk is smaller than previous one |
2965 | | |
2966 | | // Move offsets |
2967 | 0 | for (int64_t i = nchunks; i > nchunk; i--) { |
2968 | 0 | offsets[i] = offsets[i - 1]; |
2969 | 0 | } |
2970 | | // Add the new offset |
2971 | 0 | int64_t sframe_chunk_id = -1; |
2972 | 0 | int special_value = (chunk_[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
2973 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
2974 | 0 | switch (special_value) { |
2975 | 0 | case BLOSC2_SPECIAL_ZERO: |
2976 | | // Zero chunk. Code it in a special way. |
2977 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_ZERO << (8 * 7); // indicate a chunk of zeros |
2978 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
2979 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2980 | 0 | break; |
2981 | 0 | case BLOSC2_SPECIAL_UNINIT: |
2982 | | // Non initizalized values chunk. Code it in a special way. |
2983 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); // chunk of uninit values |
2984 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
2985 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2986 | 0 | break; |
2987 | 0 | case BLOSC2_SPECIAL_NAN: |
2988 | | // NaN chunk. Code it in a special way. |
2989 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); // indicate a chunk of NANs |
2990 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
2991 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2992 | 0 | break; |
2993 | 0 | default: |
2994 | 0 | if (frame->sframe) { |
2995 | 0 | for (int i = 0; i <= nchunks; ++i) { |
2996 | | // offsets[nchunk] is still uninitialized here |
2997 | 0 | if (i != nchunk && offsets[i] > sframe_chunk_id) { |
2998 | 0 | sframe_chunk_id = offsets[i]; |
2999 | 0 | } |
3000 | 0 | } |
3001 | 0 | offsets[nchunk] = ++sframe_chunk_id; |
3002 | 0 | } |
3003 | 0 | else { |
3004 | 0 | offsets[nchunk] = cbytes; |
3005 | 0 | } |
3006 | 0 | } |
3007 | | |
3008 | | // Re-compress the offsets again |
3009 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
3010 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
3011 | 0 | cparams.typesize = sizeof(int64_t); |
3012 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
3013 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
3014 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
3015 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
3016 | 0 | if (cctx == NULL) { |
3017 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
3018 | 0 | return NULL; |
3019 | 0 | } |
3020 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
3021 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
3022 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
3023 | 0 | blosc2_free_ctx(cctx); |
3024 | |
|
3025 | 0 | free(offsets); |
3026 | 0 | if (new_off_cbytes < 0) { |
3027 | 0 | free(off_chunk); |
3028 | 0 | return NULL; |
3029 | 0 | } |
3030 | | |
3031 | 0 | int64_t new_cbytes = cbytes + chunk_cbytes; |
3032 | |
|
3033 | 0 | int64_t new_frame_len; |
3034 | 0 | if (frame->sframe) { |
3035 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
3036 | 0 | } |
3037 | 0 | else { |
3038 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
3039 | 0 | } |
3040 | | |
3041 | | // Add the chunk and update meta |
3042 | 0 | void* fp = NULL; |
3043 | 0 | if (frame->cframe != NULL) { |
3044 | 0 | uint8_t* framep = frame->cframe; |
3045 | | /* Make space for the new chunk and copy it */ |
3046 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
3047 | 0 | if (framep == NULL) { |
3048 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
3049 | 0 | return NULL; |
3050 | 0 | } |
3051 | | /* Copy the chunk */ |
3052 | 0 | memcpy(framep + header_len + cbytes, chunk, (size_t)chunk_cbytes); |
3053 | | /* Copy the offsets */ |
3054 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
3055 | 0 | } else { |
3056 | 0 | int64_t wbytes; |
3057 | |
|
3058 | 0 | int64_t io_pos = 0; |
3059 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
3060 | 0 | if (io_cb == NULL) { |
3061 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
3062 | 0 | return NULL; |
3063 | 0 | } |
3064 | | |
3065 | 0 | if (frame->sframe) { |
3066 | 0 | if (chunk_cbytes != 0) { |
3067 | 0 | if (sframe_chunk_id < 0) { |
3068 | 0 | BLOSC_TRACE_ERROR("The chunk id (%" PRId64 ") is not correct", sframe_chunk_id); |
3069 | 0 | return NULL; |
3070 | 0 | } |
3071 | 0 | if (sframe_create_chunk(frame, chunk, sframe_chunk_id, chunk_cbytes) == NULL) { |
3072 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk."); |
3073 | 0 | return NULL; |
3074 | 0 | } |
3075 | 0 | } |
3076 | | // Update the offsets chunk in the chunks frame |
3077 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
3078 | 0 | frame->schunk->storage->io); |
3079 | 0 | if (fp == NULL) { |
3080 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3081 | 0 | return NULL; |
3082 | 0 | } |
3083 | 0 | io_pos = frame->file_offset + header_len + 0; |
3084 | 0 | } |
3085 | 0 | else { |
3086 | | // Regular frame |
3087 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
3088 | 0 | if (fp == NULL) { |
3089 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3090 | 0 | return NULL; |
3091 | 0 | } |
3092 | 0 | io_pos = frame->file_offset + header_len + cbytes; |
3093 | 0 | wbytes = io_cb->write(chunk, 1, chunk_cbytes, io_pos, fp); // the new chunk |
3094 | 0 | io_pos += chunk_cbytes; |
3095 | 0 | if (wbytes != chunk_cbytes) { |
3096 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk to frame."); |
3097 | 0 | io_cb->close(fp); |
3098 | 0 | return NULL; |
3099 | 0 | } |
3100 | 0 | } |
3101 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, io_pos, fp); // the new offsets |
3102 | 0 | io_cb->close(fp); |
3103 | 0 | if (wbytes != new_off_cbytes) { |
3104 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3105 | 0 | return NULL; |
3106 | 0 | } |
3107 | | // Invalidate the cache for chunk offsets |
3108 | 0 | if (frame->coffsets != NULL) { |
3109 | 0 | if (frame->coffsets_needs_free) |
3110 | 0 | free(frame->coffsets); |
3111 | 0 | frame->coffsets = NULL; |
3112 | 0 | } |
3113 | 0 | } |
3114 | 0 | free(chunk); // chunk has always to be a copy when reaching here... |
3115 | 0 | free(off_chunk); |
3116 | |
|
3117 | 0 | frame->len = new_frame_len; |
3118 | 0 | rc = frame_update_header(frame, schunk, false); |
3119 | 0 | if (rc < 0) { |
3120 | 0 | return NULL; |
3121 | 0 | } |
3122 | | |
3123 | 0 | rc = frame_update_trailer(frame, schunk); |
3124 | 0 | if (rc < 0) { |
3125 | 0 | return NULL; |
3126 | 0 | } |
3127 | | |
3128 | 0 | return frame; |
3129 | 0 | } |
3130 | | |
3131 | | |
3132 | 0 | void* frame_update_chunk(blosc2_frame_s* frame, int64_t nchunk, void* chunk, blosc2_schunk* schunk) { |
3133 | 0 | uint8_t *chunk_ = (uint8_t *) chunk; |
3134 | 0 | int32_t header_len; |
3135 | 0 | int64_t frame_len; |
3136 | 0 | int64_t nbytes; |
3137 | 0 | int64_t cbytes; |
3138 | 0 | int32_t blocksize; |
3139 | 0 | int32_t chunksize; |
3140 | 0 | int64_t nchunks; |
3141 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
3142 | 0 | &blocksize, &chunksize, &nchunks, |
3143 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
3144 | 0 | frame->schunk->storage->io); |
3145 | 0 | if (rc < 0) { |
3146 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
3147 | 0 | return NULL; |
3148 | 0 | } |
3149 | 0 | if (nchunk >= nchunks) { |
3150 | 0 | BLOSC_TRACE_ERROR("The chunk must already exist."); |
3151 | 0 | return NULL; |
3152 | 0 | } |
3153 | | |
3154 | 0 | int32_t chunk_cbytes; |
3155 | 0 | rc = blosc2_cbuffer_sizes(chunk, NULL, &chunk_cbytes, NULL); |
3156 | 0 | if (rc < 0) { |
3157 | 0 | return NULL; |
3158 | 0 | } |
3159 | | |
3160 | | // Get the current offsets |
3161 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
3162 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
3163 | 0 | if (nchunks > 0) { |
3164 | 0 | int32_t coffsets_cbytes = 0; |
3165 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
3166 | 0 | if (coffsets == NULL) { |
3167 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
3168 | 0 | return NULL; |
3169 | 0 | } |
3170 | 0 | if (coffsets_cbytes == 0) { |
3171 | 0 | coffsets_cbytes = (int32_t)cbytes; |
3172 | 0 | } |
3173 | | |
3174 | | // Decompress offsets |
3175 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
3176 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
3177 | 0 | if (dctx == NULL) { |
3178 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
3179 | 0 | return NULL; |
3180 | 0 | } |
3181 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, off_nbytes); |
3182 | 0 | blosc2_free_ctx(dctx); |
3183 | 0 | if (prev_nbytes < 0) { |
3184 | 0 | free(offsets); |
3185 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
3186 | 0 | return NULL; |
3187 | 0 | } |
3188 | 0 | } |
3189 | 0 | int32_t cbytes_old = 0; |
3190 | 0 | int64_t old_offset = 0; |
3191 | 0 | if (!frame->sframe) { |
3192 | | // See how big would be the space |
3193 | 0 | old_offset = offsets[nchunk]; |
3194 | 0 | bool needs_free; |
3195 | 0 | uint8_t *chunk_old; |
3196 | 0 | int err = blosc2_schunk_get_chunk(schunk, nchunk, &chunk_old, &needs_free); |
3197 | 0 | if (err < 0) { |
3198 | 0 | BLOSC_TRACE_ERROR("%" PRId64 " chunk can not be obtained from schunk.", nchunk); |
3199 | 0 | return NULL; |
3200 | 0 | } |
3201 | | |
3202 | 0 | if (chunk_old == NULL) { |
3203 | 0 | cbytes_old = 0; |
3204 | 0 | } |
3205 | 0 | else { |
3206 | 0 | cbytes_old = sw32_(chunk_old + BLOSC2_CHUNK_CBYTES); |
3207 | 0 | if (cbytes_old == BLOSC2_MAX_OVERHEAD) { |
3208 | 0 | cbytes_old = 0; |
3209 | 0 | } |
3210 | 0 | } |
3211 | 0 | if (needs_free) { |
3212 | 0 | free(chunk_old); |
3213 | 0 | } |
3214 | 0 | } |
3215 | | |
3216 | | // Add the new offset |
3217 | 0 | int64_t sframe_chunk_id = -1; |
3218 | 0 | if (frame->sframe) { |
3219 | 0 | if (offsets[nchunk] < 0) { |
3220 | 0 | sframe_chunk_id = -1; |
3221 | 0 | } |
3222 | 0 | else { |
3223 | | // In case there was a reorder in a sframe |
3224 | 0 | sframe_chunk_id = offsets[nchunk]; |
3225 | 0 | } |
3226 | 0 | } |
3227 | 0 | int special_value = (chunk_[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
3228 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
3229 | 0 | switch (special_value) { |
3230 | 0 | case BLOSC2_SPECIAL_ZERO: |
3231 | | // Zero chunk. Code it in a special way. |
3232 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_ZERO << (8 * 7); // indicate a chunk of zeros |
3233 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
3234 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
3235 | 0 | break; |
3236 | 0 | case BLOSC2_SPECIAL_UNINIT: |
3237 | | // Non initizalized values chunk. Code it in a special way. |
3238 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_UNINIT << (8 * 7); // indicate a chunk of uninit values |
3239 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
3240 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
3241 | 0 | break; |
3242 | 0 | case BLOSC2_SPECIAL_NAN: |
3243 | | // NaN chunk. Code it in a special way. |
3244 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); // indicate a chunk of NANs |
3245 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
3246 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
3247 | 0 | break; |
3248 | 0 | default: |
3249 | 0 | if (frame->sframe) { |
3250 | 0 | if (sframe_chunk_id < 0) { |
3251 | 0 | for (int i = 0; i < nchunks; ++i) { |
3252 | 0 | if (offsets[i] > sframe_chunk_id) { |
3253 | 0 | sframe_chunk_id = offsets[i]; |
3254 | 0 | } |
3255 | 0 | } |
3256 | 0 | offsets[nchunk] = ++sframe_chunk_id; |
3257 | 0 | } |
3258 | 0 | } |
3259 | 0 | else { |
3260 | | // Add the new offset |
3261 | 0 | offsets[nchunk] = cbytes; |
3262 | 0 | } |
3263 | 0 | } |
3264 | | |
3265 | 0 | if (!frame->sframe && chunk_cbytes != 0 && cbytes_old >= chunk_cbytes) { |
3266 | 0 | offsets[nchunk] = old_offset; |
3267 | 0 | cbytes = old_offset; |
3268 | 0 | } |
3269 | | // Re-compress the offsets again |
3270 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
3271 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
3272 | 0 | cparams.typesize = sizeof(int64_t); |
3273 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
3274 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
3275 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
3276 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
3277 | 0 | if (cctx == NULL) { |
3278 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
3279 | 0 | return NULL; |
3280 | 0 | } |
3281 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
3282 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
3283 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
3284 | 0 | blosc2_free_ctx(cctx); |
3285 | |
|
3286 | 0 | free(offsets); |
3287 | 0 | if (new_off_cbytes < 0) { |
3288 | 0 | free(off_chunk); |
3289 | 0 | return NULL; |
3290 | 0 | } |
3291 | | |
3292 | 0 | int64_t new_cbytes = schunk->cbytes; |
3293 | 0 | int64_t new_frame_len; |
3294 | 0 | if (frame->sframe) { |
3295 | | // The chunk is not stored in the frame |
3296 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
3297 | 0 | } |
3298 | 0 | else { |
3299 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
3300 | 0 | } |
3301 | |
|
3302 | 0 | void* fp = NULL; |
3303 | 0 | if (frame->cframe != NULL) { |
3304 | 0 | uint8_t* framep = frame->cframe; |
3305 | | /* Make space for the new chunk and copy it */ |
3306 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
3307 | 0 | if (framep == NULL) { |
3308 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
3309 | 0 | return NULL; |
3310 | 0 | } |
3311 | | /* Copy the chunk */ |
3312 | 0 | memcpy(framep + header_len + cbytes, chunk, (size_t)chunk_cbytes); |
3313 | | /* Copy the offsets */ |
3314 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
3315 | 0 | } else { |
3316 | 0 | int64_t wbytes; |
3317 | |
|
3318 | 0 | int64_t io_pos = 0; |
3319 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
3320 | 0 | if (io_cb == NULL) { |
3321 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
3322 | 0 | return NULL; |
3323 | 0 | } |
3324 | | |
3325 | 0 | if (frame->sframe) { |
3326 | | // Create the chunks file, if it's a special value this will delete its old content |
3327 | 0 | if (sframe_chunk_id >= 0) { |
3328 | 0 | if (sframe_create_chunk(frame, chunk, sframe_chunk_id, chunk_cbytes) == NULL) { |
3329 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk."); |
3330 | 0 | return NULL; |
3331 | 0 | } |
3332 | 0 | } |
3333 | | // Update the offsets chunk in the chunks frame |
3334 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
3335 | 0 | frame->schunk->storage->io); |
3336 | 0 | if (fp == NULL) { |
3337 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3338 | 0 | return NULL; |
3339 | 0 | } |
3340 | 0 | io_pos = frame->file_offset + header_len + 0; |
3341 | 0 | } |
3342 | 0 | else { |
3343 | | // Regular frame |
3344 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
3345 | 0 | if (fp == NULL) { |
3346 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3347 | 0 | return NULL; |
3348 | 0 | } |
3349 | 0 | io_pos = frame->file_offset + header_len + cbytes; |
3350 | 0 | wbytes = io_cb->write(chunk, 1, chunk_cbytes, io_pos, fp); // the new chunk |
3351 | 0 | io_pos += chunk_cbytes; |
3352 | 0 | if (wbytes != chunk_cbytes) { |
3353 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk to frame."); |
3354 | 0 | io_cb->close(fp); |
3355 | 0 | return NULL; |
3356 | 0 | } |
3357 | 0 | io_pos = frame->file_offset + header_len + new_cbytes; |
3358 | 0 | } |
3359 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, io_pos, fp); // the new offsets |
3360 | 0 | io_cb->close(fp); |
3361 | 0 | if (wbytes != new_off_cbytes) { |
3362 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3363 | 0 | return NULL; |
3364 | 0 | } |
3365 | | // Invalidate the cache for chunk offsets |
3366 | 0 | if (frame->coffsets != NULL) { |
3367 | 0 | if (frame->coffsets_needs_free) |
3368 | 0 | free(frame->coffsets); |
3369 | 0 | frame->coffsets = NULL; |
3370 | 0 | } |
3371 | 0 | } |
3372 | 0 | free(chunk); // chunk has always to be a copy when reaching here... |
3373 | 0 | free(off_chunk); |
3374 | |
|
3375 | 0 | frame->len = new_frame_len; |
3376 | 0 | rc = frame_update_header(frame, schunk, false); |
3377 | 0 | if (rc < 0) { |
3378 | 0 | return NULL; |
3379 | 0 | } |
3380 | | |
3381 | 0 | rc = frame_update_trailer(frame, schunk); |
3382 | 0 | if (rc < 0) { |
3383 | 0 | return NULL; |
3384 | 0 | } |
3385 | | |
3386 | 0 | return frame; |
3387 | 0 | } |
3388 | | |
3389 | | |
3390 | 0 | void* frame_delete_chunk(blosc2_frame_s* frame, int64_t nchunk, blosc2_schunk* schunk) { |
3391 | 0 | int32_t header_len; |
3392 | 0 | int64_t frame_len; |
3393 | 0 | int64_t nbytes; |
3394 | 0 | int64_t cbytes; |
3395 | 0 | int32_t blocksize; |
3396 | 0 | int32_t chunksize; |
3397 | 0 | int64_t nchunks; |
3398 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
3399 | 0 | &blocksize, &chunksize, &nchunks, |
3400 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, frame->schunk->storage->io); |
3401 | 0 | if (rc < 0) { |
3402 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
3403 | 0 | return NULL; |
3404 | 0 | } |
3405 | | |
3406 | | // Get the current offsets |
3407 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
3408 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
3409 | 0 | if (nchunks > 0) { |
3410 | 0 | int32_t coffsets_cbytes = 0; |
3411 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
3412 | 0 | if (coffsets == NULL) { |
3413 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
3414 | 0 | return NULL; |
3415 | 0 | } |
3416 | 0 | if (coffsets_cbytes == 0) { |
3417 | 0 | coffsets_cbytes = (int32_t)cbytes; |
3418 | 0 | } |
3419 | | |
3420 | | // Decompress offsets |
3421 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
3422 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
3423 | 0 | if (dctx == NULL) { |
3424 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
3425 | 0 | return NULL; |
3426 | 0 | } |
3427 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, off_nbytes); |
3428 | 0 | blosc2_free_ctx(dctx); |
3429 | 0 | if (prev_nbytes < 0) { |
3430 | 0 | free(offsets); |
3431 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
3432 | 0 | return NULL; |
3433 | 0 | } |
3434 | 0 | } |
3435 | | |
3436 | | // Delete the new offset |
3437 | 0 | for (int64_t i = nchunk; i < nchunks - 1; i++) { |
3438 | 0 | offsets[i] = offsets[i + 1]; |
3439 | 0 | } |
3440 | 0 | offsets[nchunks - 1] = 0; |
3441 | | |
3442 | | // Re-compress the offsets again |
3443 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
3444 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
3445 | 0 | cparams.typesize = sizeof(int64_t); |
3446 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
3447 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
3448 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
3449 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
3450 | 0 | if (cctx == NULL) { |
3451 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
3452 | 0 | return NULL; |
3453 | 0 | } |
3454 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
3455 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes - (int32_t)sizeof(int64_t), |
3456 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
3457 | 0 | blosc2_free_ctx(cctx); |
3458 | |
|
3459 | 0 | free(offsets); |
3460 | 0 | if (new_off_cbytes < 0) { |
3461 | 0 | free(off_chunk); |
3462 | 0 | return NULL; |
3463 | 0 | } |
3464 | | |
3465 | 0 | int64_t new_cbytes = cbytes; |
3466 | |
|
3467 | 0 | int64_t new_frame_len; |
3468 | 0 | if (frame->sframe) { |
3469 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
3470 | 0 | } |
3471 | 0 | else { |
3472 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
3473 | 0 | } |
3474 | | |
3475 | | // Add the chunk and update meta |
3476 | 0 | FILE* fp = NULL; |
3477 | 0 | if (frame->cframe != NULL) { |
3478 | 0 | uint8_t* framep = frame->cframe; |
3479 | | /* Make space for the new chunk and copy it */ |
3480 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
3481 | 0 | if (framep == NULL) { |
3482 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
3483 | 0 | return NULL; |
3484 | 0 | } |
3485 | | /* Copy the offsets */ |
3486 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
3487 | 0 | } else { |
3488 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
3489 | 0 | if (io_cb == NULL) { |
3490 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
3491 | 0 | return NULL; |
3492 | 0 | } |
3493 | | |
3494 | 0 | size_t wbytes; |
3495 | 0 | int64_t io_pos = 0; |
3496 | 0 | if (frame->sframe) { |
3497 | 0 | int64_t offset; |
3498 | 0 | rc = get_coffset(frame, header_len, cbytes, nchunk, nchunks, &offset); |
3499 | 0 | if (rc < 0) { |
3500 | 0 | BLOSC_TRACE_ERROR("Unable to get offset to chunk %" PRId64 ".", nchunk); |
3501 | 0 | return NULL; |
3502 | 0 | } |
3503 | 0 | if (offset >= 0){ |
3504 | | // Remove the chunk file only if it is not a special value chunk |
3505 | 0 | int err = sframe_delete_chunk(frame->urlpath, offset); |
3506 | 0 | if (err != 0) { |
3507 | 0 | BLOSC_TRACE_ERROR("Unable to delete chunk!"); |
3508 | 0 | return NULL; |
3509 | 0 | } |
3510 | 0 | } |
3511 | | // Update the offsets chunk in the chunks frame |
3512 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", frame->schunk->storage->io); |
3513 | 0 | if (fp == NULL) { |
3514 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3515 | 0 | return NULL; |
3516 | 0 | } |
3517 | 0 | io_pos = frame->file_offset + header_len + 0; |
3518 | 0 | } |
3519 | 0 | else { |
3520 | | // Regular frame |
3521 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io); |
3522 | 0 | if (fp == NULL) { |
3523 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3524 | 0 | return NULL; |
3525 | 0 | } |
3526 | 0 | io_pos = frame->file_offset + header_len + cbytes; |
3527 | 0 | } |
3528 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, io_pos, fp); // the new offsets |
3529 | 0 | io_cb->close(fp); |
3530 | 0 | if (wbytes != (size_t)new_off_cbytes) { |
3531 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3532 | 0 | return NULL; |
3533 | 0 | } |
3534 | | // Invalidate the cache for chunk offsets |
3535 | 0 | if (frame->coffsets != NULL) { |
3536 | 0 | if (frame->coffsets_needs_free) |
3537 | 0 | free(frame->coffsets); |
3538 | 0 | frame->coffsets = NULL; |
3539 | 0 | } |
3540 | 0 | } |
3541 | 0 | free(off_chunk); |
3542 | |
|
3543 | 0 | frame->len = new_frame_len; |
3544 | 0 | rc = frame_update_header(frame, schunk, false); |
3545 | 0 | if (rc < 0) { |
3546 | 0 | return NULL; |
3547 | 0 | } |
3548 | | |
3549 | 0 | rc = frame_update_trailer(frame, schunk); |
3550 | 0 | if (rc < 0) { |
3551 | 0 | return NULL; |
3552 | 0 | } |
3553 | | |
3554 | 0 | return frame; |
3555 | 0 | } |
3556 | | |
3557 | | |
3558 | 0 | int frame_reorder_offsets(blosc2_frame_s* frame, const int64_t* offsets_order, blosc2_schunk* schunk) { |
3559 | | // Get header info |
3560 | 0 | int32_t header_len; |
3561 | 0 | int64_t frame_len; |
3562 | 0 | int64_t nbytes; |
3563 | 0 | int64_t cbytes; |
3564 | 0 | int32_t blocksize; |
3565 | 0 | int32_t chunksize; |
3566 | 0 | int64_t nchunks; |
3567 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
3568 | 0 | &blocksize, &chunksize, &nchunks, |
3569 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
3570 | 0 | frame->schunk->storage->io); |
3571 | 0 | if (ret < 0) { |
3572 | 0 | BLOSC_TRACE_ERROR("Cannot get the header info for the frame."); |
3573 | 0 | return ret; |
3574 | 0 | } |
3575 | | |
3576 | | // Get the current offsets and add one more |
3577 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
3578 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
3579 | |
|
3580 | 0 | int32_t coffsets_cbytes = 0; |
3581 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
3582 | 0 | if (coffsets == NULL) { |
3583 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
3584 | 0 | free(offsets); |
3585 | 0 | return BLOSC2_ERROR_DATA; |
3586 | 0 | } |
3587 | | |
3588 | | // Decompress offsets |
3589 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
3590 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
3591 | 0 | if (dctx == NULL) { |
3592 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
3593 | 0 | return BLOSC2_ERROR_NULL_POINTER; |
3594 | 0 | } |
3595 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, |
3596 | 0 | offsets, off_nbytes); |
3597 | 0 | blosc2_free_ctx(dctx); |
3598 | 0 | if (prev_nbytes < 0) { |
3599 | 0 | free(offsets); |
3600 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
3601 | 0 | return prev_nbytes; |
3602 | 0 | } |
3603 | | |
3604 | | // Make a copy of the chunk offsets and reorder it |
3605 | 0 | int64_t *offsets_copy = malloc(prev_nbytes); |
3606 | 0 | memcpy(offsets_copy, offsets, prev_nbytes); |
3607 | |
|
3608 | 0 | for (int i = 0; i < nchunks; ++i) { |
3609 | 0 | offsets[i] = offsets_copy[offsets_order[i]]; |
3610 | 0 | } |
3611 | 0 | free(offsets_copy); |
3612 | | |
3613 | | // Re-compress the offsets again |
3614 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
3615 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
3616 | 0 | cparams.typesize = sizeof(int64_t); |
3617 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
3618 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
3619 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
3620 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
3621 | 0 | if (cctx == NULL) { |
3622 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
3623 | 0 | return BLOSC2_ERROR_NULL_POINTER; |
3624 | 0 | } |
3625 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
3626 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
3627 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
3628 | 0 | blosc2_free_ctx(cctx); |
3629 | |
|
3630 | 0 | if (new_off_cbytes < 0) { |
3631 | 0 | free(offsets); |
3632 | 0 | free(off_chunk); |
3633 | 0 | return new_off_cbytes; |
3634 | 0 | } |
3635 | 0 | free(offsets); |
3636 | 0 | int64_t new_frame_len; |
3637 | 0 | if (frame->sframe) { |
3638 | | // The chunks are not in the frame |
3639 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
3640 | 0 | } |
3641 | 0 | else { |
3642 | 0 | new_frame_len = header_len + cbytes + new_off_cbytes + frame->trailer_len; |
3643 | 0 | } |
3644 | |
|
3645 | 0 | if (frame->cframe != NULL) { |
3646 | 0 | uint8_t* framep = frame->cframe; |
3647 | | /* Make space for the new chunk and copy it */ |
3648 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
3649 | 0 | if (framep == NULL) { |
3650 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
3651 | 0 | return BLOSC2_ERROR_MEMORY_ALLOC; |
3652 | 0 | } |
3653 | | /* Copy the offsets */ |
3654 | 0 | memcpy(framep + header_len + cbytes, off_chunk, (size_t)new_off_cbytes); |
3655 | 0 | } |
3656 | 0 | else { |
3657 | 0 | void* fp = NULL; |
3658 | |
|
3659 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
3660 | 0 | if (io_cb == NULL) { |
3661 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
3662 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
3663 | 0 | } |
3664 | | |
3665 | 0 | int64_t io_pos = 0; |
3666 | 0 | if (frame->sframe) { |
3667 | | // Update the offsets chunk in the chunks frame |
3668 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
3669 | 0 | frame->schunk->storage->io); |
3670 | 0 | if (fp == NULL) { |
3671 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3672 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
3673 | 0 | } |
3674 | 0 | io_pos = frame->file_offset + header_len + 0; |
3675 | 0 | } |
3676 | 0 | else { |
3677 | | // Regular frame |
3678 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
3679 | 0 | if (fp == NULL) { |
3680 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3681 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
3682 | 0 | } |
3683 | 0 | io_pos = frame->file_offset + header_len + cbytes; |
3684 | 0 | } |
3685 | 0 | int64_t wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, io_pos, fp); // the new offsets |
3686 | 0 | io_cb->close(fp); |
3687 | 0 | if (wbytes != new_off_cbytes) { |
3688 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3689 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
3690 | 0 | } |
3691 | 0 | } |
3692 | | |
3693 | | // Invalidate the cache for chunk offsets |
3694 | 0 | if (frame->coffsets != NULL) { |
3695 | 0 | if (frame->coffsets_needs_free) |
3696 | 0 | free(frame->coffsets); |
3697 | 0 | frame->coffsets = NULL; |
3698 | 0 | } |
3699 | 0 | free(off_chunk); |
3700 | |
|
3701 | 0 | frame->len = new_frame_len; |
3702 | 0 | int rc = frame_update_header(frame, schunk, false); |
3703 | 0 | if (rc < 0) { |
3704 | 0 | return rc; |
3705 | 0 | } |
3706 | | |
3707 | 0 | rc = frame_update_trailer(frame, schunk); |
3708 | 0 | if (rc < 0) { |
3709 | 0 | return rc; |
3710 | 0 | } |
3711 | | |
3712 | 0 | return 0; |
3713 | 0 | } |
3714 | | |
3715 | | |
3716 | | /* Decompress and return a chunk that is part of a frame. */ |
3717 | 0 | int frame_decompress_chunk(blosc2_context *dctx, blosc2_frame_s* frame, int64_t nchunk, void *dest, int32_t nbytes) { |
3718 | 0 | uint8_t* src; |
3719 | 0 | bool needs_free; |
3720 | 0 | int32_t chunk_nbytes; |
3721 | 0 | int32_t chunk_cbytes; |
3722 | 0 | int rc; |
3723 | | |
3724 | | // Use a lazychunk here in order to do a potential parallel read. |
3725 | 0 | rc = frame_get_lazychunk(frame, nchunk, &src, &needs_free); |
3726 | 0 | if (rc < 0) { |
3727 | 0 | BLOSC_TRACE_ERROR("Cannot get the chunk in position %" PRId64 ".", nchunk); |
3728 | 0 | goto end; |
3729 | 0 | } |
3730 | 0 | chunk_cbytes = rc; |
3731 | 0 | if (chunk_cbytes < (signed)sizeof(int32_t)) { |
3732 | | /* Not enough input to read `nbytes` */ |
3733 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
3734 | 0 | } |
3735 | |
|
3736 | 0 | rc = blosc2_cbuffer_sizes(src, &chunk_nbytes, &chunk_cbytes, NULL); |
3737 | 0 | if (rc < 0) { |
3738 | 0 | goto end; |
3739 | 0 | } |
3740 | | |
3741 | | /* Create a buffer for destination */ |
3742 | 0 | if (chunk_nbytes > nbytes) { |
3743 | 0 | BLOSC_TRACE_ERROR("Not enough space for decompressing in dest."); |
3744 | 0 | rc = BLOSC2_ERROR_WRITE_BUFFER; |
3745 | 0 | goto end; |
3746 | 0 | } |
3747 | | /* And decompress it */ |
3748 | 0 | dctx->header_overhead = BLOSC_EXTENDED_HEADER_LENGTH; |
3749 | 0 | int chunksize = rc = blosc2_decompress_ctx(dctx, src, chunk_cbytes, dest, nbytes); |
3750 | 0 | if (chunksize < 0 || chunksize != chunk_nbytes) { |
3751 | 0 | BLOSC_TRACE_ERROR("Error in decompressing chunk."); |
3752 | 0 | if (chunksize >= 0) |
3753 | 0 | rc = BLOSC2_ERROR_FAILURE; |
3754 | 0 | } |
3755 | 0 | end: |
3756 | 0 | if (needs_free) { |
3757 | 0 | free(src); |
3758 | 0 | } |
3759 | 0 | return rc; |
3760 | 0 | } |