/src/c-blosc2/blosc/frame.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************* |
2 | | Blosc - Blocked Shuffling and Compression Library |
3 | | |
4 | | Copyright (c) 2021 The Blosc Development Team <blosc@blosc.org> |
5 | | https://blosc.org |
6 | | License: BSD 3-Clause (see LICENSE.txt) |
7 | | |
8 | | See LICENSE.txt for details about copyright and rights to use. |
9 | | **********************************************************************/ |
10 | | |
11 | | #include "frame.h" |
12 | | #include "sframe.h" |
13 | | #include "context.h" |
14 | | #include "blosc-private.h" |
15 | | #include "blosc2.h" |
16 | | |
17 | | #if defined(_WIN32) |
18 | | #include <windows.h> |
19 | | #include <malloc.h> |
20 | | #endif /* _WIN32 */ |
21 | | |
22 | | #include <sys/stat.h> |
23 | | |
24 | | #include <inttypes.h> |
25 | | #include <stdbool.h> |
26 | | #include <stdio.h> |
27 | | #include <stdint.h> |
28 | | #include <stdlib.h> |
29 | | #include <string.h> |
30 | | |
31 | | /* If C11 is supported, use its built-in aligned allocation. */ |
32 | | #if __STDC_VERSION__ >= 201112L |
33 | | #include <stdalign.h> |
34 | | #endif |
35 | | |
36 | | |
37 | | /* Create a new (empty) frame */ |
38 | 0 | blosc2_frame_s* frame_new(const char* urlpath) { |
39 | 0 | blosc2_frame_s* new_frame = calloc(1, sizeof(blosc2_frame_s)); |
40 | 0 | if (urlpath != NULL) { |
41 | 0 | char* new_urlpath = malloc(strlen(urlpath) + 1); // + 1 for the trailing NULL |
42 | 0 | new_frame->urlpath = strcpy(new_urlpath, urlpath); |
43 | 0 | new_frame->file_offset = 0; |
44 | 0 | } |
45 | 0 | return new_frame; |
46 | 0 | } |
47 | | |
48 | | |
49 | | /* Free memory from a frame. */ |
50 | 5 | int frame_free(blosc2_frame_s* frame) { |
51 | | |
52 | 5 | if (frame->cframe != NULL && !frame->avoid_cframe_free) { |
53 | 0 | free(frame->cframe); |
54 | 0 | } |
55 | | |
56 | 5 | if (frame->coffsets != NULL) { |
57 | 0 | free(frame->coffsets); |
58 | 0 | } |
59 | | |
60 | 5 | if (frame->urlpath != NULL) { |
61 | 0 | free(frame->urlpath); |
62 | 0 | } |
63 | | |
64 | 5 | free(frame); |
65 | | |
66 | 5 | return 0; |
67 | 5 | } |
68 | | |
69 | | |
70 | 0 | void *new_header_frame(blosc2_schunk *schunk, blosc2_frame_s *frame) { |
71 | 0 | if (frame == NULL) { |
72 | 0 | return NULL; |
73 | 0 | } |
74 | 0 | uint8_t* h2 = calloc(FRAME_HEADER_MINLEN, 1); |
75 | 0 | uint8_t* h2p = h2; |
76 | | |
77 | | // The msgpack header starts here |
78 | 0 | *h2p = 0x90; // fixarray... |
79 | 0 | *h2p += 14; // ...with 13 elements |
80 | 0 | h2p += 1; |
81 | | |
82 | | // Magic number |
83 | 0 | *h2p = 0xa0 + 8; // str with 8 elements |
84 | 0 | h2p += 1; |
85 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
86 | 0 | return NULL; |
87 | 0 | } |
88 | 0 | strcpy((char*)h2p, "b2frame"); |
89 | 0 | h2p += 8; |
90 | | |
91 | | // Header size |
92 | 0 | *h2p = 0xd2; // int32 |
93 | 0 | h2p += 1 + 4; |
94 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
95 | 0 | return NULL; |
96 | 0 | } |
97 | | |
98 | | // Total frame size |
99 | 0 | *h2p = 0xcf; // uint64 |
100 | | // Fill it with frame->len which is known *after* the creation of the frame (e.g. when updating the header) |
101 | 0 | int64_t flen = frame->len; |
102 | 0 | to_big(h2 + FRAME_LEN, &flen, sizeof(flen)); |
103 | 0 | h2p += 1 + 8; |
104 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
105 | 0 | return NULL; |
106 | 0 | } |
107 | | |
108 | | // Flags |
109 | 0 | *h2p = 0xa0 + 4; // str with 4 elements |
110 | 0 | h2p += 1; |
111 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
112 | 0 | return NULL; |
113 | 0 | } |
114 | | // General flags |
115 | 0 | *h2p = BLOSC2_VERSION_FRAME_FORMAT; // version |
116 | 0 | *h2p += 0x10; // 64-bit offsets. We only support this for now. |
117 | 0 | h2p += 1; |
118 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
119 | 0 | return NULL; |
120 | 0 | } |
121 | | |
122 | | // Frame type |
123 | | // We only support contiguous and sparse directories frames currently |
124 | 0 | *h2p = frame->sframe ? 1 : 0; |
125 | 0 | h2p += 1; |
126 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
127 | 0 | return NULL; |
128 | 0 | } |
129 | | |
130 | | // Codec flags |
131 | 0 | *h2p = schunk->compcode; |
132 | 0 | if (schunk->compcode >= BLOSC_LAST_CODEC) { |
133 | 0 | *h2p = BLOSC_UDCODEC_FORMAT; |
134 | 0 | } |
135 | 0 | *h2p += (schunk->clevel) << 4u; // clevel |
136 | 0 | h2p += 1; |
137 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
138 | 0 | return NULL; |
139 | 0 | } |
140 | | |
141 | | // Other flags |
142 | 0 | *h2p = schunk->splitmode - 1; |
143 | 0 | h2p += 1; |
144 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
145 | 0 | return NULL; |
146 | 0 | } |
147 | | |
148 | | // Uncompressed size |
149 | 0 | *h2p = 0xd3; // int64 |
150 | 0 | h2p += 1; |
151 | 0 | int64_t nbytes = schunk->nbytes; |
152 | 0 | to_big(h2p, &nbytes, sizeof(nbytes)); |
153 | 0 | h2p += 8; |
154 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
155 | 0 | return NULL; |
156 | 0 | } |
157 | | |
158 | | // Compressed size |
159 | 0 | *h2p = 0xd3; // int64 |
160 | 0 | h2p += 1; |
161 | 0 | int64_t cbytes = schunk->cbytes; |
162 | 0 | to_big(h2p, &cbytes, sizeof(cbytes)); |
163 | 0 | h2p += 8; |
164 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
165 | 0 | return NULL; |
166 | 0 | } |
167 | | |
168 | | // Type size |
169 | 0 | *h2p = 0xd2; // int32 |
170 | 0 | h2p += 1; |
171 | 0 | int32_t typesize = schunk->typesize; |
172 | 0 | to_big(h2p, &typesize, sizeof(typesize)); |
173 | 0 | h2p += 4; |
174 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
175 | 0 | return NULL; |
176 | 0 | } |
177 | | |
178 | | // Block size |
179 | 0 | *h2p = 0xd2; // int32 |
180 | 0 | h2p += 1; |
181 | 0 | int32_t blocksize = schunk->blocksize; |
182 | 0 | to_big(h2p, &blocksize, sizeof(blocksize)); |
183 | 0 | h2p += 4; |
184 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
185 | 0 | return NULL; |
186 | 0 | } |
187 | | |
188 | | // Chunk size |
189 | 0 | *h2p = 0xd2; // int32 |
190 | 0 | h2p += 1; |
191 | 0 | int32_t chunksize = schunk->chunksize; |
192 | 0 | to_big(h2p, &chunksize, sizeof(chunksize)); |
193 | 0 | h2p += 4; |
194 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
195 | 0 | return NULL; |
196 | 0 | } |
197 | | |
198 | | // Number of threads for compression |
199 | 0 | *h2p = 0xd1; // int16 |
200 | 0 | h2p += 1; |
201 | 0 | int16_t nthreads = (int16_t)schunk->cctx->nthreads; |
202 | 0 | to_big(h2p, &nthreads, sizeof(nthreads)); |
203 | 0 | h2p += 2; |
204 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
205 | 0 | return NULL; |
206 | 0 | } |
207 | | |
208 | | // Number of threads for decompression |
209 | 0 | *h2p = 0xd1; // int16 |
210 | 0 | h2p += 1; |
211 | 0 | nthreads = (int16_t)schunk->dctx->nthreads; |
212 | 0 | to_big(h2p, &nthreads, sizeof(nthreads)); |
213 | 0 | h2p += 2; |
214 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
215 | 0 | return NULL; |
216 | 0 | } |
217 | | |
218 | | // The boolean for variable-length metalayers |
219 | 0 | *h2p = (schunk->nvlmetalayers > 0) ? (uint8_t)0xc3 : (uint8_t)0xc2; |
220 | 0 | h2p += 1; |
221 | 0 | if (h2p - h2 >= FRAME_HEADER_MINLEN) { |
222 | 0 | return NULL; |
223 | 0 | } |
224 | | |
225 | | // The space for FRAME_FILTER_PIPELINE |
226 | 0 | *h2p = 0xd8; // fixext 16 |
227 | 0 | h2p += 1; |
228 | 0 | if (BLOSC2_MAX_FILTERS > FRAME_FILTER_PIPELINE_MAX) { |
229 | 0 | return NULL; |
230 | 0 | } |
231 | | // Store the filter pipeline in header |
232 | 0 | uint8_t* mp_filters = h2 + FRAME_FILTER_PIPELINE + 1; |
233 | 0 | uint8_t* mp_meta = h2 + FRAME_FILTER_PIPELINE + 1 + FRAME_FILTER_PIPELINE_MAX; |
234 | 0 | for (int i = 0; i < BLOSC2_MAX_FILTERS; i++) { |
235 | 0 | mp_filters[i] = schunk->filters[i]; |
236 | 0 | mp_meta[i] = schunk->filters_meta[i]; |
237 | 0 | } |
238 | 0 | *h2p = (uint8_t) BLOSC2_MAX_FILTERS; |
239 | 0 | h2p += 1; |
240 | 0 | h2p += 16; |
241 | | |
242 | | // User-defined codec and codec metadata |
243 | 0 | uint8_t* udcodec = h2 + FRAME_UDCODEC; |
244 | 0 | *udcodec = schunk->compcode; |
245 | 0 | uint8_t* codec_meta = h2 + FRAME_CODEC_META; |
246 | 0 | *codec_meta = schunk->compcode_meta; |
247 | |
|
248 | 0 | if (h2p - h2 != FRAME_HEADER_MINLEN) { |
249 | 0 | return NULL; |
250 | 0 | } |
251 | | |
252 | 0 | int32_t hsize = FRAME_HEADER_MINLEN; |
253 | | |
254 | | // Now, deal with metalayers |
255 | 0 | uint16_t nmetalayers = schunk->nmetalayers; |
256 | 0 | if (nmetalayers > BLOSC2_MAX_METALAYERS) { |
257 | 0 | return NULL; |
258 | 0 | } |
259 | | |
260 | | // Make space for the header of metalayers (array marker, size, map of offsets) |
261 | 0 | h2 = realloc(h2, (size_t)hsize + 1 + 1 + 2 + 1 + 2); |
262 | 0 | h2p = h2 + hsize; |
263 | | |
264 | | // The msgpack header for the metalayers (array_marker, size, map of offsets, list of metalayers) |
265 | 0 | *h2p = 0x90 + 3; // array with 3 elements |
266 | 0 | h2p += 1; |
267 | | |
268 | | // Size for the map (index) of offsets, including this uint16 size (to be filled out later on) |
269 | 0 | *h2p = 0xcd; // uint16 |
270 | 0 | h2p += 1 + 2; |
271 | | |
272 | | // Map (index) of offsets for optional metalayers |
273 | 0 | *h2p = 0xde; // map 16 with N keys |
274 | 0 | h2p += 1; |
275 | 0 | to_big(h2p, &nmetalayers, sizeof(nmetalayers)); |
276 | 0 | h2p += sizeof(nmetalayers); |
277 | 0 | int32_t current_header_len = (int32_t)(h2p - h2); |
278 | 0 | int32_t *offtooff = malloc(nmetalayers * sizeof(int32_t)); |
279 | 0 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
280 | 0 | if (frame == NULL) { |
281 | 0 | return NULL; |
282 | 0 | } |
283 | 0 | blosc2_metalayer *metalayer = schunk->metalayers[nmetalayer]; |
284 | 0 | uint8_t namelen = (uint8_t) strlen(metalayer->name); |
285 | 0 | h2 = realloc(h2, (size_t)current_header_len + 1 + namelen + 1 + 4); |
286 | 0 | h2p = h2 + current_header_len; |
287 | | // Store the metalayer |
288 | 0 | if (namelen >= (1U << 5U)) { // metalayer strings cannot be longer than 32 bytes |
289 | 0 | free(offtooff); |
290 | 0 | return NULL; |
291 | 0 | } |
292 | 0 | *h2p = (uint8_t)0xa0 + namelen; // str |
293 | 0 | h2p += 1; |
294 | 0 | memcpy(h2p, metalayer->name, namelen); |
295 | 0 | h2p += namelen; |
296 | | // Space for storing the offset for the value of this metalayer |
297 | 0 | *h2p = 0xd2; // int32 |
298 | 0 | h2p += 1; |
299 | 0 | offtooff[nmetalayer] = (int32_t)(h2p - h2); |
300 | 0 | h2p += 4; |
301 | 0 | current_header_len += 1 + namelen + 1 + 4; |
302 | 0 | } |
303 | 0 | int32_t hsize2 = (int32_t)(h2p - h2); |
304 | 0 | if (hsize2 != current_header_len) { // sanity check |
305 | 0 | return NULL; |
306 | 0 | } |
307 | | |
308 | | // Map size + int16 size |
309 | 0 | if ((uint32_t) (hsize2 - hsize) >= (1U << 16U)) { |
310 | 0 | return NULL; |
311 | 0 | } |
312 | 0 | uint16_t map_size = (uint16_t) (hsize2 - hsize); |
313 | 0 | to_big(h2 + FRAME_IDX_SIZE, &map_size, sizeof(map_size)); |
314 | | |
315 | | // Make space for an (empty) array |
316 | 0 | hsize = (int32_t)(h2p - h2); |
317 | 0 | h2 = realloc(h2, (size_t)hsize + 2 + 1 + 2); |
318 | 0 | h2p = h2 + hsize; |
319 | | |
320 | | // Now, store the values in an array |
321 | 0 | *h2p = 0xdc; // array 16 with N elements |
322 | 0 | h2p += 1; |
323 | 0 | to_big(h2p, &nmetalayers, sizeof(nmetalayers)); |
324 | 0 | h2p += sizeof(nmetalayers); |
325 | 0 | current_header_len = (int32_t)(h2p - h2); |
326 | 0 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
327 | 0 | if (frame == NULL) { |
328 | 0 | return NULL; |
329 | 0 | } |
330 | 0 | blosc2_metalayer *metalayer = schunk->metalayers[nmetalayer]; |
331 | 0 | h2 = realloc(h2, (size_t)current_header_len + 1 + 4 + metalayer->content_len); |
332 | 0 | h2p = h2 + current_header_len; |
333 | | // Store the serialized contents for this metalayer |
334 | 0 | *h2p = 0xc6; // bin32 |
335 | 0 | h2p += 1; |
336 | 0 | to_big(h2p, &(metalayer->content_len), sizeof(metalayer->content_len)); |
337 | 0 | h2p += 4; |
338 | 0 | memcpy(h2p, metalayer->content, metalayer->content_len); // buffer, no need to swap |
339 | 0 | h2p += metalayer->content_len; |
340 | | // Update the offset now that we know it |
341 | 0 | to_big(h2 + offtooff[nmetalayer], ¤t_header_len, sizeof(current_header_len)); |
342 | 0 | current_header_len += 1 + 4 + metalayer->content_len; |
343 | 0 | } |
344 | 0 | free(offtooff); |
345 | 0 | hsize = (int32_t)(h2p - h2); |
346 | 0 | if (hsize != current_header_len) { // sanity check |
347 | 0 | return NULL; |
348 | 0 | } |
349 | | |
350 | | // Set the length of the whole header now that we know it |
351 | 0 | to_big(h2 + FRAME_HEADER_LEN, &hsize, sizeof(hsize)); |
352 | |
|
353 | 0 | return h2; |
354 | 0 | } |
355 | | |
356 | | |
357 | | int get_header_info(blosc2_frame_s *frame, int32_t *header_len, int64_t *frame_len, int64_t *nbytes, int64_t *cbytes, |
358 | | int32_t *blocksize, int32_t *chunksize, int64_t *nchunks, int32_t *typesize, uint8_t *compcode, |
359 | | uint8_t *compcode_meta, uint8_t *clevel, uint8_t *filters, uint8_t *filters_meta, |
360 | 36 | uint8_t *splitmode, const blosc2_io *io) { |
361 | 36 | uint8_t* framep = frame->cframe; |
362 | 36 | uint8_t header[FRAME_HEADER_MINLEN]; |
363 | | |
364 | 36 | blosc2_io_cb *io_cb = blosc2_get_io_cb(io->id); |
365 | 36 | if (io_cb == NULL) { |
366 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
367 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
368 | 0 | } |
369 | | |
370 | 36 | if (frame->len <= 0) { |
371 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
372 | 0 | } |
373 | | |
374 | 36 | if (frame->cframe == NULL) { |
375 | 0 | int64_t rbytes = 0; |
376 | 0 | void* fp = NULL; |
377 | 0 | if (frame->sframe) { |
378 | 0 | fp = sframe_open_index(frame->urlpath, "rb", io); |
379 | 0 | if (fp == NULL) { |
380 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
381 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
382 | 0 | } |
383 | 0 | } |
384 | 0 | else { |
385 | 0 | fp = io_cb->open(frame->urlpath, "rb", io->params); |
386 | 0 | if (fp == NULL) { |
387 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
388 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
389 | 0 | } |
390 | 0 | io_cb->seek(fp, frame->file_offset, SEEK_SET); |
391 | 0 | } |
392 | 0 | rbytes = io_cb->read(header, 1, FRAME_HEADER_MINLEN, fp); |
393 | 0 | io_cb->close(fp); |
394 | 0 | if (rbytes != FRAME_HEADER_MINLEN) { |
395 | 0 | return BLOSC2_ERROR_FILE_READ; |
396 | 0 | } |
397 | 0 | framep = header; |
398 | 0 | } |
399 | | |
400 | | // Consistency check for frame type |
401 | 36 | uint8_t frame_type = framep[FRAME_TYPE]; |
402 | 36 | if (frame->sframe) { |
403 | 0 | if (frame_type != FRAME_DIRECTORY_TYPE) { |
404 | 0 | return BLOSC2_ERROR_FRAME_TYPE; |
405 | 0 | } |
406 | 36 | } else { |
407 | 36 | if (frame_type != FRAME_CONTIGUOUS_TYPE) { |
408 | 0 | return BLOSC2_ERROR_FRAME_TYPE; |
409 | 0 | } |
410 | 36 | } |
411 | | |
412 | | // Fetch some internal lengths |
413 | 36 | from_big(header_len, framep + FRAME_HEADER_LEN, sizeof(*header_len)); |
414 | 36 | if (*header_len < FRAME_HEADER_MINLEN) { |
415 | 0 | BLOSC_TRACE_ERROR("Header length is zero or smaller than min allowed."); |
416 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
417 | 0 | } |
418 | 36 | from_big(frame_len, framep + FRAME_LEN, sizeof(*frame_len)); |
419 | 36 | if (*header_len > *frame_len) { |
420 | 0 | BLOSC_TRACE_ERROR("Header length exceeds length of the frame."); |
421 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
422 | 0 | } |
423 | 36 | from_big(nbytes, framep + FRAME_NBYTES, sizeof(*nbytes)); |
424 | 36 | from_big(cbytes, framep + FRAME_CBYTES, sizeof(*cbytes)); |
425 | 36 | from_big(blocksize, framep + FRAME_BLOCKSIZE, sizeof(*blocksize)); |
426 | 36 | if (chunksize != NULL) { |
427 | 36 | from_big(chunksize, framep + FRAME_CHUNKSIZE, sizeof(*chunksize)); |
428 | 36 | } |
429 | 36 | if (typesize != NULL) { |
430 | 26 | from_big(typesize, framep + FRAME_TYPESIZE, sizeof(*typesize)); |
431 | 26 | if (*typesize <= 0) { |
432 | 0 | BLOSC_TRACE_ERROR("`typesize` cannot be zero or negative."); |
433 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
434 | 0 | } |
435 | 26 | } |
436 | | |
437 | | // Codecs |
438 | 36 | uint8_t frame_codecs = framep[FRAME_CODECS]; |
439 | 36 | if (clevel != NULL) { |
440 | 5 | *clevel = frame_codecs >> 4u; |
441 | 5 | } |
442 | 36 | if (compcode != NULL) { |
443 | 5 | *compcode = frame_codecs & 0xFu; |
444 | 5 | if (*compcode == BLOSC_UDCODEC_FORMAT) { |
445 | 0 | from_big(compcode, framep + FRAME_UDCODEC, sizeof(*compcode)); |
446 | 0 | } |
447 | 5 | } |
448 | | |
449 | | // Other flags |
450 | 36 | uint8_t other_flags = framep[FRAME_OTHER_FLAGS]; |
451 | 36 | if (splitmode != NULL) { |
452 | 5 | *splitmode = other_flags & 0x4u; |
453 | 5 | from_big(splitmode, framep + FRAME_OTHER_FLAGS, sizeof(*splitmode)); |
454 | 5 | *splitmode += 1; |
455 | 5 | } |
456 | | |
457 | 36 | if (compcode_meta != NULL) { |
458 | 5 | from_big(compcode_meta, framep + FRAME_CODEC_META, sizeof(*compcode_meta)); |
459 | 5 | } |
460 | | |
461 | | // Filters |
462 | 36 | if (filters != NULL && filters_meta != NULL) { |
463 | 5 | uint8_t nfilters = framep[FRAME_FILTER_PIPELINE]; |
464 | 5 | if (nfilters > BLOSC2_MAX_FILTERS) { |
465 | 0 | BLOSC_TRACE_ERROR("The number of filters in frame header are too large for Blosc2."); |
466 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
467 | 0 | } |
468 | 5 | uint8_t *filters_ = framep + FRAME_FILTER_PIPELINE + 1; |
469 | 5 | uint8_t *filters_meta_ = framep + FRAME_FILTER_PIPELINE + 1 + FRAME_FILTER_PIPELINE_MAX; |
470 | 35 | for (int i = 0; i < nfilters; i++) { |
471 | 30 | filters[i] = filters_[i]; |
472 | 30 | filters_meta[i] = filters_meta_[i]; |
473 | 30 | } |
474 | 5 | } |
475 | | |
476 | 36 | if (*nbytes > 0 && *chunksize > 0) { |
477 | | // We can compute the number of chunks only when the frame has actual data |
478 | 36 | *nchunks = *nbytes / *chunksize; |
479 | 36 | if (*nbytes % *chunksize > 0) { |
480 | 0 | if (*nchunks == INT32_MAX) { |
481 | 0 | BLOSC_TRACE_ERROR("Number of chunks exceeds maximum allowed."); |
482 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
483 | 0 | } |
484 | 0 | *nchunks += 1; |
485 | 0 | } |
486 | | |
487 | | // Sanity check for compressed sizes |
488 | 36 | if ((*cbytes < 0) || ((int64_t)*nchunks * *chunksize < *nbytes)) { |
489 | 0 | BLOSC_TRACE_ERROR("Invalid compressed size in frame header."); |
490 | 0 | return BLOSC2_ERROR_INVALID_HEADER; |
491 | 0 | } |
492 | 36 | } else { |
493 | 0 | *nchunks = 0; |
494 | 0 | } |
495 | | |
496 | 36 | return 0; |
497 | 36 | } |
498 | | |
499 | | |
500 | 5 | int64_t get_trailer_offset(blosc2_frame_s *frame, int32_t header_len, bool has_coffsets) { |
501 | 5 | if (!has_coffsets) { |
502 | | // No data chunks yet |
503 | 0 | return header_len; |
504 | 0 | } |
505 | 5 | return frame->len - frame->trailer_len; |
506 | 5 | } |
507 | | |
508 | | |
509 | | // Update the length in the header |
510 | 0 | int update_frame_len(blosc2_frame_s* frame, int64_t len) { |
511 | 0 | int rc = 1; |
512 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
513 | 0 | if (io_cb == NULL) { |
514 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
515 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
516 | 0 | } |
517 | | |
518 | 0 | if (frame->cframe != NULL) { |
519 | 0 | to_big(frame->cframe + FRAME_LEN, &len, sizeof(int64_t)); |
520 | 0 | } |
521 | 0 | else { |
522 | 0 | void* fp = NULL; |
523 | 0 | if (frame->sframe) { |
524 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
525 | 0 | frame->schunk->storage->io); |
526 | 0 | } |
527 | 0 | else { |
528 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
529 | 0 | } |
530 | 0 | if (fp == NULL) { |
531 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
532 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
533 | 0 | } |
534 | 0 | io_cb->seek(fp, frame->file_offset + FRAME_LEN, SEEK_SET); |
535 | 0 | int64_t swap_len; |
536 | 0 | to_big(&swap_len, &len, sizeof(int64_t)); |
537 | 0 | int64_t wbytes = io_cb->write(&swap_len, 1, sizeof(int64_t), fp); |
538 | 0 | io_cb->close(fp); |
539 | 0 | if (wbytes != sizeof(int64_t)) { |
540 | 0 | BLOSC_TRACE_ERROR("Cannot write the frame length in header."); |
541 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
542 | 0 | } |
543 | 0 | } |
544 | 0 | return rc; |
545 | 0 | } |
546 | | |
547 | | |
548 | 0 | int frame_update_trailer(blosc2_frame_s* frame, blosc2_schunk* schunk) { |
549 | 0 | if (frame != NULL && frame->len == 0) { |
550 | 0 | BLOSC_TRACE_ERROR("The trailer cannot be updated on empty frames."); |
551 | 0 | } |
552 | | |
553 | | // Create the trailer in msgpack (see the frame format document) |
554 | 0 | int64_t trailer_len = FRAME_TRAILER_MINLEN; |
555 | 0 | uint8_t* trailer = (uint8_t*)calloc((size_t)trailer_len, 1); |
556 | 0 | uint8_t* ptrailer = trailer; |
557 | 0 | *ptrailer = 0x90 + 4; // fixarray with 4 elements |
558 | 0 | ptrailer += 1; |
559 | | // Trailer format version |
560 | 0 | *ptrailer = FRAME_TRAILER_VERSION; |
561 | 0 | ptrailer += 1; |
562 | |
|
563 | 0 | int32_t current_trailer_len = (int32_t)(ptrailer - trailer); |
564 | | |
565 | | // Now, deal with variable-length metalayers |
566 | 0 | int16_t nvlmetalayers = schunk->nvlmetalayers; |
567 | 0 | if (nvlmetalayers < 0 || nvlmetalayers > BLOSC2_MAX_METALAYERS) { |
568 | 0 | return -1; |
569 | 0 | } |
570 | | |
571 | | // Make space for the header of metalayers (array marker, size, map of offsets) |
572 | 0 | trailer = realloc(trailer, (size_t) current_trailer_len + 1 + 1 + 2 + 1 + 2); |
573 | 0 | ptrailer = trailer + current_trailer_len; |
574 | | |
575 | | // The msgpack header for the metalayers (array_marker, size, map of offsets, list of metalayers) |
576 | 0 | *ptrailer = 0x90 + 3; // array with 3 elements |
577 | 0 | ptrailer += 1; |
578 | |
|
579 | 0 | int32_t tsize = (int32_t)(ptrailer - trailer); |
580 | | |
581 | | // Size for the map (index) of metalayer offsets, including this uint16 size (to be filled out later on) |
582 | 0 | *ptrailer = 0xcd; // uint16 |
583 | 0 | ptrailer += 1 + 2; |
584 | | |
585 | | // Map (index) of offsets for optional metalayers |
586 | 0 | *ptrailer = 0xde; // map 16 with N keys |
587 | 0 | ptrailer += 1; |
588 | 0 | to_big(ptrailer, &nvlmetalayers, sizeof(nvlmetalayers)); |
589 | 0 | ptrailer += sizeof(nvlmetalayers); |
590 | 0 | current_trailer_len = (int32_t)(ptrailer - trailer); |
591 | 0 | int32_t *offtodata = malloc(nvlmetalayers * sizeof(int32_t)); |
592 | 0 | for (int nvlmetalayer = 0; nvlmetalayer < nvlmetalayers; nvlmetalayer++) { |
593 | 0 | if (frame == NULL) { |
594 | 0 | return -1; |
595 | 0 | } |
596 | 0 | blosc2_metalayer *vlmetalayer = schunk->vlmetalayers[nvlmetalayer]; |
597 | 0 | uint8_t name_len = (uint8_t) strlen(vlmetalayer->name); |
598 | 0 | trailer = realloc(trailer, (size_t)current_trailer_len + 1 + name_len + 1 + 4); |
599 | 0 | ptrailer = trailer + current_trailer_len; |
600 | | // Store the vlmetalayer |
601 | 0 | if (name_len >= (1U << 5U)) { // metalayer strings cannot be longer than 32 bytes |
602 | 0 | free(offtodata); |
603 | 0 | return -1; |
604 | 0 | } |
605 | 0 | *ptrailer = (uint8_t)0xa0 + name_len; // str |
606 | 0 | ptrailer += 1; |
607 | 0 | memcpy(ptrailer, vlmetalayer->name, name_len); |
608 | 0 | ptrailer += name_len; |
609 | | // Space for storing the offset for the value of this vlmetalayer |
610 | 0 | *ptrailer = 0xd2; // int32 |
611 | 0 | ptrailer += 1; |
612 | 0 | offtodata[nvlmetalayer] = (int32_t)(ptrailer - trailer); |
613 | 0 | ptrailer += 4; |
614 | 0 | current_trailer_len += 1 + name_len + 1 + 4; |
615 | 0 | } |
616 | 0 | int32_t tsize2 = (int32_t)(ptrailer - trailer); |
617 | 0 | if (tsize2 != current_trailer_len) { // sanity check |
618 | 0 | return -1; |
619 | 0 | } |
620 | | |
621 | | // Map size + int16 size |
622 | 0 | if ((uint32_t) (tsize2 - tsize) >= (1U << 16U)) { |
623 | 0 | return -1; |
624 | 0 | } |
625 | 0 | uint16_t map_size = (uint16_t) (tsize2 - tsize); |
626 | 0 | to_big(trailer + 4, &map_size, sizeof(map_size)); |
627 | | |
628 | | // Make space for an (empty) array |
629 | 0 | tsize = (int32_t)(ptrailer - trailer); |
630 | 0 | trailer = realloc(trailer, (size_t) tsize + 2 + 1 + 2); |
631 | 0 | ptrailer = trailer + tsize; |
632 | | |
633 | | // Now, store the values in an array |
634 | 0 | *ptrailer = 0xdc; // array 16 with N elements |
635 | 0 | ptrailer += 1; |
636 | 0 | to_big(ptrailer, &nvlmetalayers, sizeof(nvlmetalayers)); |
637 | 0 | ptrailer += sizeof(nvlmetalayers); |
638 | 0 | current_trailer_len = (int32_t)(ptrailer - trailer); |
639 | 0 | for (int nvlmetalayer = 0; nvlmetalayer < nvlmetalayers; nvlmetalayer++) { |
640 | 0 | if (frame == NULL) { |
641 | 0 | return -1; |
642 | 0 | } |
643 | 0 | blosc2_metalayer *vlmetalayer = schunk->vlmetalayers[nvlmetalayer]; |
644 | 0 | trailer = realloc(trailer, (size_t)current_trailer_len + 1 + 4 + vlmetalayer->content_len); |
645 | 0 | ptrailer = trailer + current_trailer_len; |
646 | | // Store the serialized contents for this vlmetalayer |
647 | 0 | *ptrailer = 0xc6; // bin32 |
648 | 0 | ptrailer += 1; |
649 | 0 | to_big(ptrailer, &(vlmetalayer->content_len), sizeof(vlmetalayer->content_len)); |
650 | 0 | ptrailer += 4; |
651 | 0 | memcpy(ptrailer, vlmetalayer->content, vlmetalayer->content_len); // buffer, no need to swap |
652 | 0 | ptrailer += vlmetalayer->content_len; |
653 | | // Update the offset now that we know it |
654 | 0 | to_big(trailer + offtodata[nvlmetalayer], ¤t_trailer_len, sizeof(current_trailer_len)); |
655 | 0 | current_trailer_len += 1 + 4 + vlmetalayer->content_len; |
656 | 0 | } |
657 | 0 | free(offtodata); |
658 | 0 | tsize = (int32_t)(ptrailer - trailer); |
659 | 0 | if (tsize != current_trailer_len) { // sanity check |
660 | 0 | return -1; |
661 | 0 | } |
662 | | |
663 | 0 | trailer = realloc(trailer, (size_t)current_trailer_len + 23); |
664 | 0 | ptrailer = trailer + current_trailer_len; |
665 | 0 | trailer_len = (ptrailer - trailer) + 23; |
666 | | |
667 | | // Trailer length |
668 | 0 | *ptrailer = 0xce; // uint32 |
669 | 0 | ptrailer += 1; |
670 | 0 | to_big(ptrailer, &trailer_len, sizeof(uint32_t)); |
671 | 0 | ptrailer += sizeof(uint32_t); |
672 | | // Up to 16 bytes for frame fingerprint (using XXH3 included in https://github.com/Cyan4973/xxHash) |
673 | | // Maybe someone would need 256-bit in the future, but for the time being 128-bit seems like a good tradeoff |
674 | 0 | *ptrailer = 0xd8; // fixext 16 |
675 | 0 | ptrailer += 1; |
676 | 0 | *ptrailer = 0; // fingerprint type: 0 -> no fp; 1 -> 32-bit; 2 -> 64-bit; 3 -> 128-bit |
677 | 0 | ptrailer += 1; |
678 | | |
679 | | // Remove call to memset when we compute an actual fingerprint |
680 | 0 | memset(ptrailer, 0, 16); |
681 | | // Uncomment call to memcpy when we compute an actual fingerprint |
682 | | // memcpy(ptrailer, xxh3_fingerprint, sizeof(xxh3_fingerprint)); |
683 | 0 | ptrailer += 16; |
684 | | |
685 | | // Sanity check |
686 | 0 | if (ptrailer - trailer != trailer_len) { |
687 | 0 | return BLOSC2_ERROR_DATA; |
688 | 0 | } |
689 | | |
690 | 0 | int32_t header_len; |
691 | 0 | int64_t frame_len; |
692 | 0 | int64_t nbytes; |
693 | 0 | int64_t cbytes; |
694 | 0 | int32_t blocksize; |
695 | 0 | int32_t chunksize; |
696 | 0 | int64_t nchunks; |
697 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
698 | 0 | &blocksize, &chunksize, &nchunks, |
699 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
700 | 0 | frame->schunk->storage->io); |
701 | 0 | if (ret < 0) { |
702 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
703 | 0 | return ret; |
704 | 0 | } |
705 | | |
706 | 0 | int64_t trailer_offset = get_trailer_offset(frame, header_len, nbytes > 0); |
707 | |
|
708 | 0 | if (trailer_offset < BLOSC_EXTENDED_HEADER_LENGTH) { |
709 | 0 | BLOSC_TRACE_ERROR("Unable to get trailer offset in frame."); |
710 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
711 | 0 | } |
712 | | |
713 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
714 | 0 | if (io_cb == NULL) { |
715 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
716 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
717 | 0 | } |
718 | | // Update the trailer. As there are no internal offsets to the trailer section, |
719 | | // and it is always at the end of the frame, we can just write (or overwrite) it |
720 | | // at the end of the frame. |
721 | 0 | if (frame->cframe != NULL) { |
722 | 0 | frame->cframe = realloc(frame->cframe, (size_t)(trailer_offset + trailer_len)); |
723 | 0 | if (frame->cframe == NULL) { |
724 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
725 | 0 | return BLOSC2_ERROR_MEMORY_ALLOC; |
726 | 0 | } |
727 | 0 | memcpy(frame->cframe + trailer_offset, trailer, trailer_len); |
728 | 0 | } |
729 | 0 | else { |
730 | 0 | void* fp = NULL; |
731 | 0 | if (frame->sframe) { |
732 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
733 | 0 | frame->schunk->storage->io); |
734 | 0 | } |
735 | 0 | else { |
736 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
737 | 0 | } |
738 | 0 | if (fp == NULL) { |
739 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
740 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
741 | 0 | } |
742 | 0 | io_cb->seek(fp, frame->file_offset + trailer_offset, SEEK_SET); |
743 | 0 | int64_t wbytes = io_cb->write(trailer, 1, trailer_len, fp); |
744 | 0 | if (wbytes != trailer_len) { |
745 | 0 | BLOSC_TRACE_ERROR("Cannot write the trailer length in trailer."); |
746 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
747 | 0 | } |
748 | 0 | if (io_cb->truncate(fp, trailer_offset + trailer_len) != 0) { |
749 | 0 | BLOSC_TRACE_ERROR("Cannot truncate the frame."); |
750 | 0 | return BLOSC2_ERROR_FILE_TRUNCATE; |
751 | 0 | } |
752 | 0 | io_cb->close(fp); |
753 | |
|
754 | 0 | } |
755 | 0 | free(trailer); |
756 | |
|
757 | 0 | int rc = update_frame_len(frame, trailer_offset + trailer_len); |
758 | 0 | if (rc < 0) { |
759 | 0 | return rc; |
760 | 0 | } |
761 | 0 | frame->len = trailer_offset + trailer_len; |
762 | 0 | frame->trailer_len = trailer_len; |
763 | |
|
764 | 0 | return 1; |
765 | 0 | } |
766 | | |
767 | | |
// Strip a leading "file:///" from `urlpath`, if present.
// This is a temporary workaround for allowing to use proper URLs for local files/dirs.
// The result points inside `urlpath` (nothing is allocated): either just past
// the 8-character prefix, or at the start of the string when there is no prefix.
static char* normalize_urlpath(const char* urlpath) {
  static const char scheme[] = "file:///";
  const size_t scheme_len = sizeof(scheme) - 1;  // do not count the trailing NUL

  if (strncmp(urlpath, scheme, scheme_len) == 0) {
    // There is a file:/// prefix.  Get rid of it.
    return (char*)urlpath + scheme_len;
  }
  return (char*)urlpath;
}
781 | | |
782 | | |
783 | | /* Initialize a frame out of a file */ |
784 | 0 | blosc2_frame_s* frame_from_file_offset(const char* urlpath, const blosc2_io *io, int64_t offset) { |
785 | | // Get the length of the frame |
786 | 0 | uint8_t header[FRAME_HEADER_MINLEN]; |
787 | 0 | uint8_t trailer[FRAME_TRAILER_MINLEN]; |
788 | |
|
789 | 0 | void* fp = NULL; |
790 | 0 | bool sframe = false; |
791 | 0 | struct stat path_stat; |
792 | |
|
793 | 0 | urlpath = normalize_urlpath(urlpath); |
794 | |
|
795 | 0 | if(stat(urlpath, &path_stat) < 0) { |
796 | 0 | BLOSC_TRACE_ERROR("Cannot get information about the path %s.", urlpath); |
797 | 0 | return NULL; |
798 | 0 | } |
799 | | |
800 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(io->id); |
801 | 0 | if (io_cb == NULL) { |
802 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
803 | 0 | return NULL; |
804 | 0 | } |
805 | | |
806 | 0 | char* urlpath_cpy; |
807 | 0 | if (path_stat.st_mode & S_IFDIR) { |
808 | 0 | urlpath_cpy = malloc(strlen(urlpath) + 1); |
809 | 0 | strcpy(urlpath_cpy, urlpath); |
810 | 0 | char last_char = urlpath[strlen(urlpath) - 1]; |
811 | 0 | if (last_char == '\\' || last_char == '/') { |
812 | 0 | urlpath_cpy[strlen(urlpath) - 1] = '\0'; |
813 | 0 | } |
814 | 0 | else { |
815 | 0 | } |
816 | 0 | fp = sframe_open_index(urlpath_cpy, "rb", io); |
817 | 0 | sframe = true; |
818 | 0 | } |
819 | 0 | else { |
820 | 0 | urlpath_cpy = malloc(strlen(urlpath) + 1); |
821 | 0 | strcpy(urlpath_cpy, urlpath); |
822 | 0 | fp = io_cb->open(urlpath, "rb", io->params); |
823 | 0 | } |
824 | 0 | if (fp == NULL) { |
825 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", urlpath); |
826 | 0 | return NULL; |
827 | 0 | } |
828 | 0 | io_cb->seek(fp, offset, SEEK_SET); |
829 | 0 | int64_t rbytes = io_cb->read(header, 1, FRAME_HEADER_MINLEN, fp); |
830 | 0 | if (rbytes != FRAME_HEADER_MINLEN) { |
831 | 0 | BLOSC_TRACE_ERROR("Cannot read from file '%s'.", urlpath); |
832 | 0 | io_cb->close(fp); |
833 | 0 | free(urlpath_cpy); |
834 | 0 | return NULL; |
835 | 0 | } |
836 | 0 | int64_t frame_len; |
837 | 0 | to_big(&frame_len, header + FRAME_LEN, sizeof(frame_len)); |
838 | |
|
839 | 0 | blosc2_frame_s* frame = calloc(1, sizeof(blosc2_frame_s)); |
840 | 0 | frame->urlpath = urlpath_cpy; |
841 | 0 | frame->len = frame_len; |
842 | 0 | frame->sframe = sframe; |
843 | 0 | frame->file_offset = offset; |
844 | | |
845 | | // Now, the trailer length |
846 | 0 | io_cb->seek(fp, offset + frame_len - FRAME_TRAILER_MINLEN, SEEK_SET); |
847 | 0 | rbytes = io_cb->read(trailer, 1, FRAME_TRAILER_MINLEN, fp); |
848 | 0 | io_cb->close(fp); |
849 | 0 | if (rbytes != FRAME_TRAILER_MINLEN) { |
850 | 0 | BLOSC_TRACE_ERROR("Cannot read from file '%s'.", urlpath); |
851 | 0 | free(urlpath_cpy); |
852 | 0 | free(frame); |
853 | 0 | return NULL; |
854 | 0 | } |
855 | 0 | int trailer_offset = FRAME_TRAILER_MINLEN - FRAME_TRAILER_LEN_OFFSET; |
856 | 0 | if (trailer[trailer_offset - 1] != 0xce) { |
857 | 0 | free(urlpath_cpy); |
858 | 0 | free(frame); |
859 | 0 | return NULL; |
860 | 0 | } |
861 | 0 | uint32_t trailer_len; |
862 | 0 | to_big(&trailer_len, trailer + trailer_offset, sizeof(trailer_len)); |
863 | 0 | frame->trailer_len = trailer_len; |
864 | |
|
865 | 0 | return frame; |
866 | 0 | } |
867 | | |
868 | | |
869 | | /* Initialize a frame out of a contiguous frame buffer */ |
870 | 10 | blosc2_frame_s* frame_from_cframe(uint8_t *cframe, int64_t len, bool copy) { |
871 | | // Get the length of the frame |
872 | 10 | const uint8_t* header = cframe; |
873 | 10 | int64_t frame_len; |
874 | 10 | if (len < FRAME_HEADER_MINLEN) { |
875 | 0 | return NULL; |
876 | 0 | } |
877 | | |
878 | 10 | from_big(&frame_len, header + FRAME_LEN, sizeof(frame_len)); |
879 | 10 | if (frame_len != len) { // sanity check |
880 | 5 | return NULL; |
881 | 5 | } |
882 | | |
883 | 5 | blosc2_frame_s* frame = calloc(1, sizeof(blosc2_frame_s)); |
884 | 5 | frame->len = frame_len; |
885 | 5 | frame->file_offset = 0; |
886 | | |
887 | | // Now, the trailer length |
888 | 5 | const uint8_t* trailer = cframe + frame_len - FRAME_TRAILER_MINLEN; |
889 | 5 | int trailer_offset = FRAME_TRAILER_MINLEN - FRAME_TRAILER_LEN_OFFSET; |
890 | 5 | if (trailer[trailer_offset - 1] != 0xce) { |
891 | 0 | free(frame); |
892 | 0 | return NULL; |
893 | 0 | } |
894 | 5 | uint32_t trailer_len; |
895 | 5 | from_big(&trailer_len, trailer + trailer_offset, sizeof(trailer_len)); |
896 | 5 | frame->trailer_len = trailer_len; |
897 | | |
898 | 5 | if (copy) { |
899 | 0 | frame->cframe = malloc((size_t)len); |
900 | 0 | memcpy(frame->cframe, cframe, (size_t)len); |
901 | 0 | } |
902 | 5 | else { |
903 | 5 | frame->cframe = cframe; |
904 | 5 | frame->avoid_cframe_free = true; |
905 | 5 | } |
906 | | |
907 | 5 | return frame; |
908 | 5 | } |
909 | | |
910 | | |
911 | | /* Create a frame out of a super-chunk. */ |
912 | 0 | int64_t frame_from_schunk(blosc2_schunk *schunk, blosc2_frame_s *frame) { |
913 | 0 | frame->file_offset = 0; |
914 | 0 | int64_t nchunks = schunk->nchunks; |
915 | 0 | int64_t cbytes = schunk->cbytes; |
916 | 0 | int32_t chunk_cbytes; |
917 | 0 | int32_t chunk_nbytes; |
918 | 0 | void* fp = NULL; |
919 | 0 | int rc; |
920 | |
|
921 | 0 | uint8_t* h2 = new_header_frame(schunk, frame); |
922 | 0 | if (h2 == NULL) { |
923 | 0 | return BLOSC2_ERROR_DATA; |
924 | 0 | } |
925 | 0 | uint32_t h2len; |
926 | 0 | from_big(&h2len, h2 + FRAME_HEADER_LEN, sizeof(h2len)); |
927 | | // Build the offsets chunk |
928 | 0 | int32_t chunksize = -1; |
929 | 0 | int32_t off_cbytes = 0; |
930 | 0 | uint64_t coffset = 0; |
931 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
932 | 0 | uint64_t* data_tmp = malloc(off_nbytes); |
933 | 0 | bool needs_free = false; |
934 | 0 | for (int i = 0; i < nchunks; i++) { |
935 | 0 | uint8_t* data_chunk; |
936 | 0 | data_chunk = schunk->data[i]; |
937 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, &chunk_nbytes, &chunk_cbytes, NULL); |
938 | 0 | if (rc < 0) { |
939 | 0 | return rc; |
940 | 0 | } |
941 | 0 | data_tmp[i] = coffset; |
942 | 0 | coffset += chunk_cbytes; |
943 | 0 | int32_t chunksize_ = chunk_nbytes; |
944 | 0 | if (i == 0) { |
945 | 0 | chunksize = chunksize_; |
946 | 0 | } |
947 | 0 | else if (chunksize != chunksize_) { |
948 | | // Variable size // TODO: update flags for this (or do not use them at all) |
949 | 0 | chunksize = 0; |
950 | 0 | } |
951 | 0 | if (needs_free) { |
952 | 0 | free(data_chunk); |
953 | 0 | } |
954 | 0 | } |
955 | 0 | if ((int64_t)coffset != cbytes) { |
956 | 0 | free(data_tmp); |
957 | 0 | return BLOSC2_ERROR_DATA; |
958 | 0 | } |
959 | 0 | uint8_t *off_chunk = NULL; |
960 | 0 | if (nchunks > 0) { |
961 | | // Compress the chunk of offsets |
962 | 0 | off_chunk = malloc(off_nbytes + BLOSC2_MAX_OVERHEAD); |
963 | 0 | blosc2_context *cctx = blosc2_create_cctx(BLOSC2_CPARAMS_DEFAULTS); |
964 | 0 | if (cctx == NULL) { |
965 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
966 | 0 | return BLOSC2_ERROR_NULL_POINTER; |
967 | 0 | } |
968 | 0 | cctx->typesize = sizeof(int64_t); |
969 | 0 | off_cbytes = blosc2_compress_ctx(cctx, data_tmp, off_nbytes, off_chunk, |
970 | 0 | off_nbytes + BLOSC2_MAX_OVERHEAD); |
971 | 0 | blosc2_free_ctx(cctx); |
972 | 0 | if (off_cbytes < 0) { |
973 | 0 | free(off_chunk); |
974 | 0 | free(h2); |
975 | 0 | return off_cbytes; |
976 | 0 | } |
977 | 0 | } |
978 | 0 | else { |
979 | 0 | off_cbytes = 0; |
980 | 0 | } |
981 | 0 | free(data_tmp); |
982 | | |
983 | | // Now that we know them, fill the chunksize and frame length in header |
984 | 0 | to_big(h2 + FRAME_CHUNKSIZE, &chunksize, sizeof(chunksize)); |
985 | 0 | frame->len = h2len + cbytes + off_cbytes + FRAME_TRAILER_MINLEN; |
986 | 0 | if (frame->sframe) { |
987 | 0 | frame->len = h2len + off_cbytes + FRAME_TRAILER_MINLEN; |
988 | 0 | } |
989 | 0 | int64_t tbytes = frame->len; |
990 | 0 | to_big(h2 + FRAME_LEN, &tbytes, sizeof(tbytes)); |
991 | |
|
992 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
993 | 0 | if (io_cb == NULL) { |
994 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
995 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
996 | 0 | } |
997 | | |
998 | | // Create the frame and put the header at the beginning |
999 | 0 | if (frame->urlpath == NULL) { |
1000 | 0 | frame->cframe = malloc((size_t)frame->len); |
1001 | 0 | memcpy(frame->cframe, h2, h2len); |
1002 | 0 | } |
1003 | 0 | else { |
1004 | 0 | if (frame->sframe) { |
1005 | 0 | fp = sframe_open_index(frame->urlpath, "wb", |
1006 | 0 | frame->schunk->storage->io); |
1007 | 0 | } |
1008 | 0 | else { |
1009 | 0 | fp = io_cb->open(frame->urlpath, "wb", frame->schunk->storage->io->params); |
1010 | 0 | } |
1011 | 0 | if (fp == NULL) { |
1012 | 0 | BLOSC_TRACE_ERROR("Error creating file in: %s", frame->urlpath); |
1013 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1014 | 0 | } |
1015 | 0 | io_cb->write(h2, h2len, 1, fp); |
1016 | 0 | } |
1017 | 0 | free(h2); |
1018 | | |
1019 | | // Fill the frame with the actual data chunks |
1020 | 0 | if (!frame->sframe) { |
1021 | 0 | coffset = 0; |
1022 | 0 | for (int i = 0; i < nchunks; i++) { |
1023 | 0 | uint8_t* data_chunk = schunk->data[i]; |
1024 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, NULL, &chunk_cbytes, NULL); |
1025 | 0 | if (rc < 0) { |
1026 | 0 | return rc; |
1027 | 0 | } |
1028 | 0 | if (frame->urlpath == NULL) { |
1029 | 0 | memcpy(frame->cframe + h2len + coffset, data_chunk, (size_t)chunk_cbytes); |
1030 | 0 | } else { |
1031 | 0 | io_cb->write(data_chunk, chunk_cbytes, 1, fp); |
1032 | 0 | } |
1033 | 0 | coffset += chunk_cbytes; |
1034 | 0 | } |
1035 | 0 | if ((int64_t)coffset != cbytes) { |
1036 | 0 | return BLOSC2_ERROR_FAILURE; |
1037 | 0 | } |
1038 | 0 | } |
1039 | | |
1040 | | // Copy the offsets chunk at the end of the frame |
1041 | 0 | if (frame->urlpath == NULL) { |
1042 | 0 | memcpy(frame->cframe + h2len + cbytes, off_chunk, off_cbytes); |
1043 | 0 | } |
1044 | 0 | else { |
1045 | 0 | io_cb->write(off_chunk, off_cbytes, 1, fp); |
1046 | 0 | io_cb->close(fp); |
1047 | 0 | } |
1048 | 0 | free(off_chunk); |
1049 | 0 | rc = frame_update_trailer(frame, schunk); |
1050 | 0 | if (rc < 0) { |
1051 | 0 | return rc; |
1052 | 0 | } |
1053 | | |
1054 | 0 | return frame->len; |
1055 | 0 | } |
1056 | | |
1057 | | |
1058 | | // Get the compressed data offsets |
1059 | | uint8_t* get_coffsets(blosc2_frame_s *frame, int32_t header_len, int64_t cbytes, |
1060 | 21 | int64_t nchunks, int32_t *off_cbytes) { |
1061 | 21 | int32_t chunk_cbytes; |
1062 | 21 | int rc; |
1063 | | |
1064 | 21 | if (frame->coffsets != NULL) { |
1065 | 0 | if (off_cbytes != NULL) { |
1066 | 0 | rc = blosc2_cbuffer_sizes(frame->coffsets, NULL, &chunk_cbytes, NULL); |
1067 | 0 | if (rc < 0) { |
1068 | 0 | return NULL; |
1069 | 0 | } |
1070 | 0 | *off_cbytes = (int32_t)chunk_cbytes; |
1071 | 0 | } |
1072 | 0 | return frame->coffsets; |
1073 | 0 | } |
1074 | 21 | if (frame->cframe != NULL) { |
1075 | 21 | int64_t off_pos = header_len; |
1076 | 21 | if (cbytes < INT64_MAX - header_len) { |
1077 | 21 | off_pos += cbytes; |
1078 | 21 | } |
1079 | | // Check that there is enough room to read Blosc header |
1080 | 21 | if (off_pos < 0 || off_pos > INT64_MAX - BLOSC_EXTENDED_HEADER_LENGTH || |
1081 | 21 | off_pos + BLOSC_EXTENDED_HEADER_LENGTH > frame->len) { |
1082 | 0 | BLOSC_TRACE_ERROR("Cannot read the offsets outside of frame boundary."); |
1083 | 0 | return NULL; |
1084 | 0 | } |
1085 | | // For in-memory frames, the coffset is just one pointer away |
1086 | 21 | uint8_t* off_start = frame->cframe + off_pos; |
1087 | 21 | if (off_cbytes != NULL) { |
1088 | 21 | int32_t chunk_nbytes; |
1089 | 21 | int32_t chunk_blocksize; |
1090 | 21 | rc = blosc2_cbuffer_sizes(off_start, &chunk_nbytes, &chunk_cbytes, &chunk_blocksize); |
1091 | 21 | if (rc < 0) { |
1092 | 0 | return NULL; |
1093 | 0 | } |
1094 | 21 | *off_cbytes = (int32_t)chunk_cbytes; |
1095 | 21 | if (*off_cbytes < 0 || off_pos + *off_cbytes > frame->len) { |
1096 | 0 | BLOSC_TRACE_ERROR("Cannot read the cbytes outside of frame boundary."); |
1097 | 0 | return NULL; |
1098 | 0 | } |
1099 | 21 | if ((uint64_t)chunk_nbytes != nchunks * sizeof(int64_t)) { |
1100 | 0 | BLOSC_TRACE_ERROR("The number of chunks in offset idx " |
1101 | 0 | "does not match the ones in the header frame."); |
1102 | 0 | return NULL; |
1103 | 0 | } |
1104 | | |
1105 | 21 | } |
1106 | 21 | return off_start; |
1107 | 21 | } |
1108 | | |
1109 | 0 | int64_t trailer_offset = get_trailer_offset(frame, header_len, true); |
1110 | |
|
1111 | 0 | if (trailer_offset < BLOSC_EXTENDED_HEADER_LENGTH || trailer_offset + FRAME_TRAILER_MINLEN > frame->len) { |
1112 | 0 | BLOSC_TRACE_ERROR("Cannot read the trailer out of the frame."); |
1113 | 0 | return NULL; |
1114 | 0 | } |
1115 | | |
1116 | 0 | int32_t coffsets_cbytes; |
1117 | 0 | if (frame->sframe) { |
1118 | 0 | coffsets_cbytes = (int32_t)(trailer_offset - (header_len + 0)); |
1119 | 0 | } |
1120 | 0 | else { |
1121 | 0 | coffsets_cbytes = (int32_t)(trailer_offset - (header_len + cbytes)); |
1122 | 0 | } |
1123 | |
|
1124 | 0 | if (off_cbytes != NULL) { |
1125 | 0 | *off_cbytes = coffsets_cbytes; |
1126 | 0 | } |
1127 | |
|
1128 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1129 | 0 | if (io_cb == NULL) { |
1130 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1131 | 0 | return NULL; |
1132 | 0 | } |
1133 | | |
1134 | 0 | void* fp = NULL; |
1135 | 0 | uint8_t* coffsets = malloc((size_t)coffsets_cbytes); |
1136 | 0 | if (frame->sframe) { |
1137 | 0 | fp = sframe_open_index(frame->urlpath, "rb", |
1138 | 0 | frame->schunk->storage->io); |
1139 | 0 | if (fp == NULL) { |
1140 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1141 | 0 | return NULL; |
1142 | 0 | } |
1143 | 0 | io_cb->seek(fp, header_len + 0, SEEK_SET); |
1144 | 0 | } |
1145 | 0 | else { |
1146 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1147 | 0 | if (fp == NULL) { |
1148 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1149 | 0 | return NULL; |
1150 | 0 | } |
1151 | 0 | io_cb->seek(fp, frame->file_offset + header_len + cbytes, SEEK_SET); |
1152 | 0 | } |
1153 | 0 | int64_t rbytes = io_cb->read(coffsets, 1, coffsets_cbytes, fp); |
1154 | 0 | io_cb->close(fp); |
1155 | 0 | if (rbytes != coffsets_cbytes) { |
1156 | 0 | BLOSC_TRACE_ERROR("Cannot read the offsets out of the frame."); |
1157 | 0 | free(coffsets); |
1158 | 0 | return NULL; |
1159 | 0 | } |
1160 | 0 | frame->coffsets = coffsets; |
1161 | 0 | return coffsets; |
1162 | 0 | } |
1163 | | |
1164 | | |
1165 | | // Get the data offsets from a frame |
1166 | 0 | int64_t* blosc2_frame_get_offsets(blosc2_schunk *schunk) { |
1167 | 0 | if (schunk->frame == NULL) { |
1168 | 0 | BLOSC_TRACE_ERROR("This function needs a frame."); |
1169 | 0 | return NULL; |
1170 | 0 | } |
1171 | 0 | blosc2_frame_s* frame = (blosc2_frame_s*)schunk->frame; |
1172 | | |
1173 | | // Get header info |
1174 | 0 | int32_t header_len; |
1175 | 0 | int64_t frame_len; |
1176 | 0 | int64_t nbytes; |
1177 | 0 | int64_t cbytes; |
1178 | 0 | int32_t blocksize; |
1179 | 0 | int32_t chunksize; |
1180 | 0 | int64_t nchunks; |
1181 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
1182 | 0 | &blocksize, &chunksize, &nchunks, |
1183 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1184 | 0 | frame->schunk->storage->io); |
1185 | 0 | if (ret < 0) { |
1186 | 0 | BLOSC_TRACE_ERROR("Cannot get the header info for the frame."); |
1187 | 0 | return NULL; |
1188 | 0 | } |
1189 | | |
1190 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
1191 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
1192 | |
|
1193 | 0 | int32_t coffsets_cbytes = 0; |
1194 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
1195 | | // Decompress offsets |
1196 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
1197 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
1198 | 0 | if (dctx == NULL) { |
1199 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
1200 | 0 | return NULL; |
1201 | 0 | } |
1202 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, |
1203 | 0 | offsets, off_nbytes); |
1204 | 0 | blosc2_free_ctx(dctx); |
1205 | 0 | if (prev_nbytes < 0) { |
1206 | 0 | free(offsets); |
1207 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
1208 | 0 | return NULL; |
1209 | 0 | } |
1210 | 0 | return offsets; |
1211 | 0 | } |
1212 | | |
1213 | | |
1214 | 0 | int frame_update_header(blosc2_frame_s* frame, blosc2_schunk* schunk, bool new) { |
1215 | 0 | uint8_t* framep = frame->cframe; |
1216 | 0 | uint8_t header[FRAME_HEADER_MINLEN]; |
1217 | |
|
1218 | 0 | if (frame->len <= 0) { |
1219 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
1220 | 0 | } |
1221 | | |
1222 | 0 | if (new && schunk->cbytes > 0) { |
1223 | 0 | BLOSC_TRACE_ERROR("New metalayers cannot be added after actual data " |
1224 | 0 | "has been appended."); |
1225 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
1226 | 0 | } |
1227 | | |
1228 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1229 | 0 | if (io_cb == NULL) { |
1230 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1231 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
1232 | 0 | } |
1233 | | |
1234 | 0 | if (frame->cframe == NULL) { |
1235 | 0 | int64_t rbytes = 0; |
1236 | 0 | void* fp = NULL; |
1237 | 0 | if (frame->sframe) { |
1238 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
1239 | 0 | frame->schunk->storage->io); |
1240 | 0 | if (fp == NULL) { |
1241 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1242 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1243 | 0 | } |
1244 | 0 | } |
1245 | 0 | else { |
1246 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1247 | 0 | if (fp == NULL) { |
1248 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1249 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1250 | 0 | } |
1251 | 0 | io_cb->seek(fp, frame->file_offset, SEEK_SET); |
1252 | 0 | } |
1253 | 0 | if (fp != NULL) { |
1254 | 0 | rbytes = io_cb->read(header, 1, FRAME_HEADER_MINLEN, fp); |
1255 | 0 | io_cb->close(fp); |
1256 | 0 | } |
1257 | 0 | (void) rbytes; |
1258 | 0 | if (rbytes != FRAME_HEADER_MINLEN) { |
1259 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
1260 | 0 | } |
1261 | 0 | framep = header; |
1262 | 0 | } |
1263 | 0 | uint32_t prev_h2len; |
1264 | 0 | from_big(&prev_h2len, framep + FRAME_HEADER_LEN, sizeof(prev_h2len)); |
1265 | | |
1266 | | // Build a new header |
1267 | 0 | uint8_t* h2 = new_header_frame(schunk, frame); |
1268 | 0 | uint32_t h2len; |
1269 | 0 | from_big(&h2len, h2 + FRAME_HEADER_LEN, sizeof(h2len)); |
1270 | | |
1271 | | // The frame length is outdated when adding a new metalayer, so update it |
1272 | 0 | if (new) { |
1273 | 0 | int64_t frame_len = h2len; // at adding time, we only have to worry of the header for now |
1274 | 0 | to_big(h2 + FRAME_LEN, &frame_len, sizeof(frame_len)); |
1275 | 0 | frame->len = frame_len; |
1276 | 0 | } |
1277 | |
|
1278 | 0 | if (!new && prev_h2len != h2len) { |
1279 | 0 | BLOSC_TRACE_ERROR("The new metalayer sizes should be equal the existing ones."); |
1280 | 0 | return BLOSC2_ERROR_DATA; |
1281 | 0 | } |
1282 | | |
1283 | 0 | void* fp = NULL; |
1284 | 0 | if (frame->cframe == NULL) { |
1285 | | // Write updated header down to file |
1286 | 0 | if (frame->sframe) { |
1287 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
1288 | 0 | frame->schunk->storage->io); |
1289 | 0 | } |
1290 | 0 | else { |
1291 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
1292 | 0 | } |
1293 | 0 | if (fp == NULL) { |
1294 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1295 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1296 | 0 | } |
1297 | 0 | io_cb->seek(fp, frame->file_offset, SEEK_SET); |
1298 | 0 | io_cb->write(h2, h2len, 1, fp); |
1299 | 0 | io_cb->close(fp); |
1300 | 0 | } |
1301 | 0 | else { |
1302 | 0 | if (new) { |
1303 | 0 | frame->cframe = realloc(frame->cframe, h2len); |
1304 | 0 | } |
1305 | 0 | memcpy(frame->cframe, h2, h2len); |
1306 | 0 | } |
1307 | 0 | free(h2); |
1308 | |
|
1309 | 0 | return 1; |
1310 | 0 | } |
1311 | | |
1312 | | |
1313 | | static int get_meta_from_header(blosc2_frame_s* frame, blosc2_schunk* schunk, uint8_t* header, |
1314 | 5 | int32_t header_len) { |
1315 | 5 | BLOSC_UNUSED_PARAM(frame); |
1316 | 5 | int64_t header_pos = FRAME_IDX_SIZE; |
1317 | | |
1318 | | // Get the size for the index of metalayers |
1319 | 5 | uint16_t idx_size; |
1320 | 5 | header_pos += sizeof(idx_size); |
1321 | 5 | if (header_len < header_pos) { |
1322 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1323 | 0 | } |
1324 | 5 | from_big(&idx_size, header + FRAME_IDX_SIZE, sizeof(idx_size)); |
1325 | | |
1326 | | // Get the actual index of metalayers |
1327 | 5 | uint8_t* metalayers_idx = header + FRAME_IDX_SIZE + 2; |
1328 | 5 | header_pos += 1; |
1329 | 5 | if (header_len < header_pos) { |
1330 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1331 | 0 | } |
1332 | 5 | if (metalayers_idx[0] != 0xde) { // sanity check |
1333 | 0 | return BLOSC2_ERROR_DATA; |
1334 | 0 | } |
1335 | 5 | uint8_t* idxp = metalayers_idx + 1; |
1336 | 5 | uint16_t nmetalayers; |
1337 | 5 | header_pos += sizeof(nmetalayers); |
1338 | 5 | if (header_len < header_pos) { |
1339 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1340 | 0 | } |
1341 | 5 | from_big(&nmetalayers, idxp, sizeof(uint16_t)); |
1342 | 5 | idxp += 2; |
1343 | 5 | if (nmetalayers > BLOSC2_MAX_METALAYERS) { |
1344 | 0 | return BLOSC2_ERROR_DATA; |
1345 | 0 | } |
1346 | 5 | schunk->nmetalayers = nmetalayers; |
1347 | | |
1348 | | // Populate the metalayers and its serialized values |
1349 | 5 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
1350 | 0 | header_pos += 1; |
1351 | 0 | if (header_len < header_pos) { |
1352 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1353 | 0 | } |
1354 | 0 | if ((*idxp & 0xe0u) != 0xa0u) { // sanity check |
1355 | 0 | return BLOSC2_ERROR_DATA; |
1356 | 0 | } |
1357 | 0 | blosc2_metalayer* metalayer = calloc(1, sizeof(blosc2_metalayer)); |
1358 | 0 | schunk->metalayers[nmetalayer] = metalayer; |
1359 | | |
1360 | | // Populate the metalayer string |
1361 | 0 | int8_t nslen = *idxp & (uint8_t)0x1F; |
1362 | 0 | idxp += 1; |
1363 | 0 | header_pos += nslen; |
1364 | 0 | if (header_len < header_pos) { |
1365 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1366 | 0 | } |
1367 | 0 | char* ns = malloc((size_t)nslen + 1); |
1368 | 0 | memcpy(ns, idxp, nslen); |
1369 | 0 | ns[nslen] = '\0'; |
1370 | 0 | idxp += nslen; |
1371 | 0 | metalayer->name = ns; |
1372 | | |
1373 | | // Populate the serialized value for this metalayer |
1374 | | // Get the offset |
1375 | 0 | header_pos += 1; |
1376 | 0 | if (header_len < header_pos) { |
1377 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1378 | 0 | } |
1379 | 0 | if ((*idxp & 0xffu) != 0xd2u) { // sanity check |
1380 | 0 | return BLOSC2_ERROR_DATA; |
1381 | 0 | } |
1382 | 0 | idxp += 1; |
1383 | 0 | int32_t offset; |
1384 | 0 | header_pos += sizeof(offset); |
1385 | 0 | if (header_len < header_pos) { |
1386 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1387 | 0 | } |
1388 | 0 | from_big(&offset, idxp, sizeof(offset)); |
1389 | 0 | idxp += 4; |
1390 | 0 | if (offset < 0 || offset >= header_len) { |
1391 | | // Offset is less than zero or exceeds header length |
1392 | 0 | return BLOSC2_ERROR_DATA; |
1393 | 0 | } |
1394 | | // Go to offset and see if we have the correct marker |
1395 | 0 | uint8_t* content_marker = header + offset; |
1396 | 0 | if (header_len < offset + 1 + 4) { |
1397 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1398 | 0 | } |
1399 | 0 | if (*content_marker != 0xc6) { |
1400 | 0 | return BLOSC2_ERROR_DATA; |
1401 | 0 | } |
1402 | | |
1403 | | // Read the size of the content |
1404 | 0 | int32_t content_len; |
1405 | 0 | from_big(&content_len, content_marker + 1, sizeof(content_len)); |
1406 | 0 | if (content_len < 0) { |
1407 | 0 | return BLOSC2_ERROR_DATA; |
1408 | 0 | } |
1409 | 0 | metalayer->content_len = content_len; |
1410 | | |
1411 | | // Finally, read the content |
1412 | 0 | if (header_len < offset + 1 + 4 + content_len) { |
1413 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1414 | 0 | } |
1415 | 0 | char* content = malloc((size_t)content_len); |
1416 | 0 | memcpy(content, content_marker + 1 + 4, (size_t)content_len); |
1417 | 0 | metalayer->content = (uint8_t*)content; |
1418 | 0 | } |
1419 | | |
1420 | 5 | return 1; |
1421 | 5 | } |
1422 | | |
1423 | 5 | int frame_get_metalayers(blosc2_frame_s* frame, blosc2_schunk* schunk) { |
1424 | 5 | int32_t header_len; |
1425 | 5 | int64_t frame_len; |
1426 | 5 | int64_t nbytes; |
1427 | 5 | int64_t cbytes; |
1428 | 5 | int32_t blocksize; |
1429 | 5 | int32_t chunksize; |
1430 | 5 | int64_t nchunks; |
1431 | 5 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
1432 | 5 | &blocksize, &chunksize, &nchunks, |
1433 | 5 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1434 | 5 | schunk->storage->io); |
1435 | 5 | if (ret < 0) { |
1436 | 0 | BLOSC_TRACE_ERROR("Unable to get the header info from frame."); |
1437 | 0 | return ret; |
1438 | 0 | } |
1439 | | |
1440 | | // Get the header |
1441 | 5 | uint8_t* header = NULL; |
1442 | 5 | if (frame->cframe != NULL) { |
1443 | 5 | header = frame->cframe; |
1444 | 5 | } else { |
1445 | 0 | int64_t rbytes = 0; |
1446 | 0 | header = malloc(header_len); |
1447 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1448 | 0 | if (io_cb == NULL) { |
1449 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1450 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
1451 | 0 | } |
1452 | | |
1453 | 0 | void* fp = NULL; |
1454 | 0 | if (frame->sframe) { |
1455 | 0 | fp = sframe_open_index(frame->urlpath, "rb", |
1456 | 0 | frame->schunk->storage->io); |
1457 | 0 | if (fp == NULL) { |
1458 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1459 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1460 | 0 | } |
1461 | 0 | } |
1462 | 0 | else { |
1463 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1464 | 0 | if (fp == NULL) { |
1465 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1466 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1467 | 0 | } |
1468 | 0 | io_cb->seek(fp, frame->file_offset, SEEK_SET); |
1469 | 0 | } |
1470 | 0 | if (fp != NULL) { |
1471 | 0 | rbytes = io_cb->read(header, 1, header_len, fp); |
1472 | 0 | io_cb->close(fp); |
1473 | 0 | } |
1474 | 0 | if (rbytes != header_len) { |
1475 | 0 | BLOSC_TRACE_ERROR("Cannot access the header out of the frame."); |
1476 | 0 | free(header); |
1477 | 0 | return BLOSC2_ERROR_FILE_READ; |
1478 | 0 | } |
1479 | 0 | } |
1480 | | |
1481 | 5 | ret = get_meta_from_header(frame, schunk, header, header_len); |
1482 | | |
1483 | 5 | if (frame->cframe == NULL) { |
1484 | 0 | free(header); |
1485 | 0 | } |
1486 | | |
1487 | 5 | return ret; |
1488 | 5 | } |
1489 | | |
1490 | | static int get_vlmeta_from_trailer(blosc2_frame_s* frame, blosc2_schunk* schunk, uint8_t* trailer, |
1491 | 5 | int32_t trailer_len) { |
1492 | | |
1493 | 5 | BLOSC_UNUSED_PARAM(frame); |
1494 | 5 | int64_t trailer_pos = FRAME_TRAILER_VLMETALAYERS + 2; |
1495 | 5 | uint8_t* idxp = trailer + trailer_pos; |
1496 | | |
1497 | | // Get the size for the index of metalayers |
1498 | 5 | trailer_pos += 2; |
1499 | 5 | if (trailer_len < trailer_pos) { |
1500 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1501 | 0 | } |
1502 | 5 | uint16_t idx_size; |
1503 | 5 | from_big(&idx_size, idxp, sizeof(idx_size)); |
1504 | 5 | idxp += 2; |
1505 | | |
1506 | 5 | trailer_pos += 1; |
1507 | | // Get the actual index of metalayers |
1508 | 5 | if (trailer_len < trailer_pos) { |
1509 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1510 | 0 | } |
1511 | 5 | if (idxp[0] != 0xde) { // sanity check |
1512 | 0 | return BLOSC2_ERROR_DATA; |
1513 | 0 | } |
1514 | 5 | idxp += 1; |
1515 | | |
1516 | 5 | int16_t nmetalayers; |
1517 | 5 | trailer_pos += sizeof(nmetalayers); |
1518 | 5 | if (trailer_len < trailer_pos) { |
1519 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1520 | 0 | } |
1521 | 5 | from_big(&nmetalayers, idxp, sizeof(uint16_t)); |
1522 | 5 | idxp += 2; |
1523 | 5 | if (nmetalayers > BLOSC2_MAX_VLMETALAYERS) { |
1524 | 0 | return BLOSC2_ERROR_DATA; |
1525 | 0 | } |
1526 | 5 | schunk->nvlmetalayers = nmetalayers; |
1527 | | |
1528 | | // Populate the metalayers and its serialized values |
1529 | 10 | for (int nmetalayer = 0; nmetalayer < nmetalayers; nmetalayer++) { |
1530 | 5 | trailer_pos += 1; |
1531 | 5 | if (trailer_len < trailer_pos) { |
1532 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1533 | 0 | } |
1534 | 5 | if ((*idxp & 0xe0u) != 0xa0u) { // sanity check |
1535 | 0 | return BLOSC2_ERROR_DATA; |
1536 | 0 | } |
1537 | 5 | blosc2_metalayer* metalayer = calloc(1, sizeof(blosc2_metalayer)); |
1538 | 5 | schunk->vlmetalayers[nmetalayer] = metalayer; |
1539 | | |
1540 | | // Populate the metalayer string |
1541 | 5 | int8_t nslen = *idxp & (uint8_t)0x1F; |
1542 | 5 | idxp += 1; |
1543 | 5 | trailer_pos += nslen; |
1544 | 5 | if (trailer_len < trailer_pos) { |
1545 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1546 | 0 | } |
1547 | 5 | char* ns = malloc((size_t)nslen + 1); |
1548 | 5 | memcpy(ns, idxp, nslen); |
1549 | 5 | ns[nslen] = '\0'; |
1550 | 5 | idxp += nslen; |
1551 | 5 | metalayer->name = ns; |
1552 | | |
1553 | | // Populate the serialized value for this metalayer |
1554 | | // Get the offset |
1555 | 5 | trailer_pos += 1; |
1556 | 5 | if (trailer_len < trailer_pos) { |
1557 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1558 | 0 | } |
1559 | 5 | if ((*idxp & 0xffu) != 0xd2u) { // sanity check |
1560 | 0 | return BLOSC2_ERROR_DATA; |
1561 | 0 | } |
1562 | 5 | idxp += 1; |
1563 | 5 | int32_t offset; |
1564 | 5 | trailer_pos += sizeof(offset); |
1565 | 5 | if (trailer_len < trailer_pos) { |
1566 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1567 | 0 | } |
1568 | 5 | from_big(&offset, idxp, sizeof(offset)); |
1569 | 5 | idxp += 4; |
1570 | 5 | if (offset < 0 || offset >= trailer_len) { |
1571 | | // Offset is less than zero or exceeds trailer length |
1572 | 0 | return BLOSC2_ERROR_DATA; |
1573 | 0 | } |
1574 | | // Go to offset and see if we have the correct marker |
1575 | 5 | uint8_t* content_marker = trailer + offset; |
1576 | 5 | if (trailer_len < offset + 1 + 4) { |
1577 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1578 | 0 | } |
1579 | 5 | if (*content_marker != 0xc6) { |
1580 | 0 | return BLOSC2_ERROR_DATA; |
1581 | 0 | } |
1582 | | |
1583 | | // Read the size of the content |
1584 | 5 | int32_t content_len; |
1585 | 5 | from_big(&content_len, content_marker + 1, sizeof(content_len)); |
1586 | 5 | if (content_len < 0) { |
1587 | 0 | return BLOSC2_ERROR_DATA; |
1588 | 0 | } |
1589 | 5 | metalayer->content_len = content_len; |
1590 | | |
1591 | | // Finally, read the content |
1592 | 5 | if (trailer_len < offset + 1 + 4 + content_len) { |
1593 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1594 | 0 | } |
1595 | 5 | char* content = malloc((size_t)content_len); |
1596 | 5 | memcpy(content, content_marker + 1 + 4, (size_t)content_len); |
1597 | 5 | metalayer->content = (uint8_t*)content; |
1598 | 5 | } |
1599 | 5 | return 1; |
1600 | 5 | } |
1601 | | |
1602 | 5 | int frame_get_vlmetalayers(blosc2_frame_s* frame, blosc2_schunk* schunk) { |
1603 | 5 | int32_t header_len; |
1604 | 5 | int64_t frame_len; |
1605 | 5 | int64_t nbytes; |
1606 | 5 | int64_t cbytes; |
1607 | 5 | int32_t blocksize; |
1608 | 5 | int32_t chunksize; |
1609 | 5 | int64_t nchunks; |
1610 | 5 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
1611 | 5 | &blocksize, &chunksize, &nchunks, |
1612 | 5 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1613 | 5 | schunk->storage->io); |
1614 | 5 | if (ret < 0) { |
1615 | 0 | BLOSC_TRACE_ERROR("Unable to get the trailer info from frame."); |
1616 | 0 | return ret; |
1617 | 0 | } |
1618 | | |
1619 | 5 | int64_t trailer_offset = get_trailer_offset(frame, header_len, nbytes > 0); |
1620 | 5 | int32_t trailer_len = (int32_t) frame->trailer_len; |
1621 | | |
1622 | 5 | if (trailer_offset < BLOSC_EXTENDED_HEADER_LENGTH || trailer_offset + trailer_len > frame->len) { |
1623 | 0 | BLOSC_TRACE_ERROR("Cannot access the trailer out of the frame."); |
1624 | 0 | return BLOSC2_ERROR_READ_BUFFER; |
1625 | 0 | } |
1626 | | |
1627 | | // Get the trailer |
1628 | 5 | uint8_t* trailer = NULL; |
1629 | 5 | if (frame->cframe != NULL) { |
1630 | 5 | trailer = frame->cframe + trailer_offset; |
1631 | 5 | } else { |
1632 | 0 | int64_t rbytes = 0; |
1633 | 0 | trailer = malloc(trailer_len); |
1634 | |
|
1635 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
1636 | 0 | if (io_cb == NULL) { |
1637 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1638 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
1639 | 0 | } |
1640 | | |
1641 | 0 | void* fp = NULL; |
1642 | 0 | if (frame->sframe) { |
1643 | 0 | char* eframe_name = malloc(strlen(frame->urlpath) + strlen("/chunks.b2frame") + 1); |
1644 | 0 | sprintf(eframe_name, "%s/chunks.b2frame", frame->urlpath); |
1645 | 0 | fp = io_cb->open(eframe_name, "rb", frame->schunk->storage->io->params); |
1646 | 0 | if (fp == NULL) { |
1647 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", eframe_name); |
1648 | 0 | free(eframe_name); |
1649 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1650 | 0 | } |
1651 | 0 | free(eframe_name); |
1652 | 0 | io_cb->seek(fp, trailer_offset, SEEK_SET); |
1653 | 0 | } |
1654 | 0 | else { |
1655 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
1656 | 0 | if (fp == NULL) { |
1657 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1658 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
1659 | 0 | } |
1660 | 0 | io_cb->seek(fp, frame->file_offset + trailer_offset, SEEK_SET); |
1661 | 0 | } |
1662 | 0 | if (fp != NULL) { |
1663 | 0 | rbytes = io_cb->read(trailer, 1, trailer_len, fp); |
1664 | 0 | io_cb->close(fp); |
1665 | 0 | } |
1666 | 0 | if (rbytes != trailer_len) { |
1667 | 0 | BLOSC_TRACE_ERROR("Cannot access the trailer out of the fileframe."); |
1668 | 0 | free(trailer); |
1669 | 0 | return BLOSC2_ERROR_FILE_READ; |
1670 | 0 | } |
1671 | 0 | } |
1672 | | |
1673 | 5 | ret = get_vlmeta_from_trailer(frame, schunk, trailer, trailer_len); |
1674 | | |
1675 | 5 | if (frame->cframe == NULL) { |
1676 | 0 | free(trailer); |
1677 | 0 | } |
1678 | | |
1679 | 5 | return ret; |
1680 | 5 | } |
1681 | | |
1682 | | |
1683 | | blosc2_storage* get_new_storage(const blosc2_storage* storage, |
1684 | | const blosc2_cparams* cdefaults, |
1685 | | const blosc2_dparams* ddefaults, |
1686 | 2.41k | const blosc2_io* iodefaults) { |
1687 | | |
1688 | 2.41k | blosc2_storage* new_storage = (blosc2_storage*)calloc(1, sizeof(blosc2_storage)); |
1689 | 2.41k | memcpy(new_storage, storage, sizeof(blosc2_storage)); |
1690 | 2.41k | if (storage->urlpath != NULL) { |
1691 | 0 | char* urlpath = normalize_urlpath(storage->urlpath); |
1692 | 0 | new_storage->urlpath = malloc(strlen(urlpath) + 1); |
1693 | 0 | strcpy(new_storage->urlpath, urlpath); |
1694 | 0 | } |
1695 | | |
1696 | | // cparams |
1697 | 2.41k | blosc2_cparams* cparams = malloc(sizeof(blosc2_cparams)); |
1698 | 2.41k | if (storage->cparams != NULL) { |
1699 | 2.40k | memcpy(cparams, storage->cparams, sizeof(blosc2_cparams)); |
1700 | 2.40k | } else { |
1701 | 5 | memcpy(cparams, cdefaults, sizeof(blosc2_cparams)); |
1702 | 5 | } |
1703 | 2.41k | new_storage->cparams = cparams; |
1704 | | |
1705 | | // dparams |
1706 | 2.41k | blosc2_dparams* dparams = malloc(sizeof(blosc2_dparams)); |
1707 | 2.41k | if (storage->dparams != NULL) { |
1708 | 2.40k | memcpy(dparams, storage->dparams, sizeof(blosc2_dparams)); |
1709 | 2.40k | } |
1710 | 5 | else { |
1711 | 5 | memcpy(dparams, ddefaults, sizeof(blosc2_dparams)); |
1712 | 5 | } |
1713 | 2.41k | new_storage->dparams = dparams; |
1714 | | |
1715 | | // iodefaults |
1716 | 2.41k | blosc2_io* udio = malloc(sizeof(blosc2_io)); |
1717 | 2.41k | if (storage->io != NULL) { |
1718 | 0 | memcpy(udio, storage->io, sizeof(blosc2_io)); |
1719 | 0 | } |
1720 | 2.41k | else { |
1721 | 2.41k | memcpy(udio, iodefaults, sizeof(blosc2_io)); |
1722 | 2.41k | } |
1723 | 2.41k | new_storage->io = udio; |
1724 | | |
1725 | 2.41k | return new_storage; |
1726 | 2.41k | } |
1727 | | |
1728 | | |
1729 | | /* Get a super-chunk out of a frame */ |
1730 | 5 | blosc2_schunk* frame_to_schunk(blosc2_frame_s* frame, bool copy, const blosc2_io *udio) { |
1731 | 5 | int32_t header_len; |
1732 | 5 | int64_t frame_len; |
1733 | 5 | int rc; |
1734 | 5 | blosc2_schunk* schunk = calloc(1, sizeof(blosc2_schunk)); |
1735 | 5 | schunk->frame = (blosc2_frame*)frame; |
1736 | 5 | frame->schunk = schunk; |
1737 | | |
1738 | 5 | rc = get_header_info(frame, &header_len, &frame_len, &schunk->nbytes, |
1739 | 5 | &schunk->cbytes, &schunk->blocksize, |
1740 | 5 | &schunk->chunksize, &schunk->nchunks, &schunk->typesize, |
1741 | 5 | &schunk->compcode, &schunk->compcode_meta, &schunk->clevel, schunk->filters, |
1742 | 5 | schunk->filters_meta, &schunk->splitmode, udio); |
1743 | 5 | if (rc < 0) { |
1744 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
1745 | 0 | blosc2_schunk_free(schunk); |
1746 | 0 | return NULL; |
1747 | 0 | } |
1748 | 5 | int64_t nchunks = schunk->nchunks; |
1749 | 5 | int64_t nbytes = schunk->nbytes; |
1750 | 5 | (void) nbytes; |
1751 | 5 | int64_t cbytes = schunk->cbytes; |
1752 | | |
1753 | | // Compression and decompression contexts |
1754 | 5 | blosc2_cparams *cparams; |
1755 | 5 | blosc2_schunk_get_cparams(schunk, &cparams); |
1756 | 5 | schunk->cctx = blosc2_create_cctx(*cparams); |
1757 | 5 | if (schunk->cctx == NULL) { |
1758 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
1759 | 0 | return NULL; |
1760 | 0 | } |
1761 | 5 | blosc2_dparams *dparams; |
1762 | 5 | blosc2_schunk_get_dparams(schunk, &dparams); |
1763 | 5 | schunk->dctx = blosc2_create_dctx(*dparams); |
1764 | 5 | if (schunk->dctx == NULL) { |
1765 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
1766 | 0 | return NULL; |
1767 | 0 | } |
1768 | 5 | blosc2_storage storage = {.contiguous = copy ? false : true}; |
1769 | 5 | schunk->storage = get_new_storage(&storage, cparams, dparams, udio); |
1770 | 5 | free(cparams); |
1771 | 5 | free(dparams); |
1772 | 5 | if (!copy) { |
1773 | 5 | goto out; |
1774 | 5 | } |
1775 | | |
1776 | | // We are not attached to a frame anymore |
1777 | 0 | schunk->frame = NULL; |
1778 | |
|
1779 | 0 | if (nchunks == 0) { |
1780 | 0 | frame->schunk = NULL; |
1781 | 0 | goto out; |
1782 | 0 | } |
1783 | | |
1784 | | // Get the compressed offsets |
1785 | 0 | int32_t coffsets_cbytes = 0; |
1786 | 0 | uint8_t* coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
1787 | 0 | if (coffsets == NULL) { |
1788 | 0 | blosc2_schunk_free(schunk); |
1789 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
1790 | 0 | return NULL; |
1791 | 0 | } |
1792 | | |
1793 | | // Decompress offsets |
1794 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
1795 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
1796 | 0 | if (dctx == NULL) { |
1797 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
1798 | 0 | return NULL; |
1799 | 0 | } |
1800 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)nchunks * sizeof(int64_t)); |
1801 | 0 | int32_t off_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, |
1802 | 0 | offsets, (int32_t)(nchunks * sizeof(int64_t))); |
1803 | 0 | blosc2_free_ctx(dctx); |
1804 | 0 | if (off_nbytes < 0) { |
1805 | 0 | free(offsets); |
1806 | 0 | blosc2_schunk_free(schunk); |
1807 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
1808 | 0 | return NULL; |
1809 | 0 | } |
1810 | | |
1811 | | // We want the contiguous schunk, so create the actual data chunks (and, while doing this, |
1812 | | // get a guess at the blocksize used in this frame) |
1813 | 0 | int64_t acc_nbytes = 0; |
1814 | 0 | int64_t acc_cbytes = 0; |
1815 | 0 | int32_t blocksize = 0; |
1816 | 0 | int32_t chunk_nbytes; |
1817 | 0 | int32_t chunk_cbytes; |
1818 | 0 | int32_t chunk_blocksize; |
1819 | 0 | size_t prev_alloc = BLOSC_EXTENDED_HEADER_LENGTH; |
1820 | 0 | uint8_t* data_chunk = NULL; |
1821 | 0 | bool needs_free = false; |
1822 | 0 | const blosc2_io_cb *io_cb = blosc2_get_io_cb(udio->id); |
1823 | 0 | if (io_cb == NULL) { |
1824 | 0 | blosc2_schunk_free(schunk); |
1825 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
1826 | 0 | return NULL; |
1827 | 0 | } |
1828 | | |
1829 | 0 | void* fp = NULL; |
1830 | 0 | if (frame->cframe == NULL) { |
1831 | 0 | data_chunk = malloc((size_t)prev_alloc); |
1832 | 0 | needs_free = true; |
1833 | 0 | if (!frame->sframe) { |
1834 | | // If not the chunks won't be in the frame |
1835 | 0 | fp = io_cb->open(frame->urlpath, "rb", udio->params); |
1836 | 0 | if (fp == NULL) { |
1837 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
1838 | 0 | rc = BLOSC2_ERROR_FILE_OPEN; |
1839 | 0 | goto end; |
1840 | 0 | } |
1841 | 0 | } |
1842 | 0 | } |
1843 | 0 | schunk->data = malloc(nchunks * sizeof(void*)); |
1844 | 0 | for (int i = 0; i < nchunks; i++) { |
1845 | 0 | if (frame->cframe != NULL) { |
1846 | 0 | if (needs_free) { |
1847 | 0 | free(data_chunk); |
1848 | 0 | } |
1849 | 0 | if (offsets[i] < 0) { |
1850 | 0 | int64_t rbytes = frame_get_chunk(frame, i, &data_chunk, &needs_free); |
1851 | 0 | if (rbytes < 0) { |
1852 | 0 | break; |
1853 | 0 | } |
1854 | 0 | } |
1855 | 0 | else { |
1856 | 0 | data_chunk = frame->cframe + header_len + offsets[i]; |
1857 | 0 | } |
1858 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, NULL, &chunk_cbytes, NULL); |
1859 | 0 | if (rc < 0) { |
1860 | 0 | break; |
1861 | 0 | } |
1862 | 0 | } |
1863 | 0 | else { |
1864 | 0 | int64_t rbytes; |
1865 | 0 | if (frame->sframe) { |
1866 | 0 | if (needs_free) { |
1867 | 0 | free(data_chunk); |
1868 | 0 | } |
1869 | 0 | rbytes = frame_get_chunk(frame, i, &data_chunk, &needs_free); |
1870 | 0 | if (rbytes < 0) { |
1871 | 0 | break; |
1872 | 0 | } |
1873 | 0 | } |
1874 | 0 | else { |
1875 | 0 | io_cb->seek(fp, frame->file_offset + header_len + offsets[i], SEEK_SET); |
1876 | 0 | rbytes = io_cb->read(data_chunk, 1, BLOSC_EXTENDED_HEADER_LENGTH, fp); |
1877 | 0 | } |
1878 | 0 | if (rbytes != BLOSC_EXTENDED_HEADER_LENGTH) { |
1879 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
1880 | 0 | break; |
1881 | 0 | } |
1882 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, NULL, &chunk_cbytes, NULL); |
1883 | 0 | if (rc < 0) { |
1884 | 0 | break; |
1885 | 0 | } |
1886 | 0 | if (chunk_cbytes > (int32_t)prev_alloc) { |
1887 | 0 | data_chunk = realloc(data_chunk, chunk_cbytes); |
1888 | 0 | prev_alloc = chunk_cbytes; |
1889 | 0 | } |
1890 | 0 | if (!frame->sframe) { |
1891 | 0 | io_cb->seek(fp, frame->file_offset + header_len + offsets[i], SEEK_SET); |
1892 | 0 | rbytes = io_cb->read(data_chunk, 1, chunk_cbytes, fp); |
1893 | 0 | if (rbytes != chunk_cbytes) { |
1894 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
1895 | 0 | break; |
1896 | 0 | } |
1897 | 0 | } |
1898 | 0 | } |
1899 | 0 | uint8_t* new_chunk = malloc(chunk_cbytes); |
1900 | 0 | memcpy(new_chunk, data_chunk, chunk_cbytes); |
1901 | 0 | schunk->data[i] = new_chunk; |
1902 | 0 | rc = blosc2_cbuffer_sizes(data_chunk, &chunk_nbytes, NULL, &chunk_blocksize); |
1903 | 0 | if (rc < 0) { |
1904 | 0 | break; |
1905 | 0 | } |
1906 | 0 | acc_nbytes += chunk_nbytes; |
1907 | 0 | acc_cbytes += chunk_cbytes; |
1908 | 0 | if (i == 0) { |
1909 | 0 | blocksize = chunk_blocksize; |
1910 | 0 | } |
1911 | 0 | else if (blocksize != chunk_blocksize) { |
1912 | | // Blocksize varies |
1913 | 0 | blocksize = 0; |
1914 | 0 | } |
1915 | 0 | } |
1916 | | |
1917 | | // We are not attached to a schunk anymore |
1918 | 0 | frame->schunk = NULL; |
1919 | |
|
1920 | 0 | end: |
1921 | 0 | if (needs_free) { |
1922 | 0 | free(data_chunk); |
1923 | 0 | } |
1924 | 0 | if (frame->cframe == NULL) { |
1925 | 0 | if (!frame->sframe) { |
1926 | 0 | io_cb->close(fp); |
1927 | 0 | } |
1928 | 0 | } |
1929 | 0 | free(offsets); |
1930 | | |
1931 | | // cframes and sframes have different ways to store chunks with special values: |
1932 | | // 1) cframes represent special chunks as negative offsets |
1933 | | // 2) sframes does not have the concept of offsets, but rather of data pointers (.data) |
1934 | | // so they always have a pointer to a special chunk |
1935 | | // This is why cframes and sframes have different cbytes and hence, we cannot enforce acc_bytes == schunk->cbytes |
1936 | | // In the future, maybe we could provide special meanings for .data[i] > 0x7FFFFFFF, but not there yet |
1937 | | // if (rc < 0 || acc_nbytes != nbytes || acc_cbytes != cbytes) { |
1938 | 0 | if (rc < 0 || acc_nbytes != nbytes) { |
1939 | 0 | blosc2_schunk_free(schunk); |
1940 | 0 | return NULL; |
1941 | 0 | } |
1942 | | // Update counters |
1943 | 0 | schunk->cbytes = acc_cbytes; |
1944 | 0 | schunk->blocksize = blocksize; |
1945 | |
|
1946 | 5 | out: |
1947 | 5 | rc = frame_get_metalayers(frame, schunk); |
1948 | 5 | if (rc < 0) { |
1949 | 0 | blosc2_schunk_free(schunk); |
1950 | 0 | BLOSC_TRACE_ERROR("Cannot access the metalayers."); |
1951 | 0 | return NULL; |
1952 | 0 | } |
1953 | | |
1954 | 5 | rc = frame_get_vlmetalayers(frame, schunk); |
1955 | 5 | if (rc < 0) { |
1956 | 0 | blosc2_schunk_free(schunk); |
1957 | 0 | BLOSC_TRACE_ERROR("Cannot access the vlmetalayers."); |
1958 | 0 | return NULL; |
1959 | 0 | } |
1960 | | |
1961 | 5 | return schunk; |
1962 | 5 | } |
1963 | | |
1964 | | |
1965 | 0 | void frame_avoid_cframe_free(blosc2_frame_s* frame, bool avoid_cframe_free) { |
1966 | 0 | frame->avoid_cframe_free = avoid_cframe_free; |
1967 | 0 | } |
1968 | | |
1969 | | |
/* Pairs a value with the position it originally had, so the original order can be
 * recovered after sorting by value. */
struct csize_idx {
    int32_t val;
    int32_t idx;
};

// Helper function for qsorting block offsets.
// Orders two `struct csize_idx` elements by ascending `val`.  The result is computed from
// comparisons rather than from `a - b`, because that subtraction overflows (undefined
// behaviour, and a wrong ordering in practice) when the two values are far apart.
int sort_offset(const void* a, const void* b) {
  int32_t a_ = ((const struct csize_idx*)a)->val;
  int32_t b_ = ((const struct csize_idx*)b)->val;
  return (a_ > b_) - (a_ < b_);
}
1981 | | |
1982 | | |
1983 | | int get_coffset(blosc2_frame_s* frame, int32_t header_len, int64_t cbytes, |
1984 | 21 | int64_t nchunk, int64_t nchunks, int64_t *offset) { |
1985 | 21 | int32_t off_cbytes; |
1986 | | // Get the offset to nchunk |
1987 | 21 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &off_cbytes); |
1988 | 21 | if (coffsets == NULL) { |
1989 | 0 | BLOSC_TRACE_ERROR("Cannot get the offset for chunk %" PRId64 " for the frame.", nchunk); |
1990 | 0 | return BLOSC2_ERROR_DATA; |
1991 | 0 | } |
1992 | | |
1993 | | // Get the 64-bit offset |
1994 | 21 | int rc = blosc2_getitem(coffsets, off_cbytes, (int32_t)nchunk, 1, offset, (int32_t)sizeof(int64_t)); |
1995 | 21 | if (rc < 0) { |
1996 | 0 | BLOSC_TRACE_ERROR("Problems retrieving a chunk offset."); |
1997 | 21 | } else if (!frame->sframe && *offset > frame->len) { |
1998 | 0 | BLOSC_TRACE_ERROR("Cannot read chunk %" PRId64 " outside of frame boundary.", nchunk); |
1999 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
2000 | 0 | } |
2001 | | |
2002 | 0 | return rc; |
2003 | 21 | } |
2004 | | |
2005 | | |
2006 | | // Detect and return a chunk with special values in offsets (only zeros, NaNs and non initialized) |
2007 | | int frame_special_chunk(int64_t special_value, int32_t nbytes, int32_t typesize, int32_t blocksize, |
2008 | 5 | uint8_t** chunk, int32_t cbytes, bool *needs_free) { |
2009 | 5 | int rc = 0; |
2010 | 5 | *chunk = malloc(cbytes); |
2011 | 5 | *needs_free = true; |
2012 | | |
2013 | | // Detect the kind of special value |
2014 | 5 | uint64_t zeros_mask = (uint64_t) BLOSC2_SPECIAL_ZERO << (8 * 7); // chunk of zeros |
2015 | 5 | uint64_t nans_mask = (uint64_t) BLOSC2_SPECIAL_NAN << (8 * 7); // chunk of NaNs |
2016 | 5 | uint64_t uninit_mask = (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); // chunk of uninit values |
2017 | | |
2018 | 5 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
2019 | 5 | cparams.typesize = typesize; |
2020 | 5 | cparams.blocksize = blocksize; |
2021 | 5 | if (special_value & zeros_mask) { |
2022 | 5 | rc = blosc2_chunk_zeros(cparams, nbytes, *chunk, cbytes); |
2023 | 5 | if (rc < 0) { |
2024 | 0 | BLOSC_TRACE_ERROR("Error creating a zero chunk"); |
2025 | 0 | } |
2026 | 5 | } |
2027 | 0 | else if (special_value & uninit_mask) { |
2028 | 0 | rc = blosc2_chunk_uninit(cparams, nbytes, *chunk, cbytes); |
2029 | 0 | if (rc < 0) { |
2030 | 0 | BLOSC_TRACE_ERROR("Error creating a non initialized chunk"); |
2031 | 0 | } |
2032 | 0 | } |
2033 | 0 | else if (special_value & nans_mask) { |
2034 | 0 | rc = blosc2_chunk_nans(cparams, nbytes, *chunk, cbytes); |
2035 | 0 | if (rc < 0) { |
2036 | 0 | BLOSC_TRACE_ERROR("Error creating a nan chunk"); |
2037 | 0 | } |
2038 | 0 | } |
2039 | 0 | else { |
2040 | 0 | BLOSC_TRACE_ERROR("Special value not recognized: %" PRId64 "", special_value); |
2041 | 0 | rc = BLOSC2_ERROR_DATA; |
2042 | 0 | } |
2043 | | |
2044 | 5 | if (rc < 0) { |
2045 | 0 | free(*chunk); |
2046 | 0 | *needs_free = false; |
2047 | 0 | *chunk = NULL; |
2048 | 0 | } |
2049 | | |
2050 | 5 | return rc; |
2051 | 5 | } |
2052 | | |
2053 | | |
2054 | | /* Return a compressed chunk that is part of a frame in the `chunk` parameter. |
2055 | | * If the frame is disk-based, a buffer is allocated for the (compressed) chunk, |
2056 | | * and hence a free is needed. You can check if the chunk requires a free with the `needs_free` |
2057 | | * parameter. |
2058 | | * If the chunk does not need a free, it means that a pointer to the location in frame is returned |
2059 | | * in the `chunk` parameter. |
2060 | | * |
2061 | | * The size of the (compressed) chunk is returned. If some problem is detected, a negative code |
2062 | | * is returned instead. |
2063 | | */ |
2064 | 0 | int frame_get_chunk(blosc2_frame_s *frame, int64_t nchunk, uint8_t **chunk, bool *needs_free) { |
2065 | 0 | int32_t header_len; |
2066 | 0 | int64_t frame_len; |
2067 | 0 | int64_t nbytes; |
2068 | 0 | int64_t cbytes; |
2069 | 0 | int32_t blocksize; |
2070 | 0 | int32_t chunksize; |
2071 | 0 | int64_t nchunks; |
2072 | 0 | int32_t typesize; |
2073 | 0 | int64_t offset; |
2074 | 0 | int32_t chunk_cbytes; |
2075 | 0 | int rc; |
2076 | |
|
2077 | 0 | *chunk = NULL; |
2078 | 0 | *needs_free = false; |
2079 | 0 | rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
2080 | 0 | &blocksize, &chunksize, &nchunks, |
2081 | 0 | &typesize, NULL, NULL, NULL, NULL, NULL, NULL, |
2082 | 0 | frame->schunk->storage->io); |
2083 | 0 | if (rc < 0) { |
2084 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2085 | 0 | return rc; |
2086 | 0 | } |
2087 | | |
2088 | 0 | if (nchunk >= nchunks) { |
2089 | 0 | BLOSC_TRACE_ERROR("nchunk ('%" PRId64 "') exceeds the number of chunks " |
2090 | 0 | "('%" PRId64 "') in frame.", nchunk, nchunks); |
2091 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
2092 | 0 | } |
2093 | | |
2094 | | // Get the offset to nchunk |
2095 | 0 | rc = get_coffset(frame, header_len, cbytes, nchunk, nchunks, &offset); |
2096 | 0 | if (rc < 0) { |
2097 | 0 | BLOSC_TRACE_ERROR("Unable to get offset to chunk %" PRId64 ".", nchunk); |
2098 | 0 | return rc; |
2099 | 0 | } |
2100 | | |
2101 | 0 | if (offset < 0) { |
2102 | | // Special value |
2103 | 0 | chunk_cbytes = BLOSC_EXTENDED_HEADER_LENGTH; |
2104 | 0 | int32_t chunksize_ = chunksize; |
2105 | 0 | if ((nchunk == nchunks - 1) && (nbytes % chunksize)) { |
2106 | | // Last chunk is incomplete. Compute its actual size. |
2107 | 0 | chunksize_ = (int32_t) (nbytes % chunksize); |
2108 | 0 | } |
2109 | 0 | rc = frame_special_chunk(offset, chunksize_, typesize, blocksize, chunk, chunk_cbytes, needs_free); |
2110 | 0 | if (rc < 0) { |
2111 | 0 | return rc; |
2112 | 0 | } |
2113 | 0 | goto end; |
2114 | 0 | } |
2115 | | |
2116 | 0 | if (frame->sframe) { |
2117 | | // Sparse on-disk |
2118 | 0 | nchunk = offset; |
2119 | 0 | return sframe_get_chunk(frame, nchunk, chunk, needs_free); |
2120 | 0 | } |
2121 | | |
2122 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2123 | 0 | if (io_cb == NULL) { |
2124 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2125 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
2126 | 0 | } |
2127 | | |
2128 | 0 | if (frame->cframe == NULL) { |
2129 | 0 | uint8_t header[BLOSC_EXTENDED_HEADER_LENGTH]; |
2130 | 0 | void* fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
2131 | 0 | if (fp == NULL) { |
2132 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2133 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2134 | 0 | } |
2135 | 0 | io_cb->seek(fp, frame->file_offset + header_len + offset, SEEK_SET); |
2136 | 0 | int64_t rbytes = io_cb->read(header, 1, sizeof(header), fp); |
2137 | 0 | if (rbytes != sizeof(header)) { |
2138 | 0 | BLOSC_TRACE_ERROR("Cannot read the cbytes for chunk in the frame."); |
2139 | 0 | io_cb->close(fp); |
2140 | 0 | return BLOSC2_ERROR_FILE_READ; |
2141 | 0 | } |
2142 | 0 | rc = blosc2_cbuffer_sizes(header, NULL, &chunk_cbytes, NULL); |
2143 | 0 | if (rc < 0) { |
2144 | 0 | BLOSC_TRACE_ERROR("Cannot read the cbytes for chunk in the frame."); |
2145 | 0 | io_cb->close(fp); |
2146 | 0 | return rc; |
2147 | 0 | } |
2148 | 0 | *chunk = malloc(chunk_cbytes); |
2149 | 0 | io_cb->seek(fp, frame->file_offset + header_len + offset, SEEK_SET); |
2150 | 0 | rbytes = io_cb->read(*chunk, 1, chunk_cbytes, fp); |
2151 | 0 | io_cb->close(fp); |
2152 | 0 | if (rbytes != chunk_cbytes) { |
2153 | 0 | BLOSC_TRACE_ERROR("Cannot read the chunk out of the frame."); |
2154 | 0 | return BLOSC2_ERROR_FILE_READ; |
2155 | 0 | } |
2156 | 0 | *needs_free = true; |
2157 | 0 | } else { |
2158 | | // The chunk is in memory and just one pointer away |
2159 | 0 | *chunk = frame->cframe + header_len + offset; |
2160 | 0 | rc = blosc2_cbuffer_sizes(*chunk, NULL, &chunk_cbytes, NULL); |
2161 | 0 | if (rc < 0) { |
2162 | 0 | return rc; |
2163 | 0 | } |
2164 | 0 | } |
2165 | | |
2166 | 0 | end: |
2167 | 0 | return (int32_t)chunk_cbytes; |
2168 | 0 | } |
2169 | | |
2170 | | |
2171 | | /* Return a compressed chunk that is part of a frame in the `chunk` parameter. |
2172 | | * If the frame is disk-based, a buffer is allocated for the (lazy) chunk, |
2173 | | * and hence a free is needed. You can check if the chunk requires a free with the `needs_free` |
2174 | | * parameter. |
2175 | | * If the chunk does not need a free, it means that the frame is in memory and that just a |
2176 | | * pointer to the location of the chunk in memory is returned. |
2177 | | * |
2178 | | * The size of the (compressed, potentially lazy) chunk is returned. If some problem is detected, |
2179 | | * a negative code is returned instead. |
2180 | | */ |
2181 | 21 | int frame_get_lazychunk(blosc2_frame_s *frame, int64_t nchunk, uint8_t **chunk, bool *needs_free) { |
2182 | 21 | int32_t header_len; |
2183 | 21 | int64_t frame_len; |
2184 | 21 | int64_t nbytes; |
2185 | 21 | int64_t cbytes; |
2186 | 21 | int32_t blocksize; |
2187 | 21 | int32_t chunksize; |
2188 | 21 | int64_t nchunks; |
2189 | 21 | int32_t typesize; |
2190 | 21 | int32_t lazychunk_cbytes; |
2191 | 21 | int64_t offset; |
2192 | 21 | void* fp = NULL; |
2193 | | |
2194 | 21 | *chunk = NULL; |
2195 | 21 | *needs_free = false; |
2196 | 21 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
2197 | 21 | &blocksize, &chunksize, &nchunks, |
2198 | 21 | &typesize, NULL, NULL, NULL, NULL, NULL, NULL, |
2199 | 21 | frame->schunk->storage->io); |
2200 | 21 | if (rc < 0) { |
2201 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2202 | 0 | return rc; |
2203 | 0 | } |
2204 | | |
2205 | 21 | if (nchunk >= nchunks) { |
2206 | 0 | BLOSC_TRACE_ERROR("nchunk ('%" PRId64 "') exceeds the number of chunks " |
2207 | 0 | "('%" PRId64 "') in frame.", nchunk, nchunks); |
2208 | 0 | return BLOSC2_ERROR_INVALID_PARAM; |
2209 | 0 | } |
2210 | | |
2211 | | // Get the offset to nchunk |
2212 | 21 | rc = get_coffset(frame, header_len, cbytes, nchunk, nchunks, &offset); |
2213 | 21 | if (rc < 0) { |
2214 | 0 | BLOSC_TRACE_ERROR("Unable to get offset to chunk %" PRId64 ".", nchunk); |
2215 | 0 | return rc; |
2216 | 0 | } |
2217 | | |
2218 | 21 | if (offset < 0) { |
2219 | | // Special value |
2220 | 5 | lazychunk_cbytes = BLOSC_EXTENDED_HEADER_LENGTH; |
2221 | 5 | int32_t chunksize_ = chunksize; |
2222 | 5 | if ((nchunk == nchunks - 1) && (nbytes % chunksize)) { |
2223 | | // Last chunk is incomplete. Compute its actual size. |
2224 | 0 | chunksize_ = (int32_t) (nbytes % chunksize); |
2225 | 0 | } |
2226 | 5 | rc = frame_special_chunk(offset, chunksize_, typesize, blocksize, chunk, |
2227 | 5 | (int32_t)lazychunk_cbytes, needs_free); |
2228 | 5 | goto end; |
2229 | 5 | } |
2230 | | |
2231 | 16 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2232 | 16 | if (io_cb == NULL) { |
2233 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2234 | 0 | rc = BLOSC2_ERROR_PLUGIN_IO; |
2235 | 0 | goto end; |
2236 | 0 | } |
2237 | | |
2238 | 16 | if (frame->cframe == NULL) { |
2239 | | // TODO: make this portable across different endianness |
2240 | | // Get info for building a lazy chunk |
2241 | 0 | int32_t chunk_nbytes; |
2242 | 0 | int32_t chunk_cbytes; |
2243 | 0 | int32_t chunk_blocksize; |
2244 | 0 | uint8_t header[BLOSC_EXTENDED_HEADER_LENGTH]; |
2245 | 0 | if (frame->sframe) { |
2246 | | // The chunk is not in the frame |
2247 | 0 | fp = sframe_open_chunk(frame->urlpath, offset, "rb", |
2248 | 0 | frame->schunk->storage->io); |
2249 | 0 | if (fp == NULL) { |
2250 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2251 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2252 | 0 | } |
2253 | 0 | } |
2254 | 0 | else { |
2255 | 0 | fp = io_cb->open(frame->urlpath, "rb", frame->schunk->storage->io->params); |
2256 | 0 | if (fp == NULL) { |
2257 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2258 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2259 | 0 | } |
2260 | 0 | io_cb->seek(fp, frame->file_offset + header_len + offset, SEEK_SET); |
2261 | 0 | } |
2262 | 0 | int64_t rbytes = io_cb->read(header, 1, BLOSC_EXTENDED_HEADER_LENGTH, fp); |
2263 | 0 | if (rbytes != BLOSC_EXTENDED_HEADER_LENGTH) { |
2264 | 0 | BLOSC_TRACE_ERROR("Cannot read the header for chunk in the frame."); |
2265 | 0 | rc = BLOSC2_ERROR_FILE_READ; |
2266 | 0 | goto end; |
2267 | 0 | } |
2268 | 0 | rc = blosc2_cbuffer_sizes(header, &chunk_nbytes, &chunk_cbytes, &chunk_blocksize); |
2269 | 0 | if (rc < 0) { |
2270 | 0 | goto end; |
2271 | 0 | } |
2272 | 0 | size_t nblocks = chunk_nbytes / chunk_blocksize; |
2273 | 0 | size_t leftover_block = chunk_nbytes % chunk_blocksize; |
2274 | 0 | nblocks = leftover_block ? nblocks + 1 : nblocks; |
2275 | | // Allocate space for the lazy chunk |
2276 | 0 | int32_t trailer_len; |
2277 | 0 | int32_t special_type = (header[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
2278 | 0 | int memcpyed = header[BLOSC2_CHUNK_FLAGS] & (uint8_t) BLOSC_MEMCPYED; |
2279 | |
|
2280 | 0 | int32_t trailer_offset = BLOSC_EXTENDED_HEADER_LENGTH; |
2281 | 0 | size_t streams_offset = BLOSC_EXTENDED_HEADER_LENGTH; |
2282 | 0 | if (special_type == 0) { |
2283 | | // Regular values have offsets for blocks |
2284 | 0 | trailer_offset += (int32_t) (nblocks * sizeof(int32_t)); |
2285 | 0 | if (memcpyed) { |
2286 | 0 | streams_offset += 0; |
2287 | 0 | } else { |
2288 | 0 | streams_offset += nblocks * sizeof(int32_t); |
2289 | 0 | } |
2290 | 0 | trailer_len = (int32_t) (sizeof(int32_t) + sizeof(int64_t) + nblocks * sizeof(int32_t)); |
2291 | 0 | lazychunk_cbytes = trailer_offset + trailer_len; |
2292 | 0 | } |
2293 | 0 | else if (special_type == BLOSC2_SPECIAL_VALUE) { |
2294 | 0 | trailer_offset += typesize; |
2295 | 0 | streams_offset += typesize; |
2296 | 0 | trailer_len = 0; |
2297 | 0 | lazychunk_cbytes = trailer_offset + trailer_len; |
2298 | 0 | } |
2299 | 0 | else { |
2300 | 0 | rc = BLOSC2_ERROR_INVALID_HEADER; |
2301 | 0 | goto end; |
2302 | 0 | } |
2303 | 0 | *chunk = malloc(lazychunk_cbytes); |
2304 | 0 | *needs_free = true; |
2305 | | |
2306 | | // Read just the full header and bstarts section too (lazy partial length) |
2307 | 0 | if (frame->sframe) { |
2308 | 0 | io_cb->seek(fp, 0, SEEK_SET); |
2309 | 0 | } |
2310 | 0 | else { |
2311 | 0 | io_cb->seek(fp, frame->file_offset + header_len + offset, SEEK_SET); |
2312 | 0 | } |
2313 | |
|
2314 | 0 | rbytes = io_cb->read(*chunk, 1, (int64_t)streams_offset, fp); |
2315 | 0 | if (rbytes != (int64_t)streams_offset) { |
2316 | 0 | BLOSC_TRACE_ERROR("Cannot read the (lazy) chunk out of the frame."); |
2317 | 0 | rc = BLOSC2_ERROR_FILE_READ; |
2318 | 0 | goto end; |
2319 | 0 | } |
2320 | 0 | if (special_type == BLOSC2_SPECIAL_VALUE) { |
2321 | | // Value runlen is not returning a lazy chunk. We are done. |
2322 | 0 | goto end; |
2323 | 0 | } |
2324 | | |
2325 | | // Mark chunk as lazy |
2326 | 0 | uint8_t* blosc2_flags = *chunk + BLOSC2_CHUNK_BLOSC2_FLAGS; |
2327 | 0 | *blosc2_flags |= 0x08U; |
2328 | | |
2329 | | // Add the trailer (currently, nchunk + offset + block_csizes) |
2330 | 0 | if (frame->sframe) { |
2331 | 0 | *(int32_t*)(*chunk + trailer_offset) = (int32_t)offset; // offset is nchunk for sframes |
2332 | 0 | *(int64_t*)(*chunk + trailer_offset + sizeof(int32_t)) = offset; |
2333 | 0 | } |
2334 | 0 | else { |
2335 | 0 | *(int32_t*)(*chunk + trailer_offset) = (int32_t)nchunk; |
2336 | 0 | *(int64_t*)(*chunk + trailer_offset + sizeof(int32_t)) = header_len + offset; |
2337 | 0 | } |
2338 | |
|
2339 | 0 | int32_t* block_csizes = malloc(nblocks * sizeof(int32_t)); |
2340 | |
|
2341 | 0 | if (memcpyed) { |
2342 | | // When memcpyed the blocksizes are trivial to compute |
2343 | 0 | for (int i = 0; i < (int)nblocks - 1; i++) { |
2344 | 0 | block_csizes[i] = (int)chunk_blocksize; |
2345 | 0 | } |
2346 | | // The last block could be incomplete, mainly due to the fact that the block size is not divisible |
2347 | | // by the typesize |
2348 | 0 | block_csizes[nblocks - 1] = (int32_t)leftover_block ? (int32_t)leftover_block : chunk_blocksize; |
2349 | 0 | } |
2350 | 0 | else { |
2351 | | // In regular, compressed chunks, we need to sort the bstarts (they can be out |
2352 | | // of order because of multi-threading), and get a reverse index too. |
2353 | 0 | memcpy(block_csizes, *chunk + BLOSC_EXTENDED_HEADER_LENGTH, nblocks * sizeof(int32_t)); |
2354 | | // Helper structure to keep track of original indexes |
2355 | 0 | struct csize_idx *csize_idx = malloc(nblocks * sizeof(struct csize_idx)); |
2356 | 0 | for (int n = 0; n < (int)nblocks; n++) { |
2357 | 0 | csize_idx[n].val = block_csizes[n]; |
2358 | 0 | csize_idx[n].idx = n; |
2359 | 0 | } |
2360 | 0 | qsort(csize_idx, nblocks, sizeof(struct csize_idx), &sort_offset); |
2361 | | // Compute the actual csizes |
2362 | 0 | int idx; |
2363 | 0 | for (int n = 0; n < (int)nblocks - 1; n++) { |
2364 | 0 | idx = csize_idx[n].idx; |
2365 | 0 | block_csizes[idx] = csize_idx[n + 1].val - csize_idx[n].val; |
2366 | 0 | } |
2367 | 0 | idx = csize_idx[nblocks - 1].idx; |
2368 | 0 | block_csizes[idx] = (int)chunk_cbytes - csize_idx[nblocks - 1].val; |
2369 | 0 | free(csize_idx); |
2370 | 0 | } |
2371 | | // Copy the csizes at the end of the trailer |
2372 | 0 | void *trailer_csizes = *chunk + lazychunk_cbytes - nblocks * sizeof(int32_t); |
2373 | 0 | memcpy(trailer_csizes, block_csizes, nblocks * sizeof(int32_t)); |
2374 | 0 | free(block_csizes); |
2375 | 16 | } else { |
2376 | | // The chunk is in memory and just one pointer away |
2377 | 16 | int64_t chunk_header_offset = header_len + offset; |
2378 | 16 | int64_t chunk_cbytes_offset = chunk_header_offset + BLOSC_MIN_HEADER_LENGTH; |
2379 | | |
2380 | 16 | *chunk = frame->cframe + chunk_header_offset; |
2381 | | |
2382 | 16 | if (chunk_cbytes_offset > frame->len) { |
2383 | 0 | BLOSC_TRACE_ERROR("Cannot read the header for chunk in the (contiguous) frame."); |
2384 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
2385 | 16 | } else { |
2386 | 16 | rc = blosc2_cbuffer_sizes(*chunk, NULL, &lazychunk_cbytes, NULL); |
2387 | 16 | if (rc && chunk_cbytes_offset + lazychunk_cbytes > frame_len) { |
2388 | 0 | BLOSC_TRACE_ERROR("Compressed bytes exceed beyond frame length."); |
2389 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
2390 | 0 | } |
2391 | 16 | } |
2392 | 16 | } |
2393 | | |
2394 | 21 | end: |
2395 | 21 | if (fp != NULL) { |
2396 | 0 | io_cb->close(fp); |
2397 | 0 | } |
2398 | 21 | if (rc < 0) { |
2399 | 0 | if (*needs_free) { |
2400 | 0 | free(*chunk); |
2401 | 0 | *chunk = NULL; |
2402 | 0 | *needs_free = false; |
2403 | 0 | } |
2404 | 0 | return rc; |
2405 | 0 | } |
2406 | | |
2407 | 21 | return (int)lazychunk_cbytes; |
2408 | 21 | } |
2409 | | |
2410 | | |
2411 | | /* Fill an empty frame with special values (fast path). */ |
2412 | | int64_t frame_fill_special(blosc2_frame_s* frame, int64_t nitems, int special_value, |
2413 | 0 | int32_t chunksize, blosc2_schunk* schunk) { |
2414 | 0 | int32_t header_len; |
2415 | 0 | int64_t frame_len; |
2416 | 0 | int64_t nbytes; |
2417 | 0 | int64_t cbytes; |
2418 | 0 | int32_t blocksize; |
2419 | 0 | int32_t typesize; |
2420 | 0 | int64_t nchunks; |
2421 | |
|
2422 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, &blocksize, NULL, |
2423 | 0 | &nchunks, &typesize, NULL, NULL, NULL, NULL, NULL, NULL, |
2424 | 0 | schunk->storage->io); |
2425 | 0 | if (rc < 0) { |
2426 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2427 | 0 | return BLOSC2_ERROR_DATA; |
2428 | 0 | } |
2429 | | |
2430 | 0 | if (nitems == 0) { |
2431 | 0 | return frame_len; |
2432 | 0 | } |
2433 | | |
2434 | 0 | if ((nitems / chunksize) > INT_MAX) { |
2435 | 0 | BLOSC_TRACE_ERROR("nitems is too large. Try increasing the chunksize."); |
2436 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2437 | 0 | } |
2438 | | |
2439 | 0 | if ((nbytes > 0) || (cbytes > 0)) { |
2440 | 0 | BLOSC_TRACE_ERROR("Filling with special values only works on empty frames"); |
2441 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2442 | 0 | } |
2443 | | |
2444 | | // Compute the number of chunks and the length of the offsets chunk |
2445 | 0 | int32_t chunkitems = chunksize / typesize; |
2446 | 0 | nchunks = nitems / chunkitems; |
2447 | 0 | int32_t leftover_items = (int32_t)(nitems % chunkitems); |
2448 | 0 | if (leftover_items) { |
2449 | 0 | nchunks += 1; |
2450 | 0 | } |
2451 | |
|
2452 | 0 | blosc2_cparams* cparams; |
2453 | 0 | blosc2_schunk_get_cparams(schunk, &cparams); |
2454 | | |
2455 | | // Build the offsets with a special chunk |
2456 | 0 | int new_off_cbytes = BLOSC_EXTENDED_HEADER_LENGTH + sizeof(int64_t); |
2457 | 0 | uint8_t* off_chunk = malloc(new_off_cbytes); |
2458 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
2459 | 0 | uint8_t* sample_chunk = malloc(BLOSC_EXTENDED_HEADER_LENGTH); |
2460 | 0 | int csize; |
2461 | 0 | switch (special_value) { |
2462 | 0 | case BLOSC2_SPECIAL_ZERO: |
2463 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_ZERO << (8 * 7); |
2464 | 0 | csize = blosc2_chunk_zeros(*cparams, chunksize, sample_chunk, BLOSC_EXTENDED_HEADER_LENGTH); |
2465 | 0 | break; |
2466 | 0 | case BLOSC2_SPECIAL_UNINIT: |
2467 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); |
2468 | 0 | csize = blosc2_chunk_uninit(*cparams, chunksize, sample_chunk, BLOSC_EXTENDED_HEADER_LENGTH); |
2469 | 0 | break; |
2470 | 0 | case BLOSC2_SPECIAL_NAN: |
2471 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); |
2472 | 0 | csize = blosc2_chunk_nans(*cparams, chunksize, sample_chunk, BLOSC_EXTENDED_HEADER_LENGTH); |
2473 | 0 | break; |
2474 | 0 | default: |
2475 | 0 | BLOSC_TRACE_ERROR("Only zeros, NaNs or non-initialized values are supported."); |
2476 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2477 | 0 | } |
2478 | 0 | if (csize < 0) { |
2479 | 0 | BLOSC_TRACE_ERROR("Error creating sample chunk"); |
2480 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2481 | 0 | } |
2482 | 0 | cparams->typesize = sizeof(int64_t); // change it to offsets typesize |
2483 | | // cparams->blocksize = 0; // automatic blocksize |
2484 | 0 | cparams->blocksize = 8 * 2 * 1024; // based on experiments with create_frame.c bench |
2485 | 0 | cparams->clevel = 5; |
2486 | 0 | cparams->compcode = BLOSC_BLOSCLZ; |
2487 | 0 | int32_t special_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
2488 | 0 | rc = blosc2_chunk_repeatval(*cparams, special_nbytes, off_chunk, new_off_cbytes, &offset_value); |
2489 | 0 | free(cparams); |
2490 | 0 | if (rc < 0) { |
2491 | 0 | BLOSC_TRACE_ERROR("Error creating a special offsets chunk"); |
2492 | 0 | return BLOSC2_ERROR_DATA; |
2493 | 0 | } |
2494 | | |
2495 | | // Get the blocksize associated to the sample chunk |
2496 | 0 | blosc2_cbuffer_sizes(sample_chunk, NULL, NULL, &blocksize); |
2497 | 0 | free(sample_chunk); |
2498 | | // and use it for the super-chunk |
2499 | 0 | schunk->blocksize = blocksize; |
2500 | | // schunk->blocksize = 0; // for experimenting with automatic blocksize |
2501 | | |
2502 | | // We have the new offsets; update the frame. |
2503 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2504 | 0 | if (io_cb == NULL) { |
2505 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2506 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
2507 | 0 | } |
2508 | | |
2509 | 0 | int64_t new_frame_len = header_len + new_off_cbytes + frame->trailer_len; |
2510 | 0 | void* fp = NULL; |
2511 | 0 | if (frame->cframe != NULL) { |
2512 | 0 | uint8_t* framep = frame->cframe; |
2513 | | /* Make space for the new chunk and copy it */ |
2514 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
2515 | 0 | if (framep == NULL) { |
2516 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
2517 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2518 | 0 | } |
2519 | | /* Copy the offsets */ |
2520 | 0 | memcpy(framep + header_len, off_chunk, (size_t)new_off_cbytes); |
2521 | 0 | } |
2522 | 0 | else { |
2523 | 0 | size_t wbytes; |
2524 | 0 | if (frame->sframe) { |
2525 | | // Update the offsets chunk in the chunks frame |
2526 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", frame->schunk->storage->io); |
2527 | 0 | if (fp == NULL) { |
2528 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2529 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2530 | 0 | } |
2531 | 0 | io_cb->seek(fp, frame->file_offset + header_len, SEEK_SET); |
2532 | 0 | } |
2533 | 0 | else { |
2534 | | // Regular frame |
2535 | 0 | fp = io_cb->open(frame->urlpath, "rb+", schunk->storage->io->params); |
2536 | 0 | if (fp == NULL) { |
2537 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2538 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
2539 | 0 | } |
2540 | 0 | io_cb->seek(fp, frame->file_offset + header_len + cbytes, SEEK_SET); |
2541 | 0 | } |
2542 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, fp); // the new offsets |
2543 | 0 | io_cb->close(fp); |
2544 | 0 | if (wbytes != (size_t)new_off_cbytes) { |
2545 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
2546 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2547 | 0 | } |
2548 | 0 | } |
2549 | | |
2550 | | // Invalidate the cache for chunk offsets |
2551 | 0 | if (frame->coffsets != NULL) { |
2552 | 0 | free(frame->coffsets); |
2553 | 0 | frame->coffsets = NULL; |
2554 | 0 | } |
2555 | 0 | free(off_chunk); |
2556 | |
|
2557 | 0 | frame->len = new_frame_len; |
2558 | 0 | rc = frame_update_header(frame, schunk, false); |
2559 | 0 | if (rc < 0) { |
2560 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2561 | 0 | } |
2562 | | |
2563 | 0 | rc = frame_update_trailer(frame, schunk); |
2564 | 0 | if (rc < 0) { |
2565 | 0 | return BLOSC2_ERROR_FRAME_SPECIAL; |
2566 | 0 | } |
2567 | | |
2568 | 0 | return frame->len; |
2569 | 0 | } |
2570 | | |
2571 | | |
2572 | | /* Append an existing chunk into a frame. */ |
2573 | 0 | void* frame_append_chunk(blosc2_frame_s* frame, void* chunk, blosc2_schunk* schunk) { |
2574 | 0 | int8_t* chunk_ = chunk; |
2575 | 0 | int32_t header_len; |
2576 | 0 | int64_t frame_len; |
2577 | 0 | int64_t nbytes; |
2578 | 0 | int64_t cbytes; |
2579 | 0 | int32_t blocksize; |
2580 | 0 | int32_t chunksize; |
2581 | 0 | int64_t nchunks; |
2582 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, &blocksize, &chunksize, |
2583 | 0 | &nchunks, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2584 | 0 | frame->schunk->storage->io); |
2585 | 0 | if (rc < 0) { |
2586 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2587 | 0 | return NULL; |
2588 | 0 | } |
2589 | | |
2590 | | /* The uncompressed and compressed sizes start at byte 4 and 12 */ |
2591 | 0 | int32_t chunk_nbytes; |
2592 | 0 | int32_t chunk_cbytes; |
2593 | 0 | rc = blosc2_cbuffer_sizes(chunk, &chunk_nbytes, &chunk_cbytes, NULL); |
2594 | 0 | if (rc < 0) { |
2595 | 0 | return NULL; |
2596 | 0 | } |
2597 | | |
2598 | 0 | if ((nchunks > 0) && (chunk_nbytes > chunksize)) { |
2599 | 0 | BLOSC_TRACE_ERROR("Appending chunks with a larger chunksize than frame is " |
2600 | 0 | "not allowed yet %d != %d.", chunk_nbytes, chunksize); |
2601 | 0 | return NULL; |
2602 | 0 | } |
2603 | | |
2604 | | // Check that we are not appending a small chunk after another small chunk |
2605 | 0 | int32_t chunk_nbytes_last; |
2606 | 0 | if (chunksize == 0 && (nchunks > 0) && (chunk_nbytes < chunksize)) { |
2607 | 0 | uint8_t* last_chunk; |
2608 | 0 | bool needs_free; |
2609 | 0 | rc = frame_get_lazychunk(frame, nchunks - 1, &last_chunk, &needs_free); |
2610 | 0 | if (rc < 0) { |
2611 | 0 | BLOSC_TRACE_ERROR("Cannot get the last chunk (in position %" PRId64 ").", nchunks - 1); |
2612 | 0 | } else { |
2613 | 0 | rc = blosc2_cbuffer_sizes(last_chunk, &chunk_nbytes_last, NULL, NULL); |
2614 | 0 | } |
2615 | 0 | if (needs_free) { |
2616 | 0 | free(last_chunk); |
2617 | 0 | } |
2618 | 0 | if (rc < 0) { |
2619 | 0 | return NULL; |
2620 | 0 | } |
2621 | 0 | if ((chunk_nbytes_last < chunksize) && (nbytes < chunksize)) { |
2622 | 0 | BLOSC_TRACE_ERROR("Appending two consecutive chunks with a chunksize smaller " |
2623 | 0 | "than the frame chunksize is not allowed yet: %d != %d.", |
2624 | 0 | chunk_nbytes, chunksize); |
2625 | 0 | return NULL; |
2626 | 0 | } |
2627 | 0 | } |
2628 | | |
2629 | | // Get the current offsets and add one more |
2630 | 0 | int32_t off_nbytes = (int32_t) ((nchunks + 1) * sizeof(int64_t)); |
2631 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
2632 | 0 | if (nchunks > 0) { |
2633 | 0 | int32_t coffsets_cbytes; |
2634 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
2635 | 0 | if (coffsets == NULL) { |
2636 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
2637 | 0 | free(offsets); |
2638 | 0 | return NULL; |
2639 | 0 | } |
2640 | 0 | if (coffsets_cbytes == 0) { |
2641 | 0 | coffsets_cbytes = (int32_t)cbytes; |
2642 | 0 | } |
2643 | | |
2644 | | // Decompress offsets |
2645 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
2646 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
2647 | 0 | if (dctx == NULL) { |
2648 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
2649 | 0 | return NULL; |
2650 | 0 | } |
2651 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, |
2652 | 0 | off_nbytes); |
2653 | 0 | blosc2_free_ctx(dctx); |
2654 | 0 | if (prev_nbytes < 0) { |
2655 | 0 | free(offsets); |
2656 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
2657 | 0 | return NULL; |
2658 | 0 | } |
2659 | 0 | } |
2660 | | |
2661 | | // Add the new offset |
2662 | 0 | int64_t sframe_chunk_id = -1; |
2663 | 0 | int special_value = (chunk_[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
2664 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
2665 | 0 | switch (special_value) { |
2666 | 0 | case BLOSC2_SPECIAL_ZERO: |
2667 | | // Zero chunk. Code it in a special way. |
2668 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_ZERO << (8 * 7); // chunk of zeros |
2669 | 0 | to_little(offsets + nchunks, &offset_value, sizeof(uint64_t)); |
2670 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2671 | 0 | break; |
2672 | 0 | case BLOSC2_SPECIAL_UNINIT: |
2673 | | // Non initizalized values chunk. Code it in a special way. |
2674 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); // chunk of uninit values |
2675 | 0 | to_little(offsets + nchunks, &offset_value, sizeof(uint64_t)); |
2676 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2677 | 0 | break; |
2678 | 0 | case BLOSC2_SPECIAL_NAN: |
2679 | | // NaN chunk. Code it in a special way. |
2680 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); // chunk of NANs |
2681 | 0 | to_little(offsets + nchunks, &offset_value, sizeof(uint64_t)); |
2682 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2683 | 0 | break; |
2684 | 0 | default: |
2685 | 0 | if (frame->sframe) { |
2686 | | // Compute the sframe_chunk_id value |
2687 | 0 | for (int i = 0; i < nchunks; ++i) { |
2688 | 0 | if (offsets[i] > sframe_chunk_id) { |
2689 | 0 | sframe_chunk_id = offsets[i]; |
2690 | 0 | } |
2691 | 0 | } |
2692 | 0 | offsets[nchunks] = ++sframe_chunk_id; |
2693 | 0 | } |
2694 | 0 | else { |
2695 | 0 | offsets[nchunks] = cbytes; |
2696 | 0 | } |
2697 | 0 | } |
2698 | | |
2699 | | // Re-compress the offsets again |
2700 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
2701 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
2702 | 0 | cparams.typesize = sizeof(int64_t); |
2703 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
2704 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
2705 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
2706 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
2707 | 0 | if (cctx == NULL) { |
2708 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
2709 | 0 | return NULL; |
2710 | 0 | } |
2711 | 0 | cctx->typesize = sizeof(int64_t); // override a possible BLOSC_TYPESIZE env variable (or chaos may appear) |
2712 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
2713 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
2714 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
2715 | 0 | blosc2_free_ctx(cctx); |
2716 | 0 | free(offsets); |
2717 | 0 | if (new_off_cbytes < 0) { |
2718 | 0 | free(off_chunk); |
2719 | 0 | return NULL; |
2720 | 0 | } |
2721 | | // printf("%f\n", (double) off_nbytes / new_off_cbytes); |
2722 | | |
2723 | 0 | int64_t new_cbytes = cbytes + chunk_cbytes; |
2724 | 0 | int64_t new_frame_len; |
2725 | 0 | if (frame->sframe) { |
2726 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
2727 | 0 | } |
2728 | 0 | else { |
2729 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
2730 | 0 | } |
2731 | |
|
2732 | 0 | void* fp = NULL; |
2733 | 0 | if (frame->cframe != NULL) { |
2734 | 0 | uint8_t* framep = frame->cframe; |
2735 | | /* Make space for the new chunk and copy it */ |
2736 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
2737 | 0 | if (framep == NULL) { |
2738 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
2739 | 0 | return NULL; |
2740 | 0 | } |
2741 | | /* Copy the chunk */ |
2742 | 0 | memcpy(framep + header_len + cbytes, chunk, (size_t)chunk_cbytes); |
2743 | | /* Copy the offsets */ |
2744 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
2745 | 0 | } |
2746 | 0 | else { |
2747 | 0 | int64_t wbytes; |
2748 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2749 | 0 | if (io_cb == NULL) { |
2750 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2751 | 0 | return NULL; |
2752 | 0 | } |
2753 | | |
2754 | 0 | if (frame->sframe) { |
2755 | | // Update the offsets chunk in the chunks frame |
2756 | 0 | if (chunk_cbytes != 0) { |
2757 | 0 | if (sframe_chunk_id < 0) { |
2758 | 0 | BLOSC_TRACE_ERROR("The chunk id (%" PRId64 ") is not correct", sframe_chunk_id); |
2759 | 0 | return NULL; |
2760 | 0 | } |
2761 | 0 | if (sframe_create_chunk(frame, chunk, sframe_chunk_id, chunk_cbytes) == NULL) { |
2762 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk."); |
2763 | 0 | return NULL; |
2764 | 0 | } |
2765 | 0 | } |
2766 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
2767 | 0 | frame->schunk->storage->io); |
2768 | 0 | if (fp == NULL) { |
2769 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2770 | 0 | return NULL; |
2771 | 0 | } |
2772 | 0 | io_cb->seek(fp, frame->file_offset + header_len, SEEK_SET); |
2773 | 0 | } |
2774 | 0 | else { |
2775 | | // Regular frame |
2776 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
2777 | 0 | if (fp == NULL) { |
2778 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2779 | 0 | return NULL; |
2780 | 0 | } |
2781 | 0 | io_cb->seek(fp, frame->file_offset + header_len + cbytes, SEEK_SET); |
2782 | 0 | wbytes = io_cb->write(chunk, 1, chunk_cbytes, fp); // the new chunk |
2783 | 0 | if (wbytes != chunk_cbytes) { |
2784 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk to frame."); |
2785 | 0 | io_cb->close(fp); |
2786 | 0 | return NULL; |
2787 | 0 | } |
2788 | 0 | } |
2789 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, fp); // the new offsets |
2790 | 0 | io_cb->close(fp); |
2791 | 0 | if (wbytes != new_off_cbytes) { |
2792 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
2793 | 0 | return NULL; |
2794 | 0 | } |
2795 | 0 | } |
2796 | | // Invalidate the cache for chunk offsets |
2797 | 0 | if (frame->coffsets != NULL) { |
2798 | 0 | free(frame->coffsets); |
2799 | 0 | frame->coffsets = NULL; |
2800 | 0 | } |
2801 | 0 | free(chunk); // chunk has always to be a copy when reaching here... |
2802 | 0 | free(off_chunk); |
2803 | |
|
2804 | 0 | frame->len = new_frame_len; |
2805 | 0 | rc = frame_update_header(frame, schunk, false); |
2806 | 0 | if (rc < 0) { |
2807 | 0 | return NULL; |
2808 | 0 | } |
2809 | | |
2810 | 0 | rc = frame_update_trailer(frame, schunk); |
2811 | 0 | if (rc < 0) { |
2812 | 0 | return NULL; |
2813 | 0 | } |
2814 | | |
2815 | 0 | return frame; |
2816 | 0 | } |
2817 | | |
2818 | | |
2819 | 0 | void* frame_insert_chunk(blosc2_frame_s* frame, int64_t nchunk, void* chunk, blosc2_schunk* schunk) { |
2820 | 0 | uint8_t* chunk_ = chunk; |
2821 | 0 | int32_t header_len; |
2822 | 0 | int64_t frame_len; |
2823 | 0 | int64_t nbytes; |
2824 | 0 | int64_t cbytes; |
2825 | 0 | int32_t blocksize; |
2826 | 0 | int32_t chunksize; |
2827 | 0 | int64_t nchunks; |
2828 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
2829 | 0 | &blocksize, &chunksize, &nchunks, |
2830 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
2831 | 0 | frame->schunk->storage->io); |
2832 | 0 | if (rc < 0) { |
2833 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
2834 | 0 | return NULL; |
2835 | 0 | } |
2836 | 0 | int32_t chunk_cbytes; |
2837 | 0 | rc = blosc2_cbuffer_sizes(chunk_, NULL, &chunk_cbytes, NULL); |
2838 | 0 | if (rc < 0) { |
2839 | 0 | return NULL; |
2840 | 0 | } |
2841 | | |
2842 | | // Get the current offsets |
2843 | 0 | int32_t off_nbytes = (int32_t) ((nchunks + 1) * sizeof(int64_t)); |
2844 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
2845 | 0 | if (nchunks > 0) { |
2846 | 0 | int32_t coffsets_cbytes = 0; |
2847 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
2848 | 0 | if (coffsets == NULL) { |
2849 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
2850 | 0 | return NULL; |
2851 | 0 | } |
2852 | 0 | if (coffsets_cbytes == 0) { |
2853 | 0 | coffsets_cbytes = (int32_t)cbytes; |
2854 | 0 | } |
2855 | | |
2856 | | // Decompress offsets |
2857 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
2858 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
2859 | 0 | if (dctx == NULL) { |
2860 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
2861 | 0 | return NULL; |
2862 | 0 | } |
2863 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, off_nbytes); |
2864 | 0 | blosc2_free_ctx(dctx); |
2865 | 0 | if (prev_nbytes < 0) { |
2866 | 0 | free(offsets); |
2867 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
2868 | 0 | return NULL; |
2869 | 0 | } |
2870 | 0 | } |
2871 | | |
2872 | | // TODO: Improvement: Check if new chunk is smaller than previous one |
2873 | | |
2874 | | // Move offsets |
2875 | 0 | for (int64_t i = nchunks; i > nchunk; i--) { |
2876 | 0 | offsets[i] = offsets[i - 1]; |
2877 | 0 | } |
2878 | | // Add the new offset |
2879 | 0 | int64_t sframe_chunk_id = -1; |
2880 | 0 | int special_value = (chunk_[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
2881 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
2882 | 0 | switch (special_value) { |
2883 | 0 | case BLOSC2_SPECIAL_ZERO: |
2884 | | // Zero chunk. Code it in a special way. |
2885 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_ZERO << (8 * 7); // indicate a chunk of zeros |
2886 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
2887 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2888 | 0 | break; |
2889 | 0 | case BLOSC2_SPECIAL_UNINIT: |
2890 | | // Non initizalized values chunk. Code it in a special way. |
2891 | 0 | offset_value += (uint64_t) BLOSC2_SPECIAL_UNINIT << (8 * 7); // chunk of uninit values |
2892 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
2893 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2894 | 0 | break; |
2895 | 0 | case BLOSC2_SPECIAL_NAN: |
2896 | | // NaN chunk. Code it in a special way. |
2897 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); // indicate a chunk of NANs |
2898 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
2899 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
2900 | 0 | break; |
2901 | 0 | default: |
2902 | 0 | if (frame->sframe) { |
2903 | 0 | for (int i = 0; i <= nchunks; ++i) { |
2904 | | // offsets[nchunk] is still uninitialized here |
2905 | 0 | if (i != nchunk && offsets[i] > sframe_chunk_id) { |
2906 | 0 | sframe_chunk_id = offsets[i]; |
2907 | 0 | } |
2908 | 0 | } |
2909 | 0 | offsets[nchunk] = ++sframe_chunk_id; |
2910 | 0 | } |
2911 | 0 | else { |
2912 | 0 | offsets[nchunk] = cbytes; |
2913 | 0 | } |
2914 | 0 | } |
2915 | | |
2916 | | // Re-compress the offsets again |
2917 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
2918 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
2919 | 0 | cparams.typesize = sizeof(int64_t); |
2920 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
2921 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
2922 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
2923 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
2924 | 0 | if (cctx == NULL) { |
2925 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
2926 | 0 | return NULL; |
2927 | 0 | } |
2928 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
2929 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
2930 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
2931 | 0 | blosc2_free_ctx(cctx); |
2932 | |
|
2933 | 0 | free(offsets); |
2934 | 0 | if (new_off_cbytes < 0) { |
2935 | 0 | free(off_chunk); |
2936 | 0 | return NULL; |
2937 | 0 | } |
2938 | | |
2939 | 0 | int64_t new_cbytes = cbytes + chunk_cbytes; |
2940 | |
|
2941 | 0 | int64_t new_frame_len; |
2942 | 0 | if (frame->sframe) { |
2943 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
2944 | 0 | } |
2945 | 0 | else { |
2946 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
2947 | 0 | } |
2948 | | |
2949 | | // Add the chunk and update meta |
2950 | 0 | void* fp = NULL; |
2951 | 0 | if (frame->cframe != NULL) { |
2952 | 0 | uint8_t* framep = frame->cframe; |
2953 | | /* Make space for the new chunk and copy it */ |
2954 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
2955 | 0 | if (framep == NULL) { |
2956 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
2957 | 0 | return NULL; |
2958 | 0 | } |
2959 | | /* Copy the chunk */ |
2960 | 0 | memcpy(framep + header_len + cbytes, chunk, (size_t)chunk_cbytes); |
2961 | | /* Copy the offsets */ |
2962 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
2963 | 0 | } else { |
2964 | 0 | int64_t wbytes; |
2965 | |
|
2966 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
2967 | 0 | if (io_cb == NULL) { |
2968 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
2969 | 0 | return NULL; |
2970 | 0 | } |
2971 | | |
2972 | 0 | if (frame->sframe) { |
2973 | 0 | if (chunk_cbytes != 0) { |
2974 | 0 | if (sframe_chunk_id < 0) { |
2975 | 0 | BLOSC_TRACE_ERROR("The chunk id (%" PRId64 ") is not correct", sframe_chunk_id); |
2976 | 0 | return NULL; |
2977 | 0 | } |
2978 | 0 | if (sframe_create_chunk(frame, chunk, sframe_chunk_id, chunk_cbytes) == NULL) { |
2979 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk."); |
2980 | 0 | return NULL; |
2981 | 0 | } |
2982 | 0 | } |
2983 | | // Update the offsets chunk in the chunks frame |
2984 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
2985 | 0 | frame->schunk->storage->io); |
2986 | 0 | if (fp == NULL) { |
2987 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2988 | 0 | return NULL; |
2989 | 0 | } |
2990 | 0 | io_cb->seek(fp, frame->file_offset + header_len + 0, SEEK_SET); |
2991 | 0 | } |
2992 | 0 | else { |
2993 | | // Regular frame |
2994 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
2995 | 0 | if (fp == NULL) { |
2996 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
2997 | 0 | return NULL; |
2998 | 0 | } |
2999 | 0 | io_cb->seek(fp, frame->file_offset + header_len + cbytes, SEEK_SET); |
3000 | 0 | wbytes = io_cb->write(chunk, 1, chunk_cbytes, fp); // the new chunk |
3001 | 0 | if (wbytes != chunk_cbytes) { |
3002 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk to frame."); |
3003 | 0 | io_cb->close(fp); |
3004 | 0 | return NULL; |
3005 | 0 | } |
3006 | 0 | } |
3007 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, fp); // the new offsets |
3008 | 0 | io_cb->close(fp); |
3009 | 0 | if (wbytes != new_off_cbytes) { |
3010 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3011 | 0 | return NULL; |
3012 | 0 | } |
3013 | | // Invalidate the cache for chunk offsets |
3014 | 0 | if (frame->coffsets != NULL) { |
3015 | 0 | free(frame->coffsets); |
3016 | 0 | frame->coffsets = NULL; |
3017 | 0 | } |
3018 | 0 | } |
3019 | 0 | free(chunk); // chunk has always to be a copy when reaching here... |
3020 | 0 | free(off_chunk); |
3021 | |
|
3022 | 0 | frame->len = new_frame_len; |
3023 | 0 | rc = frame_update_header(frame, schunk, false); |
3024 | 0 | if (rc < 0) { |
3025 | 0 | return NULL; |
3026 | 0 | } |
3027 | | |
3028 | 0 | rc = frame_update_trailer(frame, schunk); |
3029 | 0 | if (rc < 0) { |
3030 | 0 | return NULL; |
3031 | 0 | } |
3032 | | |
3033 | 0 | return frame; |
3034 | 0 | } |
3035 | | |
3036 | | |
3037 | 0 | void* frame_update_chunk(blosc2_frame_s* frame, int64_t nchunk, void* chunk, blosc2_schunk* schunk) { |
3038 | 0 | uint8_t *chunk_ = (uint8_t *) chunk; |
3039 | 0 | int32_t header_len; |
3040 | 0 | int64_t frame_len; |
3041 | 0 | int64_t nbytes; |
3042 | 0 | int64_t cbytes; |
3043 | 0 | int32_t blocksize; |
3044 | 0 | int32_t chunksize; |
3045 | 0 | int64_t nchunks; |
3046 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
3047 | 0 | &blocksize, &chunksize, &nchunks, |
3048 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
3049 | 0 | frame->schunk->storage->io); |
3050 | 0 | if (rc < 0) { |
3051 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
3052 | 0 | return NULL; |
3053 | 0 | } |
3054 | 0 | if (nchunk >= nchunks) { |
3055 | 0 | BLOSC_TRACE_ERROR("The chunk must already exist."); |
3056 | 0 | return NULL; |
3057 | 0 | } |
3058 | | |
3059 | 0 | int32_t chunk_cbytes; |
3060 | 0 | rc = blosc2_cbuffer_sizes(chunk, NULL, &chunk_cbytes, NULL); |
3061 | 0 | if (rc < 0) { |
3062 | 0 | return NULL; |
3063 | 0 | } |
3064 | | |
3065 | | // Get the current offsets |
3066 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
3067 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
3068 | 0 | if (nchunks > 0) { |
3069 | 0 | int32_t coffsets_cbytes = 0; |
3070 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
3071 | 0 | if (coffsets == NULL) { |
3072 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
3073 | 0 | return NULL; |
3074 | 0 | } |
3075 | 0 | if (coffsets_cbytes == 0) { |
3076 | 0 | coffsets_cbytes = (int32_t)cbytes; |
3077 | 0 | } |
3078 | | |
3079 | | // Decompress offsets |
3080 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
3081 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
3082 | 0 | if (dctx == NULL) { |
3083 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
3084 | 0 | return NULL; |
3085 | 0 | } |
3086 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, off_nbytes); |
3087 | 0 | blosc2_free_ctx(dctx); |
3088 | 0 | if (prev_nbytes < 0) { |
3089 | 0 | free(offsets); |
3090 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
3091 | 0 | return NULL; |
3092 | 0 | } |
3093 | 0 | } |
3094 | 0 | int32_t cbytes_old; |
3095 | 0 | int64_t old_offset; |
3096 | 0 | if (!frame->sframe) { |
3097 | | // See how big would be the space |
3098 | 0 | old_offset = offsets[nchunk]; |
3099 | 0 | bool needs_free; |
3100 | 0 | uint8_t *chunk_old; |
3101 | 0 | int err = blosc2_schunk_get_chunk(schunk, nchunk, &chunk_old, &needs_free); |
3102 | 0 | if (err < 0) { |
3103 | 0 | BLOSC_TRACE_ERROR("%" PRId64 " chunk can not be obtained from schunk.", nchunk); |
3104 | 0 | return NULL; |
3105 | 0 | } |
3106 | | |
3107 | 0 | if (chunk_old == NULL) { |
3108 | 0 | cbytes_old = 0; |
3109 | 0 | } |
3110 | 0 | else { |
3111 | 0 | cbytes_old = sw32_(chunk_old + BLOSC2_CHUNK_CBYTES); |
3112 | 0 | if (cbytes_old == BLOSC2_MAX_OVERHEAD) { |
3113 | 0 | cbytes_old = 0; |
3114 | 0 | } |
3115 | 0 | } |
3116 | 0 | if (needs_free) { |
3117 | 0 | free(chunk_old); |
3118 | 0 | } |
3119 | 0 | } |
3120 | | |
3121 | | // Add the new offset |
3122 | 0 | int64_t sframe_chunk_id; |
3123 | 0 | if (frame->sframe) { |
3124 | 0 | if (offsets[nchunk] < 0) { |
3125 | 0 | sframe_chunk_id = -1; |
3126 | 0 | } |
3127 | 0 | else { |
3128 | | // In case there was a reorder in a sframe |
3129 | 0 | sframe_chunk_id = offsets[nchunk]; |
3130 | 0 | } |
3131 | 0 | } |
3132 | 0 | int special_value = (chunk_[BLOSC2_CHUNK_BLOSC2_FLAGS] >> 4) & BLOSC2_SPECIAL_MASK; |
3133 | 0 | uint64_t offset_value = ((uint64_t)1 << 63); |
3134 | 0 | switch (special_value) { |
3135 | 0 | case BLOSC2_SPECIAL_ZERO: |
3136 | | // Zero chunk. Code it in a special way. |
3137 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_ZERO << (8 * 7); // indicate a chunk of zeros |
3138 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
3139 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
3140 | 0 | break; |
3141 | 0 | case BLOSC2_SPECIAL_UNINIT: |
3142 | | // Non initizalized values chunk. Code it in a special way. |
3143 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_UNINIT << (8 * 7); // indicate a chunk of uninit values |
3144 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
3145 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
3146 | 0 | break; |
3147 | 0 | case BLOSC2_SPECIAL_NAN: |
3148 | | // NaN chunk. Code it in a special way. |
3149 | 0 | offset_value += (uint64_t)BLOSC2_SPECIAL_NAN << (8 * 7); // indicate a chunk of NANs |
3150 | 0 | to_little(offsets + nchunk, &offset_value, sizeof(uint64_t)); |
3151 | 0 | chunk_cbytes = 0; // we don't need to store the chunk |
3152 | 0 | break; |
3153 | 0 | default: |
3154 | 0 | if (frame->sframe) { |
3155 | 0 | if (sframe_chunk_id < 0) { |
3156 | 0 | for (int i = 0; i < nchunks; ++i) { |
3157 | 0 | if (offsets[i] > sframe_chunk_id) { |
3158 | 0 | sframe_chunk_id = offsets[i]; |
3159 | 0 | } |
3160 | 0 | } |
3161 | 0 | offsets[nchunk] = ++sframe_chunk_id; |
3162 | 0 | } |
3163 | 0 | } |
3164 | 0 | else { |
3165 | | // Add the new offset |
3166 | 0 | offsets[nchunk] = cbytes; |
3167 | 0 | } |
3168 | 0 | } |
3169 | | |
3170 | 0 | if (!frame->sframe && chunk_cbytes != 0 && cbytes_old >= chunk_cbytes) { |
3171 | 0 | offsets[nchunk] = old_offset; |
3172 | 0 | cbytes = old_offset; |
3173 | 0 | } |
3174 | | // Re-compress the offsets again |
3175 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
3176 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
3177 | 0 | cparams.typesize = sizeof(int64_t); |
3178 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
3179 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
3180 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
3181 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
3182 | 0 | if (cctx == NULL) { |
3183 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
3184 | 0 | return NULL; |
3185 | 0 | } |
3186 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
3187 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
3188 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
3189 | 0 | blosc2_free_ctx(cctx); |
3190 | |
|
3191 | 0 | free(offsets); |
3192 | 0 | if (new_off_cbytes < 0) { |
3193 | 0 | free(off_chunk); |
3194 | 0 | return NULL; |
3195 | 0 | } |
3196 | | |
3197 | 0 | int64_t new_cbytes = schunk->cbytes; |
3198 | 0 | int64_t new_frame_len; |
3199 | 0 | if (frame->sframe) { |
3200 | | // The chunk is not stored in the frame |
3201 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
3202 | 0 | } |
3203 | 0 | else { |
3204 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
3205 | 0 | } |
3206 | |
|
3207 | 0 | void* fp = NULL; |
3208 | 0 | if (frame->cframe != NULL) { |
3209 | 0 | uint8_t* framep = frame->cframe; |
3210 | | /* Make space for the new chunk and copy it */ |
3211 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
3212 | 0 | if (framep == NULL) { |
3213 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
3214 | 0 | return NULL; |
3215 | 0 | } |
3216 | | /* Copy the chunk */ |
3217 | 0 | memcpy(framep + header_len + cbytes, chunk, (size_t)chunk_cbytes); |
3218 | | /* Copy the offsets */ |
3219 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
3220 | 0 | } else { |
3221 | 0 | int64_t wbytes; |
3222 | |
|
3223 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
3224 | 0 | if (io_cb == NULL) { |
3225 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
3226 | 0 | return NULL; |
3227 | 0 | } |
3228 | | |
3229 | 0 | if (frame->sframe) { |
3230 | | // Create the chunks file, if it's a special value this will delete its old content |
3231 | 0 | if (sframe_chunk_id >= 0) { |
3232 | 0 | if (sframe_create_chunk(frame, chunk, sframe_chunk_id, chunk_cbytes) == NULL) { |
3233 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk."); |
3234 | 0 | return NULL; |
3235 | 0 | } |
3236 | 0 | } |
3237 | | // Update the offsets chunk in the chunks frame |
3238 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
3239 | 0 | frame->schunk->storage->io); |
3240 | 0 | if (fp == NULL) { |
3241 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3242 | 0 | return NULL; |
3243 | 0 | } |
3244 | 0 | io_cb->seek(fp, frame->file_offset + header_len + 0, SEEK_SET); |
3245 | 0 | } |
3246 | 0 | else { |
3247 | | // Regular frame |
3248 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
3249 | 0 | if (fp == NULL) { |
3250 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3251 | 0 | return NULL; |
3252 | 0 | } |
3253 | 0 | io_cb->seek(fp, frame->file_offset + header_len + cbytes, SEEK_SET); |
3254 | 0 | wbytes = io_cb->write(chunk, 1, chunk_cbytes, fp); // the new chunk |
3255 | 0 | if (wbytes != chunk_cbytes) { |
3256 | 0 | BLOSC_TRACE_ERROR("Cannot write the full chunk to frame."); |
3257 | 0 | io_cb->close(fp); |
3258 | 0 | return NULL; |
3259 | 0 | } |
3260 | 0 | io_cb->seek(fp, frame->file_offset + header_len + new_cbytes, SEEK_SET); |
3261 | 0 | } |
3262 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, fp); // the new offsets |
3263 | 0 | io_cb->close(fp); |
3264 | 0 | if (wbytes != new_off_cbytes) { |
3265 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3266 | 0 | return NULL; |
3267 | 0 | } |
3268 | | // Invalidate the cache for chunk offsets |
3269 | 0 | if (frame->coffsets != NULL) { |
3270 | 0 | free(frame->coffsets); |
3271 | 0 | frame->coffsets = NULL; |
3272 | 0 | } |
3273 | 0 | } |
3274 | 0 | free(chunk); // chunk has always to be a copy when reaching here... |
3275 | 0 | free(off_chunk); |
3276 | |
|
3277 | 0 | frame->len = new_frame_len; |
3278 | 0 | rc = frame_update_header(frame, schunk, false); |
3279 | 0 | if (rc < 0) { |
3280 | 0 | return NULL; |
3281 | 0 | } |
3282 | | |
3283 | 0 | rc = frame_update_trailer(frame, schunk); |
3284 | 0 | if (rc < 0) { |
3285 | 0 | return NULL; |
3286 | 0 | } |
3287 | | |
3288 | 0 | return frame; |
3289 | 0 | } |
3290 | | |
3291 | | |
3292 | 0 | void* frame_delete_chunk(blosc2_frame_s* frame, int64_t nchunk, blosc2_schunk* schunk) { |
3293 | 0 | int32_t header_len; |
3294 | 0 | int64_t frame_len; |
3295 | 0 | int64_t nbytes; |
3296 | 0 | int64_t cbytes; |
3297 | 0 | int32_t blocksize; |
3298 | 0 | int32_t chunksize; |
3299 | 0 | int64_t nchunks; |
3300 | 0 | int rc = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
3301 | 0 | &blocksize, &chunksize, &nchunks, |
3302 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, frame->schunk->storage->io); |
3303 | 0 | if (rc < 0) { |
3304 | 0 | BLOSC_TRACE_ERROR("Unable to get meta info from frame."); |
3305 | 0 | return NULL; |
3306 | 0 | } |
3307 | | |
3308 | | // Get the current offsets |
3309 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
3310 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
3311 | 0 | if (nchunks > 0) { |
3312 | 0 | int32_t coffsets_cbytes = 0; |
3313 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
3314 | 0 | if (coffsets == NULL) { |
3315 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
3316 | 0 | return NULL; |
3317 | 0 | } |
3318 | 0 | if (coffsets_cbytes == 0) { |
3319 | 0 | coffsets_cbytes = (int32_t)cbytes; |
3320 | 0 | } |
3321 | | |
3322 | | // Decompress offsets |
3323 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
3324 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
3325 | 0 | if (dctx == NULL) { |
3326 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
3327 | 0 | return NULL; |
3328 | 0 | } |
3329 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, offsets, off_nbytes); |
3330 | 0 | blosc2_free_ctx(dctx); |
3331 | 0 | if (prev_nbytes < 0) { |
3332 | 0 | free(offsets); |
3333 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
3334 | 0 | return NULL; |
3335 | 0 | } |
3336 | 0 | } |
3337 | | |
3338 | | // Delete the new offset |
3339 | 0 | for (int64_t i = nchunk; i < nchunks - 1; i++) { |
3340 | 0 | offsets[i] = offsets[i + 1]; |
3341 | 0 | } |
3342 | 0 | offsets[nchunks - 1] = 0; |
3343 | | |
3344 | | // Re-compress the offsets again |
3345 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
3346 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
3347 | 0 | cparams.typesize = sizeof(int64_t); |
3348 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
3349 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
3350 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
3351 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
3352 | 0 | if (cctx == NULL) { |
3353 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
3354 | 0 | return NULL; |
3355 | 0 | } |
3356 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
3357 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes - (int32_t)sizeof(int64_t), |
3358 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
3359 | 0 | blosc2_free_ctx(cctx); |
3360 | |
|
3361 | 0 | free(offsets); |
3362 | 0 | if (new_off_cbytes < 0) { |
3363 | 0 | free(off_chunk); |
3364 | 0 | return NULL; |
3365 | 0 | } |
3366 | | |
3367 | 0 | int64_t new_cbytes = cbytes; |
3368 | |
|
3369 | 0 | int64_t new_frame_len; |
3370 | 0 | if (frame->sframe) { |
3371 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
3372 | 0 | } |
3373 | 0 | else { |
3374 | 0 | new_frame_len = header_len + new_cbytes + new_off_cbytes + frame->trailer_len; |
3375 | 0 | } |
3376 | | |
3377 | | // Add the chunk and update meta |
3378 | 0 | FILE* fp = NULL; |
3379 | 0 | if (frame->cframe != NULL) { |
3380 | 0 | uint8_t* framep = frame->cframe; |
3381 | | /* Make space for the new chunk and copy it */ |
3382 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
3383 | 0 | if (framep == NULL) { |
3384 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
3385 | 0 | return NULL; |
3386 | 0 | } |
3387 | | /* Copy the offsets */ |
3388 | 0 | memcpy(framep + header_len + new_cbytes, off_chunk, (size_t)new_off_cbytes); |
3389 | 0 | } else { |
3390 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
3391 | 0 | if (io_cb == NULL) { |
3392 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
3393 | 0 | return NULL; |
3394 | 0 | } |
3395 | | |
3396 | 0 | size_t wbytes; |
3397 | 0 | if (frame->sframe) { |
3398 | 0 | int64_t offset; |
3399 | 0 | rc = get_coffset(frame, header_len, cbytes, nchunk, nchunks, &offset); |
3400 | 0 | if (rc < 0) { |
3401 | 0 | BLOSC_TRACE_ERROR("Unable to get offset to chunk %" PRId64 ".", nchunk); |
3402 | 0 | return NULL; |
3403 | 0 | } |
3404 | 0 | if (offset >= 0){ |
3405 | | // Remove the chunk file only if it is not a special value chunk |
3406 | 0 | int err = sframe_delete_chunk(frame->urlpath, offset); |
3407 | 0 | if (err != 0) { |
3408 | 0 | BLOSC_TRACE_ERROR("Unable to delete chunk!"); |
3409 | 0 | return NULL; |
3410 | 0 | } |
3411 | 0 | } |
3412 | | // Update the offsets chunk in the chunks frame |
3413 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", frame->schunk->storage->io); |
3414 | 0 | if (fp == NULL) { |
3415 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3416 | 0 | return NULL; |
3417 | 0 | } |
3418 | 0 | io_cb->seek(fp, frame->file_offset + header_len + 0, SEEK_SET); |
3419 | 0 | } |
3420 | 0 | else { |
3421 | | // Regular frame |
3422 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io); |
3423 | 0 | if (fp == NULL) { |
3424 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3425 | 0 | return NULL; |
3426 | 0 | } |
3427 | 0 | io_cb->seek(fp, frame->file_offset + header_len + cbytes, SEEK_SET); |
3428 | 0 | } |
3429 | 0 | wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, fp); // the new offsets |
3430 | 0 | io_cb->close(fp); |
3431 | 0 | if (wbytes != (size_t)new_off_cbytes) { |
3432 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3433 | 0 | return NULL; |
3434 | 0 | } |
3435 | | // Invalidate the cache for chunk offsets |
3436 | 0 | if (frame->coffsets != NULL) { |
3437 | 0 | free(frame->coffsets); |
3438 | 0 | frame->coffsets = NULL; |
3439 | 0 | } |
3440 | 0 | } |
3441 | 0 | free(off_chunk); |
3442 | |
|
3443 | 0 | frame->len = new_frame_len; |
3444 | 0 | rc = frame_update_header(frame, schunk, false); |
3445 | 0 | if (rc < 0) { |
3446 | 0 | return NULL; |
3447 | 0 | } |
3448 | | |
3449 | 0 | rc = frame_update_trailer(frame, schunk); |
3450 | 0 | if (rc < 0) { |
3451 | 0 | return NULL; |
3452 | 0 | } |
3453 | | |
3454 | 0 | return frame; |
3455 | 0 | } |
3456 | | |
3457 | | |
3458 | 0 | int frame_reorder_offsets(blosc2_frame_s* frame, const int64_t* offsets_order, blosc2_schunk* schunk) { |
3459 | | // Get header info |
3460 | 0 | int32_t header_len; |
3461 | 0 | int64_t frame_len; |
3462 | 0 | int64_t nbytes; |
3463 | 0 | int64_t cbytes; |
3464 | 0 | int32_t blocksize; |
3465 | 0 | int32_t chunksize; |
3466 | 0 | int64_t nchunks; |
3467 | 0 | int ret = get_header_info(frame, &header_len, &frame_len, &nbytes, &cbytes, |
3468 | 0 | &blocksize, &chunksize, &nchunks, |
3469 | 0 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
3470 | 0 | frame->schunk->storage->io); |
3471 | 0 | if (ret < 0) { |
3472 | 0 | BLOSC_TRACE_ERROR("Cannot get the header info for the frame."); |
3473 | 0 | return ret; |
3474 | 0 | } |
3475 | | |
3476 | | // Get the current offsets and add one more |
3477 | 0 | int32_t off_nbytes = (int32_t) (nchunks * sizeof(int64_t)); |
3478 | 0 | int64_t* offsets = (int64_t *) malloc((size_t)off_nbytes); |
3479 | |
|
3480 | 0 | int32_t coffsets_cbytes = 0; |
3481 | 0 | uint8_t *coffsets = get_coffsets(frame, header_len, cbytes, nchunks, &coffsets_cbytes); |
3482 | 0 | if (coffsets == NULL) { |
3483 | 0 | BLOSC_TRACE_ERROR("Cannot get the offsets for the frame."); |
3484 | 0 | free(offsets); |
3485 | 0 | return BLOSC2_ERROR_DATA; |
3486 | 0 | } |
3487 | | |
3488 | | // Decompress offsets |
3489 | 0 | blosc2_dparams off_dparams = BLOSC2_DPARAMS_DEFAULTS; |
3490 | 0 | blosc2_context *dctx = blosc2_create_dctx(off_dparams); |
3491 | 0 | if (dctx == NULL) { |
3492 | 0 | BLOSC_TRACE_ERROR("Error while creating the decompression context"); |
3493 | 0 | return BLOSC2_ERROR_NULL_POINTER; |
3494 | 0 | } |
3495 | 0 | int32_t prev_nbytes = blosc2_decompress_ctx(dctx, coffsets, coffsets_cbytes, |
3496 | 0 | offsets, off_nbytes); |
3497 | 0 | blosc2_free_ctx(dctx); |
3498 | 0 | if (prev_nbytes < 0) { |
3499 | 0 | free(offsets); |
3500 | 0 | BLOSC_TRACE_ERROR("Cannot decompress the offsets chunk."); |
3501 | 0 | return prev_nbytes; |
3502 | 0 | } |
3503 | | |
3504 | | // Make a copy of the chunk offsets and reorder it |
3505 | 0 | int64_t *offsets_copy = malloc(prev_nbytes); |
3506 | 0 | memcpy(offsets_copy, offsets, prev_nbytes); |
3507 | |
|
3508 | 0 | for (int i = 0; i < nchunks; ++i) { |
3509 | 0 | offsets[i] = offsets_copy[offsets_order[i]]; |
3510 | 0 | } |
3511 | 0 | free(offsets_copy); |
3512 | | |
3513 | | // Re-compress the offsets again |
3514 | 0 | blosc2_cparams cparams = BLOSC2_CPARAMS_DEFAULTS; |
3515 | 0 | cparams.splitmode = BLOSC_NEVER_SPLIT; |
3516 | 0 | cparams.typesize = sizeof(int64_t); |
3517 | 0 | cparams.blocksize = 16 * 1024; // based on experiments with create_frame.c bench |
3518 | 0 | cparams.nthreads = 4; // 4 threads seems a decent default for nowadays CPUs |
3519 | 0 | cparams.compcode = BLOSC_BLOSCLZ; |
3520 | 0 | blosc2_context* cctx = blosc2_create_cctx(cparams); |
3521 | 0 | if (cctx == NULL) { |
3522 | 0 | BLOSC_TRACE_ERROR("Error while creating the compression context"); |
3523 | 0 | return BLOSC2_ERROR_NULL_POINTER; |
3524 | 0 | } |
3525 | 0 | void* off_chunk = malloc((size_t)off_nbytes + BLOSC2_MAX_OVERHEAD); |
3526 | 0 | int32_t new_off_cbytes = blosc2_compress_ctx(cctx, offsets, off_nbytes, |
3527 | 0 | off_chunk, off_nbytes + BLOSC2_MAX_OVERHEAD); |
3528 | 0 | blosc2_free_ctx(cctx); |
3529 | |
|
3530 | 0 | if (new_off_cbytes < 0) { |
3531 | 0 | free(offsets); |
3532 | 0 | free(off_chunk); |
3533 | 0 | return new_off_cbytes; |
3534 | 0 | } |
3535 | 0 | free(offsets); |
3536 | 0 | int64_t new_frame_len; |
3537 | 0 | if (frame->sframe) { |
3538 | | // The chunks are not in the frame |
3539 | 0 | new_frame_len = header_len + 0 + new_off_cbytes + frame->trailer_len; |
3540 | 0 | } |
3541 | 0 | else { |
3542 | 0 | new_frame_len = header_len + cbytes + new_off_cbytes + frame->trailer_len; |
3543 | 0 | } |
3544 | |
|
3545 | 0 | if (frame->cframe != NULL) { |
3546 | 0 | uint8_t* framep = frame->cframe; |
3547 | | /* Make space for the new chunk and copy it */ |
3548 | 0 | frame->cframe = framep = realloc(framep, (size_t)new_frame_len); |
3549 | 0 | if (framep == NULL) { |
3550 | 0 | BLOSC_TRACE_ERROR("Cannot realloc space for the frame."); |
3551 | 0 | return BLOSC2_ERROR_MEMORY_ALLOC; |
3552 | 0 | } |
3553 | | /* Copy the offsets */ |
3554 | 0 | memcpy(framep + header_len + cbytes, off_chunk, (size_t)new_off_cbytes); |
3555 | 0 | } |
3556 | 0 | else { |
3557 | 0 | void* fp = NULL; |
3558 | |
|
3559 | 0 | blosc2_io_cb *io_cb = blosc2_get_io_cb(frame->schunk->storage->io->id); |
3560 | 0 | if (io_cb == NULL) { |
3561 | 0 | BLOSC_TRACE_ERROR("Error getting the input/output API"); |
3562 | 0 | return BLOSC2_ERROR_PLUGIN_IO; |
3563 | 0 | } |
3564 | | |
3565 | 0 | if (frame->sframe) { |
3566 | | // Update the offsets chunk in the chunks frame |
3567 | 0 | fp = sframe_open_index(frame->urlpath, "rb+", |
3568 | 0 | frame->schunk->storage->io); |
3569 | 0 | if (fp == NULL) { |
3570 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3571 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
3572 | 0 | } |
3573 | 0 | io_cb->seek(fp, frame->file_offset + header_len + 0, SEEK_SET); |
3574 | 0 | } |
3575 | 0 | else { |
3576 | | // Regular frame |
3577 | 0 | fp = io_cb->open(frame->urlpath, "rb+", frame->schunk->storage->io->params); |
3578 | 0 | if (fp == NULL) { |
3579 | 0 | BLOSC_TRACE_ERROR("Error opening file in: %s", frame->urlpath); |
3580 | 0 | return BLOSC2_ERROR_FILE_OPEN; |
3581 | 0 | } |
3582 | 0 | io_cb->seek(fp, frame->file_offset + header_len + cbytes, SEEK_SET); |
3583 | 0 | } |
3584 | 0 | int64_t wbytes = io_cb->write(off_chunk, 1, new_off_cbytes, fp); // the new offsets |
3585 | 0 | io_cb->close(fp); |
3586 | 0 | if (wbytes != new_off_cbytes) { |
3587 | 0 | BLOSC_TRACE_ERROR("Cannot write the offsets to frame."); |
3588 | 0 | return BLOSC2_ERROR_FILE_WRITE; |
3589 | 0 | } |
3590 | 0 | } |
3591 | | |
3592 | | // Invalidate the cache for chunk offsets |
3593 | 0 | if (frame->coffsets != NULL) { |
3594 | 0 | free(frame->coffsets); |
3595 | 0 | frame->coffsets = NULL; |
3596 | 0 | } |
3597 | 0 | free(off_chunk); |
3598 | |
|
3599 | 0 | frame->len = new_frame_len; |
3600 | 0 | int rc = frame_update_header(frame, schunk, false); |
3601 | 0 | if (rc < 0) { |
3602 | 0 | return rc; |
3603 | 0 | } |
3604 | | |
3605 | 0 | rc = frame_update_trailer(frame, schunk); |
3606 | 0 | if (rc < 0) { |
3607 | 0 | return rc; |
3608 | 0 | } |
3609 | | |
3610 | 0 | return 0; |
3611 | 0 | } |
3612 | | |
3613 | | |
3614 | | /* Decompress and return a chunk that is part of a frame. */ |
3615 | 21 | int frame_decompress_chunk(blosc2_context *dctx, blosc2_frame_s* frame, int64_t nchunk, void *dest, int32_t nbytes) { |
3616 | 21 | uint8_t* src; |
3617 | 21 | bool needs_free; |
3618 | 21 | int32_t chunk_nbytes; |
3619 | 21 | int32_t chunk_cbytes; |
3620 | 21 | int rc; |
3621 | | |
3622 | | // Use a lazychunk here in order to do a potential parallel read. |
3623 | 21 | rc = frame_get_lazychunk(frame, nchunk, &src, &needs_free); |
3624 | 21 | if (rc < 0) { |
3625 | 0 | BLOSC_TRACE_ERROR("Cannot get the chunk in position %" PRId64 ".", nchunk); |
3626 | 0 | goto end; |
3627 | 0 | } |
3628 | 21 | chunk_cbytes = rc; |
3629 | 21 | if (chunk_cbytes < (signed)sizeof(int32_t)) { |
3630 | | /* Not enough input to read `nbytes` */ |
3631 | 0 | rc = BLOSC2_ERROR_READ_BUFFER; |
3632 | 0 | } |
3633 | | |
3634 | 21 | rc = blosc2_cbuffer_sizes(src, &chunk_nbytes, &chunk_cbytes, NULL); |
3635 | 21 | if (rc < 0) { |
3636 | 0 | goto end; |
3637 | 0 | } |
3638 | | |
3639 | | /* Create a buffer for destination */ |
3640 | 21 | if (chunk_nbytes > nbytes) { |
3641 | 0 | BLOSC_TRACE_ERROR("Not enough space for decompressing in dest."); |
3642 | 0 | rc = BLOSC2_ERROR_WRITE_BUFFER; |
3643 | 0 | goto end; |
3644 | 0 | } |
3645 | | /* And decompress it */ |
3646 | 21 | dctx->header_overhead = BLOSC_EXTENDED_HEADER_LENGTH; |
3647 | 21 | int chunksize = rc = blosc2_decompress_ctx(dctx, src, chunk_cbytes, dest, nbytes); |
3648 | 21 | if (chunksize < 0 || chunksize != chunk_nbytes) { |
3649 | 4 | BLOSC_TRACE_ERROR("Error in decompressing chunk."); |
3650 | 4 | if (chunksize >= 0) |
3651 | 0 | rc = BLOSC2_ERROR_FAILURE; |
3652 | 4 | } |
3653 | 21 | end: |
3654 | 21 | if (needs_free) { |
3655 | 5 | free(src); |
3656 | 5 | } |
3657 | 21 | return rc; |
3658 | 21 | } |