Line data Source code
1 : #include "fd_ssparse.h"
2 :
3 : #include "../../../util/log/fd_log.h"
4 : #include "../../../util/archive/fd_tar.h"
5 : #include "../../../flamenco/runtime/fd_runtime_const.h"
6 : #include "../../../flamenco/runtime/fd_system_ids.h"
7 :
8 : #include <stdlib.h>
9 : #include <errno.h>
10 :
/* Parser states.  fd_ssparse_advance dispatches on ssparse->state to
   the handler for the current state.  FD_SSPARSE_STATE_ACCOUNT_BATCH
   is defined but is not used as a dispatch state anywhere in this
   file. */

#define FD_SSPARSE_STATE_TAR_HEADER             (0) /* accumulating a 512 byte tar header */
#define FD_SSPARSE_STATE_SCROLL_TAR_HEADER      (1) /* skipping to the next 512 byte boundary */
#define FD_SSPARSE_STATE_VERSION                (2) /* reading the 5 byte version file */
#define FD_SSPARSE_STATE_MANIFEST               (3) /* streaming the manifest file */
#define FD_SSPARSE_STATE_ACCOUNT_HEADER         (4) /* reading a 136 byte account header */
#define FD_SSPARSE_STATE_ACCOUNT_DATA           (5) /* streaming account data */
#define FD_SSPARSE_STATE_ACCOUNT_PADDING        (6) /* skipping to the next 8 byte boundary */
#define FD_SSPARSE_STATE_STATUS_CACHE           (7) /* streaming the status cache file */
#define FD_SSPARSE_STATE_SCROLL_ACCOUNT_GARBAGE (8) /* skipping bytes past the append vec size */
#define FD_SSPARSE_STATE_ACCOUNT_BATCH          (9)
21 :
22 : struct fd_ssparse_private {
23 : int state;
24 : uint batch_enabled : 1;
25 :
26 : struct {
27 : int seen_zero_tar_frame;
28 : int seen_manifest;
29 : int seen_status_cache;
30 : int seen_version;
31 : } flags;
32 :
33 : uchar version[ 5UL ];
34 :
35 : struct {
36 : acc_vec_map_t * acc_vec_map;
37 : acc_vec_t * acc_vec_pool;
38 : } manifest;
39 :
40 : struct {
41 : uchar header[ 512UL ];
42 : ulong file_bytes;
43 : ulong file_bytes_consumed;
44 : ulong header_bytes_consumed;
45 : } tar;
46 :
47 : struct {
48 : uchar const * owner;
49 : uchar header[ 136UL ];
50 : ulong header_bytes_consumed;
51 : ulong data_bytes_consumed;
52 : ulong data_len;
53 : } account;
54 :
55 : ulong acc_vec_bytes;
56 : ulong slot;
57 : ulong bytes_consumed;
58 :
59 : ulong seed;
60 : ulong max_acc_vecs;
61 : ulong magic;
62 : };
63 :
64 : FD_FN_CONST ulong
65 0 : fd_ssparse_align( void ) {
66 0 : return fd_ulong_max( alignof(fd_ssparse_t), fd_ulong_max( acc_vec_pool_align(), acc_vec_map_align() ) );
67 0 : }
68 :
69 : FD_FN_CONST ulong
70 0 : fd_ssparse_footprint( ulong max_acc_vecs ) {
71 0 : ulong l = FD_LAYOUT_INIT;
72 0 : l = FD_LAYOUT_APPEND( l, fd_ssparse_align(), sizeof(fd_ssparse_t) );
73 0 : l = FD_LAYOUT_APPEND( l, acc_vec_pool_align(), acc_vec_pool_footprint( max_acc_vecs ) );
74 0 : l = FD_LAYOUT_APPEND( l, acc_vec_map_align(), acc_vec_map_footprint( max_acc_vecs ) );
75 0 : return FD_LAYOUT_FINI( l, fd_ssparse_align() );
76 0 : }
77 :
78 : void *
79 : fd_ssparse_new( void * shmem,
80 : ulong max_acc_vecs,
81 0 : ulong seed ) {
82 0 : if( FD_UNLIKELY( !shmem ) ) {
83 0 : FD_LOG_WARNING(( "NULL shmem" ));
84 0 : return NULL;
85 0 : }
86 :
87 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shmem, fd_ssparse_align() ) ) ) {
88 0 : FD_LOG_WARNING(( "unaligned shmem" ));
89 0 : return NULL;
90 0 : }
91 :
92 0 : FD_SCRATCH_ALLOC_INIT( l, shmem );
93 0 : fd_ssparse_t * ssparse = FD_SCRATCH_ALLOC_APPEND( l, fd_ssparse_align(), sizeof(fd_ssparse_t) );
94 0 : void * _acc_vec_pool = FD_SCRATCH_ALLOC_APPEND( l, acc_vec_pool_align(), acc_vec_pool_footprint( max_acc_vecs ) );
95 0 : void * _acc_vec_map = FD_SCRATCH_ALLOC_APPEND( l, acc_vec_map_align(), acc_vec_map_footprint( max_acc_vecs ) );
96 :
97 0 : ssparse->manifest.acc_vec_pool = acc_vec_pool_join( acc_vec_pool_new( _acc_vec_pool, max_acc_vecs ) );
98 0 : FD_TEST( ssparse->manifest.acc_vec_pool );
99 :
100 0 : ssparse->manifest.acc_vec_map = acc_vec_map_join( acc_vec_map_new( _acc_vec_map, max_acc_vecs, seed ) );
101 0 : FD_TEST( ssparse->manifest.acc_vec_map );
102 :
103 0 : ssparse->state = FD_SSPARSE_STATE_TAR_HEADER;
104 0 : fd_memset( &ssparse->flags, 0, sizeof(ssparse->flags) );
105 :
106 0 : ssparse->bytes_consumed = 0UL;
107 0 : ssparse->seed = seed;
108 0 : ssparse->max_acc_vecs = max_acc_vecs;
109 :
110 0 : ssparse->tar.header_bytes_consumed = 0UL;
111 0 : ssparse->tar.file_bytes_consumed = 0UL;
112 0 : ssparse->tar.file_bytes = 0UL;
113 :
114 0 : ssparse->account.owner = NULL;
115 0 : ssparse->account.header_bytes_consumed = 0UL;
116 0 : ssparse->account.data_bytes_consumed = 0UL;
117 0 : ssparse->account.data_len = 0UL;
118 0 : ssparse->acc_vec_bytes = 0UL;
119 :
120 0 : FD_COMPILER_MFENCE();
121 0 : ssparse->magic = FD_SSPARSE_MAGIC;
122 0 : FD_COMPILER_MFENCE();
123 :
124 0 : return (void *)ssparse;
125 0 : }
126 :
127 : fd_ssparse_t *
128 0 : fd_ssparse_join( void * shssparse ) {
129 0 : if( FD_UNLIKELY( !shssparse ) ) {
130 0 : FD_LOG_WARNING(( "NULL shssparse" ));
131 0 : return NULL;
132 0 : }
133 :
134 0 : if( FD_UNLIKELY( !fd_ulong_is_aligned( (ulong)shssparse, fd_ssparse_align() ) ) ) {
135 0 : FD_LOG_WARNING(( "misaligned shssparse" ));
136 0 : return NULL;
137 0 : }
138 :
139 0 : fd_ssparse_t * ssparse = (fd_ssparse_t *)shssparse;
140 :
141 0 : if( FD_UNLIKELY( ssparse->magic!=FD_SSPARSE_MAGIC ) ) {
142 0 : FD_LOG_WARNING(( "bad magic" ));
143 0 : return NULL;
144 0 : }
145 :
146 0 : return ssparse;
147 0 : }
148 :
149 : void
150 0 : fd_ssparse_reset( fd_ssparse_t * ssparse ) {
151 0 : ssparse->state = FD_SSPARSE_STATE_TAR_HEADER;
152 0 : fd_memset( &ssparse->flags, 0, sizeof(ssparse->flags) );
153 0 : ssparse->bytes_consumed = 0UL;
154 :
155 0 : ssparse->tar.header_bytes_consumed = 0UL;
156 0 : ssparse->tar.file_bytes_consumed = 0UL;
157 0 : ssparse->tar.file_bytes = 0UL;
158 :
159 0 : ssparse->account.owner = NULL;
160 0 : ssparse->account.header_bytes_consumed = 0UL;
161 0 : ssparse->account.data_bytes_consumed = 0UL;
162 0 : ssparse->account.data_len = 0UL;
163 0 : ssparse->acc_vec_bytes = 0UL;
164 :
165 0 : acc_vec_map_reset( ssparse->manifest.acc_vec_map );
166 0 : acc_vec_pool_reset( ssparse->manifest.acc_vec_pool );
167 0 : }
168 :
169 : static int
170 : parse_tar_header_name( char const * name,
171 : ulong * id,
172 0 : ulong * slot ) {
173 0 : char name_buf[ FD_TAR_NAME_SZ ];
174 0 : fd_memcpy( name_buf, name, FD_TAR_NAME_SZ );
175 0 : name_buf[ FD_TAR_NAME_SZ-1 ] = '\0';
176 :
177 0 : char const * ptr = name_buf;
178 :
179 0 : if( FD_UNLIKELY( strncmp( ptr, "accounts/", 9UL ) ) ) {
180 0 : *id = ULONG_MAX;
181 0 : *slot = ULONG_MAX;
182 0 : return -1;
183 0 : }
184 :
185 0 : ptr += 9UL;
186 0 : char const * next = strchr( ptr, '.' );
187 0 : if( FD_UNLIKELY( !next ) ) {
188 0 : *id = ULONG_MAX;
189 0 : *slot = ULONG_MAX;
190 0 : return -1;
191 0 : }
192 0 : errno = 0;
193 0 : char * endptr;
194 0 : *slot = strtoul( ptr, &endptr, 10 );
195 0 : if( FD_UNLIKELY( errno==ERANGE || *endptr!='.' || endptr==ptr ) ) {
196 0 : *id = ULONG_MAX;
197 0 : *slot = ULONG_MAX;
198 0 : return -1;
199 0 : }
200 :
201 0 : errno = 0;
202 0 : ptr = next + 1;
203 0 : *id = strtoul( ptr, &endptr, 10 );
204 0 : if( FD_UNLIKELY( errno==ERANGE || *endptr!='\0' || endptr==ptr ) ) {
205 0 : *id = ULONG_MAX;
206 0 : *slot = ULONG_MAX;
207 0 : return -1;
208 0 : }
209 :
210 0 : return 0;
211 0 : }
212 :
213 : static int
214 : advance_tar( fd_ssparse_t * ssparse,
215 : uchar const * data,
216 : ulong data_sz,
217 0 : fd_ssparse_advance_result_t * result ) {
218 0 : ulong consume = fd_ulong_min( data_sz, 512UL - ssparse->tar.header_bytes_consumed );
219 0 : if( FD_UNLIKELY( !consume ) ) {
220 0 : FD_LOG_WARNING(( "unexpected end of data in tar header, data_sz=%lu, header_bytes_consumed=%lu", data_sz, ssparse->tar.header_bytes_consumed ));
221 0 : return FD_SSPARSE_ADVANCE_ERROR;
222 0 : }
223 :
224 0 : fd_memcpy( ssparse->tar.header+ssparse->tar.header_bytes_consumed, data, consume );
225 0 : ssparse->bytes_consumed += consume;
226 0 : result->bytes_consumed = consume;
227 0 : ssparse->tar.header_bytes_consumed += consume;
228 :
229 0 : if( FD_UNLIKELY( ssparse->tar.header_bytes_consumed<512UL ) ) return FD_SSPARSE_ADVANCE_AGAIN;
230 :
231 0 : fd_tar_meta_t const * hdr = (fd_tar_meta_t const *)ssparse->tar.header;
232 0 : ssparse->tar.header_bytes_consumed = 0UL;
233 :
234 : /* "ustar\x00" and "ustar \x00" (overlaps with version) are both
235 : valid values for magic. These are POSIX ustar and OLDGNU versions
236 : respectively. */
237 0 : if( FD_UNLIKELY( memcmp( hdr->magic, FD_TAR_MAGIC, FD_TAR_MAGIC_SZ ) ) ) {
238 0 : int not_zero = 0;
239 0 : for( ulong i=0UL; i<512UL; i++ ) not_zero |= ssparse->tar.header[ i ];
240 0 : if( FD_UNLIKELY( not_zero ) ) {
241 0 : FD_LOG_WARNING(( "invalid tar header magic `%." FD_EXPAND_THEN_STRINGIFY(FD_TAR_MAGIC_SZ) "s`", hdr->magic ));
242 0 : return FD_SSPARSE_ADVANCE_ERROR;
243 0 : }
244 :
245 0 : if( FD_LIKELY( ssparse->flags.seen_zero_tar_frame ) ) {
246 0 : if( FD_UNLIKELY( !ssparse->flags.seen_version || !ssparse->flags.seen_manifest || !ssparse->flags.seen_status_cache ) ) {
247 0 : FD_LOG_WARNING(( "unexpected end of file before version or manifest or status cache" ));
248 0 : return FD_SSPARSE_ADVANCE_ERROR;
249 0 : }
250 :
251 0 : return FD_SSPARSE_ADVANCE_DONE;
252 0 : }
253 :
254 0 : ssparse->flags.seen_zero_tar_frame = 1;
255 0 : return FD_SSPARSE_ADVANCE_AGAIN;
256 0 : }
257 :
258 0 : if( FD_UNLIKELY( ssparse->flags.seen_zero_tar_frame ) ) {
259 0 : FD_LOG_WARNING(( "unexpected valid tar header after zero frame" ));
260 0 : return FD_SSPARSE_ADVANCE_ERROR;
261 0 : }
262 :
263 0 : ssparse->tar.file_bytes = fd_tar_meta_get_size( hdr );
264 0 : if( FD_UNLIKELY( ssparse->tar.file_bytes==ULONG_MAX ) ) {
265 0 : FD_LOG_WARNING(( "invalid tar header size %." FD_EXPAND_THEN_STRINGIFY(FD_TAR_SIZE_SZ) "s "
266 0 : "for tar header name %." FD_EXPAND_THEN_STRINGIFY(FD_TAR_NAME_SZ) "s", hdr->size, hdr->name ));
267 0 : return FD_SSPARSE_ADVANCE_ERROR;
268 0 : }
269 :
270 0 : if( FD_UNLIKELY( hdr->typeflag==FD_TAR_TYPE_DIR ) ) return FD_SSPARSE_ADVANCE_AGAIN;
271 :
272 0 : if( FD_UNLIKELY( !fd_tar_meta_is_reg( hdr ) ) ) {
273 0 : FD_LOG_WARNING(( "invalid tar header type %d", hdr->typeflag ));
274 0 : return FD_SSPARSE_ADVANCE_ERROR;
275 0 : }
276 0 : if( FD_UNLIKELY( !ssparse->tar.file_bytes ) ) {
277 0 : FD_LOG_WARNING(( "invalid tar header size %lu", ssparse->tar.file_bytes ));
278 0 : return FD_SSPARSE_ADVANCE_ERROR;
279 0 : }
280 :
281 : /* TODO: Check every header field here for validity? */
282 :
283 0 : int desired_state;
284 0 : if( FD_LIKELY( !strncmp( hdr->name, "version", 7UL ) ) ) {
285 0 : desired_state = FD_SSPARSE_STATE_VERSION;
286 0 : if( FD_UNLIKELY( ssparse->tar.file_bytes!=5UL ) ) {
287 0 : FD_LOG_WARNING(( "invalid version file size %lu", ssparse->tar.file_bytes ));
288 0 : return FD_SSPARSE_ADVANCE_ERROR;
289 0 : }
290 0 : } else if( FD_LIKELY( !strncmp( hdr->name, "accounts/", 9UL ) ) ) {
291 0 : ssparse->account.header_bytes_consumed = 0UL;
292 0 : desired_state = FD_SSPARSE_STATE_ACCOUNT_HEADER;
293 0 : ulong id, slot;
294 0 : if( FD_UNLIKELY( -1==parse_tar_header_name( hdr->name, &id, &slot ) ) ) {
295 0 : FD_LOG_WARNING(( "invalid account append vec name %." FD_EXPAND_THEN_STRINGIFY(FD_TAR_NAME_SZ) "s", hdr->name ));
296 0 : return FD_SSPARSE_ADVANCE_ERROR;
297 0 : }
298 :
299 0 : acc_vec_key_t key = { .slot = slot, .id = id };
300 0 : acc_vec_t const * acc_vec = acc_vec_map_ele_query_const( ssparse->manifest.acc_vec_map, &key, NULL, ssparse->manifest.acc_vec_pool );
301 0 : if( FD_UNLIKELY( !acc_vec ) ) {
302 0 : FD_LOG_WARNING(( "append vec %lu.%lu not found in manifest", slot, id ));
303 0 : return FD_SSPARSE_ADVANCE_ERROR;
304 0 : }
305 :
306 0 : ssparse->acc_vec_bytes = acc_vec->file_sz;
307 0 : if( FD_UNLIKELY( ssparse->acc_vec_bytes>ssparse->tar.file_bytes ) ) {
308 0 : FD_LOG_WARNING(( "invalid append vec file size %lu > %lu", ssparse->acc_vec_bytes, ssparse->tar.file_bytes ));
309 0 : return FD_SSPARSE_ADVANCE_ERROR;
310 0 : }
311 :
312 0 : ssparse->slot = slot;
313 0 : } else if( FD_LIKELY( !strncmp( hdr->name, "snapshots/status_cache", 22UL ) ) ) desired_state = FD_SSPARSE_STATE_STATUS_CACHE;
314 0 : else if( FD_LIKELY( !strncmp( hdr->name, "snapshots/", 10UL ) ) ) {
315 0 : desired_state = FD_SSPARSE_STATE_MANIFEST;
316 0 : } else {
317 0 : FD_LOG_WARNING(( "unexpected tar header name `%." FD_EXPAND_THEN_STRINGIFY(FD_TAR_NAME_SZ) "s`", hdr->name ));
318 0 : return FD_SSPARSE_ADVANCE_ERROR;
319 0 : }
320 :
321 0 : ssparse->tar.file_bytes_consumed = 0UL;
322 :
323 0 : switch( desired_state ) {
324 0 : case FD_SSPARSE_STATE_VERSION:
325 0 : if( FD_UNLIKELY( ssparse->flags.seen_version ) ) {
326 0 : FD_LOG_WARNING(( "unexpected duplicate version file" ));
327 0 : return FD_SSPARSE_ADVANCE_ERROR;
328 0 : }
329 :
330 0 : ssparse->flags.seen_version = 1;
331 0 : ssparse->state = FD_SSPARSE_STATE_VERSION;
332 0 : break;
333 0 : case FD_SSPARSE_STATE_MANIFEST:
334 0 : if( FD_UNLIKELY( ssparse->flags.seen_manifest ) ) {
335 0 : FD_LOG_WARNING(( "unexpected duplicate manifest file" ));
336 0 : return FD_SSPARSE_ADVANCE_ERROR;
337 0 : }
338 :
339 0 : ssparse->flags.seen_manifest = 1;
340 0 : ssparse->state = FD_SSPARSE_STATE_MANIFEST;
341 0 : break;
342 0 : case FD_SSPARSE_STATE_ACCOUNT_HEADER:
343 0 : if( FD_UNLIKELY( !ssparse->flags.seen_manifest ) ) {
344 0 : FD_LOG_WARNING(( "unexpected account append vec file before manifest" ));
345 0 : return FD_SSPARSE_ADVANCE_ERROR;
346 0 : }
347 :
348 0 : ssparse->account.header_bytes_consumed = 0UL;
349 0 : ssparse->state = FD_SSPARSE_STATE_ACCOUNT_HEADER;
350 0 : break;
351 0 : case FD_SSPARSE_STATE_STATUS_CACHE:
352 0 : if( FD_UNLIKELY( ssparse->flags.seen_status_cache ) ) {
353 0 : FD_LOG_WARNING(( "unexpected status cache file" ));
354 0 : return FD_SSPARSE_ADVANCE_ERROR;
355 0 : }
356 :
357 0 : ssparse->flags.seen_status_cache = 1;
358 0 : ssparse->state = FD_SSPARSE_STATE_STATUS_CACHE;
359 0 : break;
360 0 : default:
361 0 : FD_LOG_ERR(( "unexpected tar header desired state %d", desired_state ));
362 0 : break;
363 0 : }
364 :
365 0 : return FD_SSPARSE_ADVANCE_AGAIN;
366 0 : }
367 :
368 : static int
369 : advance_version( fd_ssparse_t * ssparse,
370 : uchar const * data,
371 : ulong data_sz,
372 0 : fd_ssparse_advance_result_t * result ) {
373 0 : ulong consume = fd_ulong_min( data_sz, ssparse->tar.file_bytes-ssparse->tar.file_bytes_consumed );
374 0 : if( FD_UNLIKELY( !consume ) ) {
375 0 : FD_LOG_WARNING(( "unexpected end of data while parsing version file, data_sz=%lu, file_bytes_consumed=%lu, file_bytes=%lu", data_sz, ssparse->tar.file_bytes_consumed, ssparse->tar.file_bytes ));
376 0 : return FD_SSPARSE_ADVANCE_ERROR;
377 0 : }
378 :
379 0 : fd_memcpy( ssparse->version+ssparse->tar.file_bytes_consumed, data, consume );
380 :
381 0 : ssparse->tar.file_bytes_consumed += consume;
382 0 : ssparse->bytes_consumed += consume;
383 0 : result->bytes_consumed = consume;
384 :
385 0 : if( FD_LIKELY( ssparse->tar.file_bytes_consumed<ssparse->tar.file_bytes ) ) return FD_SSPARSE_ADVANCE_AGAIN;
386 :
387 0 : FD_TEST( ssparse->tar.file_bytes_consumed==ssparse->tar.file_bytes );
388 0 : FD_TEST( ssparse->tar.file_bytes_consumed==5UL );
389 :
390 0 : if( FD_UNLIKELY( memcmp( ssparse->version, "1.2.0", 5UL ) ) ) {
391 0 : FD_LOG_WARNING(( "invalid version file %.*s", 5, ssparse->version ));
392 0 : return FD_SSPARSE_ADVANCE_ERROR;
393 0 : }
394 :
395 0 : ssparse->state = FD_SSPARSE_STATE_SCROLL_TAR_HEADER;
396 0 : return FD_SSPARSE_ADVANCE_AGAIN;
397 0 : }
398 :
399 : static int
400 : advance_status_cache( fd_ssparse_t * ssparse,
401 : uchar const * data,
402 : ulong data_sz,
403 0 : fd_ssparse_advance_result_t * result ) {
404 0 : ulong consume = fd_ulong_min( data_sz, ssparse->tar.file_bytes-ssparse->tar.file_bytes_consumed );
405 0 : if( FD_UNLIKELY( !consume ) ) {
406 0 : FD_LOG_WARNING(( "unexpected end of data while parsing status cache, data_sz=%lu, file_bytes_consumed=%lu, file_bytes=%lu", data_sz, ssparse->tar.file_bytes_consumed, ssparse->tar.file_bytes ));
407 0 : return FD_SSPARSE_ADVANCE_ERROR;
408 0 : }
409 :
410 0 : ssparse->tar.file_bytes_consumed += consume;
411 0 : ssparse->bytes_consumed += consume;
412 :
413 0 : result->bytes_consumed = consume;
414 0 : result->status_cache.data = data;
415 0 : result->status_cache.data_sz = consume;
416 :
417 0 : if( FD_LIKELY( ssparse->tar.file_bytes_consumed<ssparse->tar.file_bytes ) ) {
418 0 : return FD_SSPARSE_ADVANCE_STATUS_CACHE;
419 0 : }
420 0 : else { /* ssparse->tar.file_bytes_consumed==ssparse->tar.file_bytes */
421 : /* finished parsing status cache */
422 0 : ssparse->state = FD_SSPARSE_STATE_SCROLL_TAR_HEADER;
423 0 : return FD_SSPARSE_ADVANCE_STATUS_CACHE;
424 0 : }
425 0 : }
426 :
427 : static int
428 : advance_manifest( fd_ssparse_t * ssparse,
429 : uchar const * data,
430 : ulong data_sz,
431 0 : fd_ssparse_advance_result_t * result ) {
432 0 : ulong consume = fd_ulong_min( data_sz, ssparse->tar.file_bytes-ssparse->tar.file_bytes_consumed );
433 0 : if( FD_UNLIKELY( !consume ) ) {
434 0 : FD_LOG_WARNING(( "unexpected end of data while parsing manifest, data_sz=%lu, file_bytes_consumed=%lu, file_bytes=%lu", data_sz, ssparse->tar.file_bytes_consumed, ssparse->tar.file_bytes ));
435 0 : return FD_SSPARSE_ADVANCE_ERROR;
436 0 : }
437 :
438 0 : ssparse->tar.file_bytes_consumed += consume;
439 0 : ssparse->bytes_consumed += consume;
440 :
441 0 : result->bytes_consumed = consume;
442 0 : result->manifest.data = data;
443 0 : result->manifest.data_sz = consume;
444 0 : result->manifest.acc_vec_map = ssparse->manifest.acc_vec_map;
445 0 : result->manifest.acc_vec_pool = ssparse->manifest.acc_vec_pool;
446 :
447 0 : if( FD_LIKELY( ssparse->tar.file_bytes_consumed<ssparse->tar.file_bytes ) ) {
448 0 : return FD_SSPARSE_ADVANCE_MANIFEST;
449 0 : }
450 0 : else { /* ssparse->tar.file_bytes_consumed==ssparse->tar.file_bytes */
451 : /* finished parsing manifest */
452 0 : ssparse->state = FD_SSPARSE_STATE_SCROLL_TAR_HEADER;
453 0 : return FD_SSPARSE_ADVANCE_MANIFEST;
454 0 : }
455 0 : }
456 :
457 : static int
458 : advance_next_tar( fd_ssparse_t * ssparse,
459 : uchar const * data,
460 : ulong data_sz,
461 0 : fd_ssparse_advance_result_t * result ) {
462 0 : (void)data;
463 : /* skip padding */
464 0 : ulong bytes_remaining = fd_ulong_align_up( ssparse->bytes_consumed, 512UL ) - ssparse->bytes_consumed;
465 0 : ulong pad_sz = bytes_remaining;
466 0 : pad_sz = fd_ulong_min( pad_sz, data_sz );
467 0 : if( FD_UNLIKELY( !pad_sz && bytes_remaining ) ) {
468 0 : FD_LOG_WARNING(( "unexpected end of data while parsing tar header padding, data_sz=%lu, bytes_consumed=%lu, bytes_remaining=%lu", data_sz, ssparse->bytes_consumed, bytes_remaining ));
469 0 : return FD_SSPARSE_ADVANCE_ERROR;
470 0 : }
471 :
472 0 : ssparse->bytes_consumed += pad_sz;
473 0 : result->bytes_consumed = pad_sz;
474 0 : bytes_remaining -= pad_sz;
475 :
476 0 : if( FD_LIKELY( !bytes_remaining ) ) ssparse->state = FD_SSPARSE_STATE_TAR_HEADER;
477 0 : return FD_SSPARSE_ADVANCE_AGAIN;
478 0 : }
479 :
480 : static int
481 : advance_account_batch( fd_ssparse_t * ssparse,
482 : uchar const * data,
483 : ulong data_sz,
484 0 : fd_ssparse_advance_result_t * result ) {
485 : /* Cannot create a batch unless the parser is aligned to an account. */
486 0 : if( FD_UNLIKELY( ssparse->account.header_bytes_consumed ) ) return FD_SSPARSE_ADVANCE_AGAIN;
487 :
488 : /* Each account is at least 136 bytes large. Don't attempt to create
489 : a batch unless at least 4 accounts fit. */
490 0 : ulong avail = fd_ulong_min( data_sz, ssparse->acc_vec_bytes - ssparse->tar.file_bytes_consumed );
491 0 : if( FD_UNLIKELY( avail<(4*136UL) ) ) return FD_SSPARSE_ADVANCE_AGAIN;
492 :
493 : /* Skip over accounts until we reached EOF or batch is full */
494 0 : result->account_batch.batch_cnt = 0;
495 0 : ulong off = 0UL;
496 0 : for( ulong idx=0UL; idx<FD_SSPARSE_ACC_BATCH_MAX && off+136UL<=avail; idx++ ) {
497 0 : uchar const * acc_hdr = (uchar *)data+off;
498 :
499 : /* We want ConfigProgram accounts to go through the slow path,
500 : since they are published from there to consumers for monitoring. */
501 0 : if( FD_UNLIKELY( !memcmp( acc_hdr+64UL, fd_solana_config_program_id.key, sizeof(fd_hash_t) ) ) ) {
502 0 : if( FD_UNLIKELY( idx==0UL ) ) return FD_SSPARSE_ADVANCE_AGAIN; /* At the front of the batch, abort */
503 0 : else break; /* otherwise, break early. */
504 0 : }
505 :
506 0 : ulong acc_data_sz = fd_ulong_load_8_fast( acc_hdr+8 );
507 0 : ulong next_off = off+136UL+acc_data_sz;
508 0 : ulong pad_sz = fd_ulong_align_up( ssparse->tar.file_bytes_consumed+next_off, 8UL ) -
509 0 : ( ssparse->tar.file_bytes_consumed+next_off );
510 0 : next_off += pad_sz;
511 0 : if( FD_UNLIKELY( next_off>avail ) ) break; /* account is fragmented */
512 0 : if( FD_UNLIKELY( acc_data_sz > (24UL<<20) ) ) {
513 0 : FD_LOG_ERR(( "invalid account data size %lu", acc_data_sz ));
514 0 : }
515 0 : result->account_batch.batch_cnt = idx+1UL;
516 0 : result->account_batch.batch[ idx ] = acc_hdr;
517 0 : ssparse->account.header_bytes_consumed = 136UL;
518 0 : ssparse->account.data_bytes_consumed = acc_data_sz;
519 0 : ssparse->account.data_len = acc_data_sz;
520 0 : off = next_off;
521 0 : }
522 :
523 : /* Not worth batching if current chunk contains too few accounts. */
524 0 : if( FD_UNLIKELY( result->account_batch.batch_cnt!=FD_SSPARSE_ACC_BATCH_MAX ) ) {
525 0 : return FD_SSPARSE_ADVANCE_AGAIN;
526 0 : }
527 :
528 0 : ssparse->tar.file_bytes_consumed += off;
529 0 : ssparse->bytes_consumed += off;
530 0 : result->bytes_consumed = off;
531 :
532 : /* reset state */
533 :
534 0 : ssparse->state = FD_SSPARSE_STATE_ACCOUNT_PADDING;
535 :
536 0 : result->account_batch.slot = ssparse->slot;
537 :
538 0 : return FD_SSPARSE_ADVANCE_ACCOUNT_BATCH;
539 0 : }
540 :
541 : static int
542 : advance_account_header( fd_ssparse_t * ssparse,
543 : uchar const * data,
544 : ulong data_sz,
545 0 : fd_ssparse_advance_result_t * result ) {
546 0 : ulong consume = fd_ulong_min( 136UL-ssparse->account.header_bytes_consumed, fd_ulong_min( data_sz, ssparse->acc_vec_bytes-ssparse->tar.file_bytes_consumed ) );
547 :
548 0 : if( FD_UNLIKELY( !consume ) ) {
549 0 : if( FD_LIKELY( ssparse->tar.file_bytes_consumed==ssparse->acc_vec_bytes ) ) {
550 0 : ssparse->state = FD_SSPARSE_STATE_SCROLL_ACCOUNT_GARBAGE;
551 0 : return FD_SSPARSE_ADVANCE_AGAIN;
552 0 : } else {
553 0 : FD_LOG_WARNING(( "unexpected end of data while advancing account header, data_sz=%lu, file_bytes_consumed=%lu, acc_vec_bytes=%lu", data_sz, ssparse->tar.file_bytes_consumed, ssparse->acc_vec_bytes ));
554 0 : return FD_SSPARSE_ADVANCE_ERROR;
555 0 : }
556 0 : }
557 :
558 0 : if( FD_UNLIKELY( consume<136UL ) ) {
559 0 : fd_memcpy( ssparse->account.header+ssparse->account.header_bytes_consumed, data, consume );
560 0 : } else if( ssparse->batch_enabled ) {
561 : /* fast path */
562 0 : int res = advance_account_batch( ssparse, data, data_sz, result );
563 0 : if( res==FD_SSPARSE_ADVANCE_ACCOUNT_BATCH ) return res;
564 : /* fall through and continue processing account header */
565 0 : }
566 :
567 0 : ssparse->account.header_bytes_consumed += consume;
568 0 : ssparse->tar.file_bytes_consumed += consume;
569 0 : ssparse->bytes_consumed += consume;
570 0 : result->bytes_consumed = consume;
571 :
572 0 : if( FD_UNLIKELY( ssparse->account.header_bytes_consumed<136UL ) ) return FD_SSPARSE_ADVANCE_AGAIN;
573 :
574 0 : uchar const * hdr = ssparse->account.header;
575 0 : if( FD_LIKELY( consume==136UL ) ) hdr = data;
576 :
577 0 : result->account_header.data_len = fd_ulong_load_8_fast( hdr+8UL );
578 0 : if( FD_UNLIKELY( result->account_header.data_len>FD_RUNTIME_ACC_SZ_MAX ) ) {
579 0 : FD_LOG_WARNING(( "invalid account header data length %lu", result->account_header.data_len ));
580 0 : return FD_SSPARSE_ADVANCE_ERROR;
581 0 : }
582 :
583 0 : result->account_header.pubkey = hdr+16UL;
584 0 : result->account_header.lamports = fd_ulong_load_8_fast( hdr+48UL );
585 0 : result->account_header.rent_epoch = fd_ulong_load_8_fast( hdr+56UL );
586 0 : result->account_header.owner = hdr+64UL;
587 0 : result->account_header.executable = hdr[ 96UL ];
588 0 : if( FD_UNLIKELY( result->account_header.executable>1 ) ) {
589 0 : char pubkey_str[ FD_BASE58_ENCODED_32_SZ ];
590 0 : fd_base58_encode_32( result->account_header.pubkey, NULL, pubkey_str );
591 0 : FD_LOG_WARNING(( "invalid account header executable %d for account %s", result->account_header.executable, pubkey_str ));
592 0 : return FD_SSPARSE_ADVANCE_ERROR;
593 0 : }
594 0 : result->account_header.hash = hdr+104UL;
595 0 : result->account_header.slot = ssparse->slot;
596 :
597 0 : ssparse->account.owner = hdr+64UL;
598 0 : ssparse->account.data_len = result->account_header.data_len;
599 0 : ssparse->account.data_bytes_consumed = 0UL;
600 0 : ssparse->state = FD_SSPARSE_STATE_ACCOUNT_DATA;
601 :
602 0 : return FD_SSPARSE_ADVANCE_ACCOUNT_HEADER;
603 0 : }
604 :
605 : static int
606 : advance_account_data( fd_ssparse_t * ssparse,
607 : uchar const * data,
608 : ulong data_sz,
609 0 : fd_ssparse_advance_result_t * result ) {
610 0 : if( FD_UNLIKELY( ssparse->account.data_bytes_consumed==ssparse->account.data_len ) ) {
611 0 : ssparse->state = FD_SSPARSE_STATE_ACCOUNT_PADDING;
612 0 : return FD_SSPARSE_ADVANCE_AGAIN;
613 0 : }
614 :
615 0 : ulong consume = fd_ulong_min( data_sz, ssparse->acc_vec_bytes-ssparse->tar.file_bytes_consumed );
616 0 : if( FD_UNLIKELY( !consume ) ) {
617 0 : FD_LOG_WARNING(( "account data extends beyond append vec size" ));
618 0 : return FD_SSPARSE_ADVANCE_ERROR;
619 0 : }
620 :
621 0 : consume = fd_ulong_min( consume, ssparse->account.data_len-ssparse->account.data_bytes_consumed );
622 0 : if( FD_UNLIKELY( !consume ) ) {
623 0 : FD_LOG_WARNING(( "unexpected end of data while parsing account data, data_sz=%lu, data_bytes_consumed=%lu, data_len=%lu", data_sz, ssparse->account.data_bytes_consumed, ssparse->account.data_len ));
624 0 : return FD_SSPARSE_ADVANCE_ERROR;
625 0 : }
626 :
627 0 : ssparse->tar.file_bytes_consumed += consume;
628 0 : ssparse->bytes_consumed += consume;
629 0 : ssparse->account.data_bytes_consumed += consume;
630 0 : result->bytes_consumed = consume;
631 :
632 0 : result->account_data.owner = ssparse->account.owner;
633 0 : result->account_data.data_sz = consume;
634 0 : result->account_data.data = data;
635 :
636 0 : FD_TEST( ssparse->account.data_bytes_consumed<=ssparse->account.data_len );
637 0 : if( FD_LIKELY( ssparse->account.data_bytes_consumed==ssparse->account.data_len ) ) {
638 0 : ssparse->state = FD_SSPARSE_STATE_ACCOUNT_PADDING;
639 0 : }
640 :
641 0 : return FD_SSPARSE_ADVANCE_ACCOUNT_DATA;
642 0 : }
643 :
644 : static int
645 : advance_account_padding( fd_ssparse_t * ssparse,
646 : uchar const * data,
647 : ulong data_sz,
648 0 : fd_ssparse_advance_result_t * result ) {
649 0 : (void)data;
650 :
651 0 : ulong pad_sz = fd_ulong_align_up( ssparse->tar.file_bytes_consumed, 8UL ) - ssparse->tar.file_bytes_consumed;
652 0 : pad_sz = fd_ulong_min( pad_sz, ssparse->acc_vec_bytes - ssparse->tar.file_bytes_consumed );
653 0 : if( FD_UNLIKELY( !pad_sz ) ) {
654 0 : if( FD_LIKELY( ssparse->tar.file_bytes_consumed==ssparse->acc_vec_bytes ) ) ssparse->state = FD_SSPARSE_STATE_SCROLL_TAR_HEADER;
655 0 : else ssparse->state = FD_SSPARSE_STATE_ACCOUNT_HEADER;
656 :
657 0 : ssparse->account.header_bytes_consumed = 0UL;
658 0 : return FD_SSPARSE_ADVANCE_AGAIN;
659 0 : }
660 :
661 0 : ulong consume = fd_ulong_min( data_sz, pad_sz );
662 0 : if( FD_UNLIKELY( !consume ) ) {
663 0 : FD_LOG_WARNING(( "unexpected end of data while parsing account padding, data_sz=%lu, file_bytes_consumed=%lu, acc_vec_bytes=%lu", data_sz, ssparse->tar.file_bytes_consumed, ssparse->acc_vec_bytes ));
664 0 : return FD_SSPARSE_ADVANCE_ERROR;
665 0 : }
666 :
667 0 : ssparse->tar.file_bytes_consumed += consume;
668 0 : ssparse->bytes_consumed += consume;
669 0 : result->bytes_consumed = consume;
670 :
671 0 : ulong remaining = fd_ulong_align_up( ssparse->tar.file_bytes_consumed, 8UL ) - ssparse->tar.file_bytes_consumed;
672 0 : if( FD_LIKELY( !remaining ) ) {
673 0 : ssparse->account.header_bytes_consumed = 0UL;
674 0 : ssparse->state = FD_SSPARSE_STATE_ACCOUNT_HEADER;
675 0 : }
676 0 : return FD_SSPARSE_ADVANCE_AGAIN;
677 0 : }
678 :
679 : static int
680 : advance_account_garbage( fd_ssparse_t * ssparse,
681 : uchar const * data,
682 : ulong data_sz,
683 0 : fd_ssparse_advance_result_t * result ) {
684 0 : (void)data;
685 0 : ulong rem = ssparse->tar.file_bytes-ssparse->tar.file_bytes_consumed;
686 0 : if( FD_UNLIKELY( !rem ) ) {
687 0 : ssparse->state = FD_SSPARSE_STATE_SCROLL_TAR_HEADER;
688 0 : return FD_SSPARSE_ADVANCE_AGAIN;
689 0 : }
690 :
691 0 : if( FD_UNLIKELY( !data_sz ) ) {
692 0 : FD_LOG_WARNING(( "unexpected end of data while parsing append vec garbage, data_sz=%lu, remaining_bytes=%lu", data_sz, rem ));
693 0 : return FD_SSPARSE_ADVANCE_ERROR;
694 0 : }
695 :
696 0 : ulong consume = fd_ulong_min( data_sz, rem );
697 0 : ssparse->tar.file_bytes_consumed += consume;
698 0 : ssparse->bytes_consumed += consume;
699 0 : result->bytes_consumed = consume;
700 :
701 0 : if( FD_LIKELY( ssparse->tar.file_bytes_consumed<ssparse->tar.file_bytes ) ) return FD_SSPARSE_ADVANCE_AGAIN;
702 :
703 0 : ssparse->state = FD_SSPARSE_STATE_SCROLL_TAR_HEADER;
704 0 : return FD_SSPARSE_ADVANCE_AGAIN;
705 0 : }
706 :
707 : int
708 : fd_ssparse_advance( fd_ssparse_t * ssparse,
709 : uchar const * data,
710 : ulong data_sz,
711 0 : fd_ssparse_advance_result_t * result ) {
712 0 : result->bytes_consumed = 0UL;
713 :
714 0 : switch( ssparse->state ) {
715 0 : case FD_SSPARSE_STATE_TAR_HEADER: return advance_tar( ssparse, data, data_sz, result );
716 0 : case FD_SSPARSE_STATE_SCROLL_TAR_HEADER: return advance_next_tar( ssparse, data, data_sz, result );
717 0 : case FD_SSPARSE_STATE_VERSION: return advance_version( ssparse, data, data_sz, result );
718 0 : case FD_SSPARSE_STATE_MANIFEST: return advance_manifest( ssparse, data, data_sz, result );
719 0 : case FD_SSPARSE_STATE_ACCOUNT_HEADER: return advance_account_header( ssparse, data, data_sz, result );
720 0 : case FD_SSPARSE_STATE_ACCOUNT_DATA: return advance_account_data( ssparse, data, data_sz, result );
721 0 : case FD_SSPARSE_STATE_ACCOUNT_PADDING: return advance_account_padding( ssparse, data, data_sz, result );
722 0 : case FD_SSPARSE_STATE_STATUS_CACHE: return advance_status_cache( ssparse, data, data_sz, result );
723 0 : case FD_SSPARSE_STATE_SCROLL_ACCOUNT_GARBAGE: return advance_account_garbage( ssparse, data, data_sz, result );
724 0 : default: FD_LOG_ERR(( "invalid state %d", ssparse->state ));
725 0 : }
726 0 : }
727 :
728 : void
729 : fd_ssparse_batch_enable( fd_ssparse_t * ssparse,
730 0 : int enabled ) {
731 0 : ssparse->batch_enabled = !!enabled;
732 0 : }
733 :
734 : int
735 : fd_ssparse_populate_acc_vec_map( fd_ssparse_t * ssparse,
736 : ulong * slots,
737 : ulong * ids,
738 : ulong * file_szs,
739 0 : ulong cnt ) {
740 0 : for( ulong i=0UL; i<cnt; i++ ) {
741 0 : acc_vec_key_t key = { .slot=slots[ i ], .id=ids[ i ] };
742 0 : if( FD_UNLIKELY( acc_vec_map_ele_query( ssparse->manifest.acc_vec_map, &key, NULL, ssparse->manifest.acc_vec_pool ) ) ) return -1;
743 0 : acc_vec_t * acc_vec = acc_vec_pool_ele_acquire( ssparse->manifest.acc_vec_pool );
744 0 : acc_vec->key.id = ids[ i ];
745 0 : acc_vec->key.slot = slots[ i ];
746 0 : acc_vec->file_sz = file_szs[ i ];
747 0 : acc_vec_map_ele_insert( ssparse->manifest.acc_vec_map, acc_vec, ssparse->manifest.acc_vec_pool );
748 0 : }
749 0 : return 0;
750 0 : }
|