/src/netcdf-c/libnczarr/zutil.c
Line | Count | Source |
1 | | /********************************************************************* |
2 | | * Copyright 2018, UCAR/Unidata |
3 | | * See netcdf/COPYRIGHT file for copying and redistribution conditions. |
4 | | *********************************************************************/ |
5 | | |
6 | | /** |
7 | | * @file |
8 | | * @internal Misc. utility code |
9 | | * |
10 | | * @author Dennis Heimbigner |
11 | | */ |
12 | | |
13 | | #include "zincludes.h" |
14 | | #include <stddef.h> |
15 | | |
16 | | #undef DEBUG |
17 | | |
18 | | /**************************************************/ |
19 | | /* Static zarr type name table */ |
20 | | |
21 | | /* Table of nc_type X {Zarr,NCZarr} X endianness |
22 | | Issue: Need to distinguish NC_STRING && MAXSTRLEN==1 from NC_CHAR |
23 | | in a way that allows other Zarr implementations to read the data. |
24 | | |
25 | | Available info: |
26 | | Write: we have the netcdf type, so there is no ambiguity. |
27 | | Read: we have the variable type and also any attribute dtype, |
28 | | but those types are ambiguous. |
29 | | We also have the attribute vs variable type problem. |
30 | | For pure zarr, we have to infer the type of an attribute, |
31 | | so if we have "var:strattr = \"abcdef\"", then we need |
32 | | to decide how to infer the type: NC_STRING vs NC_CHAR. |
33 | | |
34 | | Solution: |
35 | | For variables and for NCZarr type attributes, distinguish by using: |
36 | | * ">S1" for NC_CHAR. |
37 | | * "|S1" for NC_STRING && MAXSTRLEN==1 |
38 | | * "|Sn" for NC_STRING && MAXSTRLEN==n |
39 | | This is admittedly a bit of a hack, and the first case in particular |
40 | | will probably cause errors in some other Zarr implementations; the Zarr |
41 | | spec is unclear about what combinations are legal. |
42 | | Note that we could use "|U1", but since this is utf-16 or utf-32 |
43 | | in python, it may cause problems when reading what amounts to utf-8. |
44 | | |
45 | | For attributes, we infer: |
46 | | * NC_CHAR if the hint is 0 |
47 | | - e.g. "var:strattr = 'abcdef'" => NC_CHAR |
48 | | * NC_STRING if hint is NC_STRING. |
49 | | - e.g. "string var:strattr = \"abc\", \"def\"" => NC_STRING |
50 | | |
51 | | Note also that if we read a pure zarr file we will probably always |
52 | | see "|S1", so we will never see a variable of type NC_CHAR. |
53 | | We might however see an attribute of type string. |
54 | | */ |
/* dtype name table, one row per atomic nc_type (the row index is the nc_type).
   Each row holds two triples of dtype strings, one used for pure Zarr and one
   for NCZarr; the column headings below give the order of the entries in a
   triple (NE, LE, BE).
   The NC_STRING entries are printf-style formats: the %d is replaced by the
   maximum string length when the name is produced (see ncz_nctype2dtype). */
static const struct ZTYPES {
    char* zarr[3];   /* dtype names used for pure Zarr */
    char* nczarr[3]; /* dtype names used for NCZarr */
} znames[NUM_ATOMIC_TYPES] = {
/* nc_type       Pure Zarr          NCZarr
                   NE   LE     BE       NE    LE    BE*/
/*NC_NAT*/	{{NULL,NULL,NULL},   {NULL,NULL,NULL}},
/*NC_BYTE*/	{{"|i1","<i1",">i1"},{"|i1","<i1",">i1"}},
/*NC_CHAR*/	{{">S1",">S1",">S1"},{">S1",">S1",">S1"}},
/*NC_SHORT*/	{{"|i2","<i2",">i2"},{"|i2","<i2",">i2"}},
/*NC_INT*/	{{"|i4","<i4",">i4"},{"|i4","<i4",">i4"}},
/*NC_FLOAT*/	{{"|f4","<f4",">f4"},{"|f4","<f4",">f4"}},
/*NC_DOUBLE*/	{{"|f8","<f8",">f8"},{"|f8","<f8",">f8"}},
/*NC_UBYTE*/	{{"|u1","<u1",">u1"},{"|u1","<u1",">u1"}},
/*NC_USHORT*/	{{"|u2","<u2",">u2"},{"|u2","<u2",">u2"}},
/*NC_UINT*/	{{"|u4","<u4",">u4"},{"|u4","<u4",">u4"}},
/*NC_INT64*/	{{"|i8","<i8",">i8"},{"|i8","<i8",">i8"}},
/*NC_UINT64*/	{{"|u8","<u8",">u8"},{"|u8","<u8",">u8"}},
/*NC_STRING*/	{{"|S%d","|S%d","|S%d"},{"|S%d","|S%d","|S%d"}},
};
75 | | |
#if 0
/* Disabled: default fill value, written as a string, for each atomic
   nc_type (the array index is the nc_type; NC_NAT has none). */
static const char* zfillvalue[NUM_ATOMIC_TYPES] = {
NULL, /*NC_NAT*/
"-127", /*NC_BYTE*/
"0", /*NC_CHAR*/
"-32767", /*NC_SHORT*/
"-2147483647", /*NC_INT*/
"9.9692099683868690e+36f", /* near 15 * 2^119 */ /*NC_FLOAT*/
"9.9692099683868690e+36", /*NC_DOUBLE*/
"255", /*NC_UBYTE*/
"65535", /*NC_USHORT*/
"4294967295", /*NC_UINT*/
"-9223372036854775806", /*NC_INT64*/
"18446744073709551614", /*NC_UINT64*/
"", /*NC_STRING*/
};
#endif
93 | | |
94 | | /* map nc_type -> NCJ_SORT */ |
95 | | static int zjsonsort[NUM_ATOMIC_TYPES] = { |
96 | | NCJ_UNDEF, /*NC_NAT*/ |
97 | | NCJ_INT, /*NC_BYTE*/ |
98 | | NCJ_INT, /*NC_CHAR*/ |
99 | | NCJ_INT, /*NC_SHORT*/ |
100 | | NCJ_INT, /*NC_INT*/ |
101 | | NCJ_DOUBLE, /*NC_FLOAT*/ |
102 | | NCJ_DOUBLE, /*NC_DOUBLE*/ |
103 | | NCJ_INT, /*NC_UBYTE*/ |
104 | | NCJ_INT, /*NC_USHORT*/ |
105 | | NCJ_INT, /*NC_UINT*/ |
106 | | NCJ_INT, /*NC_INT64*/ |
107 | | NCJ_INT, /*NC_UINT64*/ |
108 | | NCJ_STRING, /*NC_STRING*/ |
109 | | }; |
110 | | |
111 | | /* Forward */ |
112 | | |
113 | | /**************************************************/ |
114 | | |
115 | | /** |
116 | | @internal Get key for a group |
117 | | @param grp - [in] group |
118 | | @param pathp - [out] full path |
119 | | @return NC_NOERR |
120 | | @author Dennis Heimbigner |
121 | | */ |
122 | | int |
123 | | NCZ_grpkey(const NC_GRP_INFO_T* grp, char** pathp) |
124 | 0 | { |
125 | 0 | int stat = NC_NOERR; |
126 | 0 | NClist* segments = nclistnew(); |
127 | 0 | NCbytes* path = NULL; |
128 | 0 | NC_GRP_INFO_T* parent = NULL; |
129 | 0 | size_t i; |
130 | |
|
131 | 0 | nclistinsert(segments,0,(void*)grp); |
132 | 0 | parent = grp->parent; |
133 | 0 | while(parent != NULL) { |
134 | 0 | nclistinsert(segments,0,parent); |
135 | 0 | parent = parent->parent; |
136 | 0 | } |
137 | 0 | path = ncbytesnew(); |
138 | 0 | for(i=0;i<nclistlength(segments);i++) { |
139 | 0 | grp = nclistget(segments,i); |
140 | 0 | if(i > 1) ncbytescat(path,"/"); /* Assume root is named "/" */ |
141 | 0 | ncbytescat(path,grp->hdr.name); |
142 | 0 | } |
143 | 0 | if(pathp) *pathp = ncbytesextract(path); |
144 | |
|
145 | 0 | nclistfree(segments); |
146 | 0 | ncbytesfree(path); |
147 | 0 | return stat; |
148 | |
|
149 | 0 | } |
150 | | |
151 | | /** |
152 | | @internal Get key for a var |
153 | | @param var - [in] var |
154 | | @param pathp - [out] full path |
155 | | @return NC_NOERR |
156 | | @author Dennis Heimbigner |
157 | | */ |
158 | | int |
159 | | NCZ_varkey(const NC_VAR_INFO_T* var, char** pathp) |
160 | 0 | { |
161 | 0 | int stat = NC_NOERR; |
162 | 0 | char* grppath = NULL; |
163 | 0 | char* varpath = NULL; |
164 | | |
165 | | /* Start by creating the full path for the parent group */ |
166 | 0 | if((stat = NCZ_grpkey(var->container,&grppath))) |
167 | 0 | goto done; |
168 | | /* Create the suffix path using the var name */ |
169 | 0 | if((stat = nczm_concat(grppath,var->hdr.name,&varpath))) |
170 | 0 | goto done; |
171 | | /* return path */ |
172 | 0 | if(pathp) {*pathp = varpath; varpath = NULL;} |
173 | |
|
174 | 0 | done: |
175 | 0 | nullfree(grppath); |
176 | 0 | nullfree(varpath); |
177 | 0 | return stat; |
178 | 0 | } |
179 | | |
180 | | /** |
181 | | @internal Get key for a dimension |
182 | | @param dim - [in] dim |
183 | | @param pathp - [out] full path |
184 | | @return NC_NOERR |
185 | | @author Dennis Heimbigner |
186 | | */ |
187 | | int |
188 | | NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp) |
189 | 0 | { |
190 | 0 | int stat = NC_NOERR; |
191 | 0 | char* grppath = NULL; |
192 | 0 | char* dimpath = NULL; |
193 | | |
194 | | /* Start by creating the full path for the parent group */ |
195 | 0 | if((stat = NCZ_grpkey(dim->container,&grppath))) |
196 | 0 | goto done; |
197 | | /* Create the suffix path using the dim name */ |
198 | 0 | if((stat = nczm_concat(grppath,dim->hdr.name,&dimpath))) |
199 | 0 | goto done; |
200 | | /* return path */ |
201 | 0 | if(pathp) {*pathp = dimpath; dimpath = NULL;} |
202 | |
|
203 | 0 | done: |
204 | 0 | nullfree(grppath); |
205 | 0 | nullfree(dimpath); |
206 | 0 | return stat; |
207 | 0 | } |
208 | | |
209 | | /** |
210 | | @internal Split a key into pieces along '/' character; elide any leading '/' |
211 | | @param key - [in] |
212 | | @param segments - [out] split path |
213 | | @return NC_NOERR |
214 | | @author Dennis Heimbigner |
215 | | */ |
216 | | int |
217 | | ncz_splitkey(const char* key, NClist* segments) |
218 | 0 | { |
219 | 0 | return nczm_split(key,segments); |
220 | 0 | } |
221 | | |
222 | | /**************************************************/ |
223 | | /* Json sync code */ |
224 | | |
225 | | /** |
226 | | @internal Down load a .z... structure into memory |
227 | | @param zmap - [in] controlling zarr map |
228 | | @param key - [in] .z... object to load |
229 | | @param jsonp - [out] root of the loaded json (NULL if key does not exist) |
230 | | @return NC_NOERR |
231 | | @return NC_EXXX |
232 | | @author Dennis Heimbigner |
233 | | */ |
234 | | int |
235 | | NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp) |
236 | 0 | { |
237 | 0 | int stat = NC_NOERR; |
238 | 0 | size64_t len; |
239 | 0 | char* content = NULL; |
240 | 0 | NCjson* json = NULL; |
241 | |
|
242 | 0 | switch(stat = nczmap_len(zmap, key, &len)) { |
243 | 0 | case NC_NOERR: break; |
244 | 0 | case NC_ENOOBJECT: case NC_EEMPTY: |
245 | 0 | stat = NC_NOERR; |
246 | 0 | goto exit; |
247 | 0 | default: goto done; |
248 | 0 | } |
249 | 0 | if((content = malloc(len+1)) == NULL) |
250 | 0 | {stat = NC_ENOMEM; goto done;} |
251 | 0 | if((stat = nczmap_read(zmap, key, 0, len, (void*)content))) |
252 | 0 | goto done; |
253 | 0 | content[len] = '\0'; |
254 | 0 | if((stat = NCJparse(content,0,&json)) < 0) |
255 | 0 | {stat = NC_ENCZARR; goto done;} |
256 | | |
257 | 0 | exit: |
258 | 0 | if(jsonp) {*jsonp = json; json = NULL;} |
259 | |
|
260 | 0 | done: |
261 | 0 | NCJreclaim(json); |
262 | 0 | nullfree(content); |
263 | 0 | return stat; |
264 | 0 | } |
265 | | |
266 | | /** |
267 | | @internal Upload a modified json tree to a .z... structure. |
268 | | @param zmap - [in] controlling zarr map |
269 | | @param key - [in] .z... object to load |
270 | | @param json - [in] root of the json tree |
271 | | @return NC_NOERR |
272 | | @author Dennis Heimbigner |
273 | | */ |
274 | | int |
275 | | NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json) |
276 | 0 | { |
277 | 0 | int stat = NC_NOERR; |
278 | 0 | char* content = NULL; |
279 | |
|
280 | 0 | ZTRACE(4,"zmap=%p key=%s",zmap,key); |
281 | |
|
282 | | #ifdef DEBUG |
283 | | fprintf(stderr,"uploadjson: %s\n",key); fflush(stderr); |
284 | | #endif |
285 | | /* Unparse the modified json tree */ |
286 | 0 | if((stat = NCJunparse(json,0,&content))) |
287 | 0 | goto done; |
288 | 0 | ZTRACEMORE(4,"\tjson=%s",content); |
289 | | |
290 | 0 | if(getenv("NCS3JSON") != NULL) |
291 | 0 | fprintf(stderr,">>>> uploadjson: %s: %s\n",key,content); |
292 | | |
293 | | /* Write the metadata */ |
294 | 0 | if((stat = nczmap_write(zmap, key, strlen(content), content))) |
295 | 0 | goto done; |
296 | | |
297 | 0 | done: |
298 | 0 | nullfree(content); |
299 | 0 | return ZUNTRACE(stat); |
300 | 0 | } |
301 | | |
#if 0
/**
@internal Create a new metadata object and return an empty dict for it.
The object must not already exist.
@param zmap - [in] map
@param key - [in] key of the object
@param jsonp - [out] return the new empty dict; caller reclaims
@return NC_NOERR
@return NC_EINVAL if object exists
@author Dennis Heimbigner
*/
int
NCZ_createdict(NCZMAP* zmap, const char* key, NCjson** jsonp)
{
    int stat = NC_NOERR;
    NCjson* json = NULL;

    /* See if it already exists */
    if((stat = NCZ_downloadjson(zmap,key,&json))) goto done;
    if(json == NULL) {
        if((stat = nczmap_def(zmap,key,NCZ_ISMETA))) goto done;
    } else {
        /* Already exists, fail */
        stat = NC_EINVAL;
        goto done;
    }
    /* Create the empty dictionary */
    if((stat = NCJnew(NCJ_DICT,&json)))
        goto done;
    if(jsonp) {*jsonp = json; json = NULL;}
done:
    NCJreclaim(json);
    return stat;
}

/**
@internal Create a new metadata object and return an empty array for it.
The object must not already exist.
@param zmap - [in] map
@param key - [in] key of the object
@param jsonp - [out] return the new empty array; caller reclaims
@return NC_NOERR
@return NC_EINVAL if object exists
@author Dennis Heimbigner
*/
int
NCZ_createarray(NCZMAP* zmap, const char* key, NCjson** jsonp)
{
    int stat = NC_NOERR;
    NCjson* json = NULL;

    if((stat = NCZ_downloadjson(zmap,key,&json))) goto done;
    if(json == NULL) { /* create it */
        if((stat = nczmap_def(zmap,key,NCZ_ISMETA))) goto done;
        /* Create the initial array */
        if((stat = NCJnew(NCJ_ARRAY,&json))) goto done;
    } else {
        /* Already exists, fail */
        stat = NC_EINVAL;
        goto done;
    }
    if(json->sort != NCJ_ARRAY) {stat = NC_ENCZARR; goto done;}
    if(jsonp) {*jsonp = json; json = NULL;}
done:
    NCJreclaim(json);
    return stat;
}
#endif /*0*/
367 | | |
#if 0
/**
@internal Given an nc_type, produce the corresponding
          default fill value as a string.
          The string is looked up in the zfillvalue table.
@param nctype - [in] nc_type
@param dfaltp - [out] pointer to hold pointer to the value
@return NC_NOERR
@return NC_EINVAL if nctype is not an atomic type
@author Dennis Heimbigner
*/

int
ncz_default_fill_value(nc_type nctype, const char** dfaltp)
{
    if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL;
    if(dfaltp) *dfaltp = zfillvalue[nctype];
    return NC_NOERR;
}
#endif
386 | | |
387 | | /** |
388 | | @internal Given an nc_type, produce the corresponding |
389 | | fill value JSON type |
390 | | @param nctype - [in] nc_type |
391 | | @param sortp - [out] pointer to hold pointer to the JSON type |
392 | | @return NC_NOERR |
393 | | @author Dennis Heimbigner |
394 | | */ |
395 | | |
396 | | int |
397 | | ncz_fill_value_sort(nc_type nctype, int* sortp) |
398 | 0 | { |
399 | 0 | if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; |
400 | 0 | if(sortp) *sortp = zjsonsort[nctype]; |
401 | 0 | return NC_NOERR; |
402 | 0 | } |
403 | | |
404 | | /* |
405 | | Given a path to a group, return the list of objects |
406 | | that contain another object with the name of the tag. |
407 | | For example, we can get the immediate list of subgroups |
408 | | by using the tag ".zgroup". |
409 | | Basically we return the set of X where <prefix>/X/<tag> |
410 | | is an object in the map. |
411 | | Note: need to test with "/", "", and with and without trailing "/". |
412 | | */ |
413 | | int |
414 | | NCZ_subobjects(NCZMAP* map, const char* prefix, const char* tag, char dimsep, NClist* objlist) |
415 | 0 | { |
416 | 0 | size_t i; |
417 | 0 | int stat = NC_NOERR; |
418 | 0 | NClist* matches = nclistnew(); |
419 | 0 | NCbytes* path = ncbytesnew(); |
420 | | |
421 | | /* Get the list of names just below prefix */ |
422 | 0 | if((stat = nczmap_search(map,prefix,matches))) goto done; |
423 | 0 | for(i=0;i<nclistlength(matches);i++) { |
424 | 0 | const char* name = nclistget(matches,i); |
425 | 0 | size_t namelen= strlen(name); |
426 | | /* Ignore keys that start with .z or .nc or a potential chunk name */ |
427 | 0 | if(namelen >= 3 && name[0] == '.' && name[1] == 'n' && name[2] == 'c') |
428 | 0 | continue; |
429 | 0 | if(namelen >= 2 && name[0] == '.' && name[1] == 'z') |
430 | 0 | continue; |
431 | 0 | if(NCZ_ischunkname(name,dimsep)) |
432 | 0 | continue; |
433 | | /* Create <prefix>/<name>/<tag> and see if it exists */ |
434 | 0 | ncbytesclear(path); |
435 | 0 | ncbytescat(path,prefix); |
436 | 0 | ncbytescat(path,"/"); |
437 | 0 | ncbytescat(path,name); |
438 | 0 | ncbytescat(path,tag); |
439 | | /* See if this object exists */ |
440 | 0 | if((stat = nczmap_exists(map,ncbytescontents(path))) == NC_NOERR) |
441 | 0 | nclistpush(objlist,name); |
442 | 0 | } |
443 | |
|
444 | 0 | done: |
445 | 0 | nclistfreeall(matches); |
446 | 0 | ncbytesfree(path); |
447 | 0 | return stat; |
448 | 0 | } |
449 | | |
#if 0
/* Convert a netcdf-4 type integer, given as unsigned decimal text,
   to an nc_type.
   Returns NC_EINVAL if no number can be scanned; no range check is made. */
int
ncz_nctypedecode(const char* snctype, nc_type* nctypep)
{
    unsigned nctype = 0;
    if(sscanf(snctype,"%u",&nctype)!=1) return NC_EINVAL;
    if(nctypep) *nctypep = nctype;
    return NC_NOERR;
}
#endif
461 | | |
462 | | /** |
463 | | @internal Given an nc_type+other, produce the corresponding dtype string. |
464 | | @param nctype - [in] nc_type |
465 | | @param endianness - [in] endianness |
466 | | @param purezarr - [in] 1=>pure zarr, 0 => nczarr |
467 | | @param strlen - [in] max string length |
468 | | @param namep - [out] pointer to hold pointer to the dtype; user frees |
469 | | @return NC_NOERR |
470 | | @return NC_EINVAL |
471 | | @author Dennis Heimbigner |
472 | | */ |
473 | | |
474 | | int |
475 | | ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr, int len, char** dnamep) |
476 | 0 | { |
477 | 0 | char dname[64]; |
478 | 0 | char* format = NULL; |
479 | |
|
480 | 0 | if(nctype <= NC_NAT || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; |
481 | 0 | if(purezarr) |
482 | 0 | format = znames[nctype].zarr[endianness]; |
483 | 0 | else |
484 | 0 | format = znames[nctype].nczarr[endianness]; |
485 | 0 | snprintf(dname,sizeof(dname),format,len); |
486 | 0 | if(dnamep) *dnamep = strdup(dname); |
487 | 0 | return NC_NOERR; |
488 | 0 | } |
489 | | |
490 | | /* |
491 | | @internal Convert a numcodecs dtype spec to a corresponding nc_type. |
492 | | @param nctype - [in] dtype the dtype to convert |
493 | | @param nctype - [in] typehint help disambiguate char vs string |
494 | | @param purezarr - [in] 1=>pure zarr, 0 => nczarr |
495 | | @param nctypep - [out] hold corresponding type |
496 | | @param endianp - [out] hold corresponding endianness |
497 | | @param typelenp - [out] hold corresponding type size (for fixed length strings) |
498 | | @return NC_NOERR |
499 | | @return NC_EINVAL |
500 | | @author Dennis Heimbigner |
501 | | */ |
502 | | |
503 | | int |
504 | | ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp) |
505 | 0 | { |
506 | 0 | int stat = NC_NOERR; |
507 | 0 | int typelen = 0; |
508 | 0 | int count; |
509 | 0 | char tchar; |
510 | 0 | nc_type nctype = NC_NAT; |
511 | 0 | int endianness = -1; |
512 | 0 | const char* p; |
513 | 0 | int n; |
514 | |
|
515 | 0 | if(endianp) *endianp = NC_ENDIAN_NATIVE; |
516 | 0 | if(nctypep) *nctypep = NC_NAT; |
517 | |
|
518 | 0 | if(dtype == NULL) goto zerr; |
519 | 0 | p = dtype; |
520 | 0 | switch (*p++) { |
521 | 0 | case '<': endianness = NC_ENDIAN_LITTLE; break; |
522 | 0 | case '>': endianness = NC_ENDIAN_BIG; break; |
523 | 0 | case '|': endianness = NC_ENDIAN_NATIVE; break; |
524 | 0 | default: p--; endianness = NC_ENDIAN_NATIVE; break; |
525 | 0 | } |
526 | 0 | tchar = *p++; /* get the base type */ |
527 | | /* Decode the type length */ |
528 | 0 | count = sscanf(p,"%d%n",&typelen,&n); |
529 | 0 | if(count == 0) goto zerr; |
530 | 0 | p += n; |
531 | | |
532 | | /* Short circuit fixed length strings */ |
533 | 0 | if(tchar == 'S') { |
534 | | /* Fixed length string */ |
535 | 0 | switch (typelen) { |
536 | 0 | case 1: |
537 | 0 | nctype = (endianness == NC_ENDIAN_BIG ? NC_CHAR : NC_STRING); |
538 | 0 | if(purezarr) nctype = NC_STRING; /* Zarr has no NC_CHAR type */ |
539 | 0 | break; |
540 | 0 | default: |
541 | 0 | nctype = NC_STRING; |
542 | 0 | break; |
543 | 0 | } |
544 | | /* String/char have no endianness */ |
545 | 0 | endianness = NC_ENDIAN_NATIVE; |
546 | 0 | } else { |
547 | 0 | switch(typelen) { |
548 | 0 | case 1: |
549 | 0 | switch (tchar) { |
550 | 0 | case 'i': nctype = NC_BYTE; break; |
551 | 0 | case 'u': nctype = NC_UBYTE; break; |
552 | 0 | default: goto zerr; |
553 | 0 | } |
554 | 0 | break; |
555 | 0 | case 2: |
556 | 0 | switch (tchar) { |
557 | 0 | case 'i': nctype = NC_SHORT; break; |
558 | 0 | case 'u': nctype = NC_USHORT; break; |
559 | 0 | default: goto zerr; |
560 | 0 | } |
561 | 0 | break; |
562 | 0 | case 4: |
563 | 0 | switch (tchar) { |
564 | 0 | case 'i': nctype = NC_INT; break; |
565 | 0 | case 'u': nctype = NC_UINT; break; |
566 | 0 | case 'f': nctype = NC_FLOAT; break; |
567 | 0 | default: goto zerr; |
568 | 0 | } |
569 | 0 | break; |
570 | 0 | case 8: |
571 | 0 | switch (tchar) { |
572 | 0 | case 'i': nctype = NC_INT64; break; |
573 | 0 | case 'u': nctype = NC_UINT64; break; |
574 | 0 | case 'f': nctype = NC_DOUBLE; break; |
575 | 0 | default: goto zerr; |
576 | 0 | } |
577 | 0 | break; |
578 | 0 | default: goto zerr; |
579 | 0 | } |
580 | 0 | } |
581 | | |
582 | | #if 0 |
583 | | /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */ |
584 | | if(endianness == NC_ENDIAN_NATIVE) |
585 | | endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); |
586 | | #endif |
587 | | |
588 | 0 | if(nctypep) *nctypep = nctype; |
589 | 0 | if(typelenp) *typelenp = typelen; |
590 | 0 | if(endianp) *endianp = endianness; |
591 | |
|
592 | 0 | done: |
593 | 0 | return stat; |
594 | 0 | zerr: |
595 | 0 | stat = NC_ENCZARR; |
596 | 0 | goto done; |
597 | 0 | } |
598 | | |
599 | | /* Infer the attribute's type based |
600 | | primarily on the first atomic value encountered |
601 | | recursively. |
602 | | */ |
603 | | int |
604 | | NCZ_inferattrtype(const NCjson* value, nc_type typehint, nc_type* typeidp) |
605 | 0 | { |
606 | 0 | int i,stat = NC_NOERR; |
607 | 0 | nc_type typeid; |
608 | 0 | NCjson* j = NULL; |
609 | 0 | unsigned long long u64; |
610 | 0 | long long i64; |
611 | 0 | int negative = 0; |
612 | |
|
613 | 0 | if(NCJsort(value) == NCJ_ARRAY && NCJarraylength(value) == 0) |
614 | 0 | {typeid = NC_NAT; goto done;} /* Empty array is illegal */ |
615 | | |
616 | 0 | if(NCJsort(value) == NCJ_NULL) |
617 | 0 | {typeid = NC_NAT; goto done;} /* NULL is also illegal */ |
618 | | |
619 | 0 | if(NCJsort(value) == NCJ_DICT) /* Complex JSON expr -- a dictionary */ |
620 | 0 | {typeid = NC_NAT; goto done;} |
621 | | |
622 | | /* If an array, make sure all the elements are simple */ |
623 | 0 | if(value->sort == NCJ_ARRAY) { |
624 | 0 | for(i=0;i<NCJarraylength(value);i++) { |
625 | 0 | j=NCJith(value,i); |
626 | 0 | if(!NCJisatomic(j)) |
627 | 0 | {typeid = NC_NAT; goto done;} |
628 | 0 | } |
629 | 0 | } |
630 | | |
631 | | /* Infer from first element */ |
632 | 0 | if(value->sort == NCJ_ARRAY) { |
633 | 0 | j=NCJith(value,0); |
634 | 0 | return NCZ_inferattrtype(j,typehint,typeidp); |
635 | 0 | } |
636 | | |
637 | | /* At this point, value is a primitive JSON Value */ |
638 | | |
639 | 0 | switch (NCJsort(value)) { |
640 | 0 | case NCJ_NULL: |
641 | 0 | typeid = NC_NAT; |
642 | 0 | return NC_NOERR; |
643 | 0 | case NCJ_DICT: |
644 | 0 | typeid = NC_CHAR; |
645 | 0 | goto done; |
646 | 0 | case NCJ_UNDEF: |
647 | 0 | return NC_EINVAL; |
648 | 0 | default: /* atomic */ |
649 | 0 | break; |
650 | 0 | } |
651 | | |
652 | 0 | if(NCJstring(value) != NULL) |
653 | 0 | negative = (NCJstring(value)[0] == '-'); |
654 | 0 | switch (value->sort) { |
655 | 0 | case NCJ_INT: |
656 | 0 | if(negative) { |
657 | 0 | sscanf(NCJstring(value),"%lld",&i64); |
658 | 0 | u64 = (unsigned long long)i64; |
659 | 0 | } else |
660 | 0 | sscanf(NCJstring(value),"%llu",&u64); |
661 | 0 | typeid = NCZ_inferinttype(u64,negative); |
662 | 0 | break; |
663 | 0 | case NCJ_DOUBLE: |
664 | 0 | typeid = NC_DOUBLE; |
665 | 0 | break; |
666 | 0 | case NCJ_BOOLEAN: |
667 | 0 | typeid = NC_UBYTE; |
668 | 0 | break; |
669 | 0 | case NCJ_STRING: /* requires special handling as an array of characters */ |
670 | 0 | typeid = NC_CHAR; |
671 | 0 | break; |
672 | 0 | default: |
673 | 0 | stat = NC_ENCZARR; |
674 | 0 | } |
675 | 0 | done: |
676 | 0 | if(typeidp) *typeidp = typeid; |
677 | 0 | return stat; |
678 | 0 | } |
679 | | |
680 | | /* Infer the int type from the value; |
681 | | minimum type will be int. |
682 | | */ |
683 | | int |
684 | | NCZ_inferinttype(unsigned long long u64, int negative) |
685 | 0 | { |
686 | 0 | long long i64 = (long long)u64; /* keep bit pattern */ |
687 | 0 | if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64; |
688 | 0 | if(i64 < 0) { |
689 | 0 | if(i64 >= NC_MIN_INT) return NC_INT; |
690 | 0 | return NC_INT64; |
691 | 0 | } |
692 | 0 | if(i64 <= NC_MAX_INT) return NC_INT; |
693 | 0 | if(i64 <= NC_MAX_UINT) return NC_UINT; |
694 | 0 | return NC_INT64; |
695 | 0 | } |
696 | | |
697 | | /** |
698 | | @internal Similar to NCZ_grppath, but using group ids. |
699 | | @param gid - [in] group id |
700 | | @param pathp - [out] full path |
701 | | @return NC_NOERR |
702 | | @author Dennis Heimbigner |
703 | | */ |
704 | | int |
705 | | NCZ_grpname_full(int gid, char** pathp) |
706 | 0 | { |
707 | 0 | int stat = NC_NOERR; |
708 | 0 | size_t len; |
709 | 0 | char* path = NULL; |
710 | |
|
711 | 0 | if((stat = nc_inq_grpname_full(gid,&len,NULL))) return stat; |
712 | 0 | if((path=malloc(len+1)) == NULL) return NC_ENOMEM; |
713 | 0 | if((stat = nc_inq_grpname_full(gid,&len,path))) return stat; |
714 | 0 | path[len] = '\0'; /* ensure null terminated */ |
715 | 0 | if(pathp) {*pathp = path; path = NULL;} |
716 | 0 | return stat; |
717 | 0 | } |
718 | | |
719 | | /** |
720 | | @internal Parse a commified string list |
721 | | @param s [in] string to parse |
722 | | @param list - [in/out] storage for the parsed list |
723 | | @return NC_NOERR |
724 | | @author Dennis Heimbigner |
725 | | */ |
726 | | int |
727 | | NCZ_comma_parse(const char* s, NClist* list) |
728 | 0 | { |
729 | 0 | int stat = NC_NOERR; |
730 | 0 | const char* p = NULL; |
731 | 0 | const char* endp = NULL; |
732 | |
|
733 | 0 | if(s == NULL || *s == '\0') goto done; |
734 | | |
735 | | /* Split s at the commas or EOL */ |
736 | 0 | p = s; |
737 | 0 | for(;;) { |
738 | 0 | char* s; |
739 | 0 | ptrdiff_t slen; |
740 | 0 | endp = strchr(p,','); |
741 | 0 | if(endp == NULL) endp = p + strlen(p); |
742 | 0 | slen = (endp - p); |
743 | 0 | if((s = malloc((size_t)slen+1)) == NULL) {stat = NC_ENOMEM; goto done;} |
744 | 0 | memcpy(s,p,(size_t)slen); |
745 | 0 | s[slen] = '\0'; |
746 | 0 | if(nclistmatch(list,s,0)) { |
747 | 0 | nullfree(s); /* duplicate */ |
748 | 0 | } else { |
749 | 0 | nclistpush(list,s); |
750 | 0 | } |
751 | 0 | if(*endp == '\0') break; |
752 | 0 | p = endp+1; |
753 | 0 | } |
754 | | |
755 | 0 | done: |
756 | 0 | return stat; |
757 | 0 | } |
758 | | |
759 | | /**************************************************/ |
#if 0
/* Endianness support */
/* Disabled local copies of the byte swapping macros. Each one reverses,
   in place, the bytes of the 2, 4 or 8 byte object that ip points to,
   staging the reversed bytes in a union before storing them back. */
/* signature: void swapinline16(void* ip) */
#define swapinline16(ip) \
{ \
    union {char b[2]; unsigned short i;} u; \
    char* src = (char*)(ip); \
    u.b[0] = src[1]; \
    u.b[1] = src[0]; \
    *((unsigned short*)ip) = u.i; \
}

/* signature: void swapinline32(void* ip) */
#define swapinline32(ip) \
{ \
    union {char b[4]; unsigned int i;} u; \
    char* src = (char*)(ip); \
    u.b[0] = src[3]; \
    u.b[1] = src[2]; \
    u.b[2] = src[1]; \
    u.b[3] = src[0]; \
    *((unsigned int*)ip) = u.i; \
}

/* signature: void swapinline64(void* ip) */
#define swapinline64(ip) \
{ \
    union {char b[8]; unsigned long long i;} u; \
    char* src = (char*)(ip); \
    u.b[0] = src[7]; \
    u.b[1] = src[6]; \
    u.b[2] = src[5]; \
    u.b[3] = src[4]; \
    u.b[4] = src[3]; \
    u.b[5] = src[2]; \
    u.b[6] = src[1]; \
    u.b[7] = src[0]; \
    *((unsigned long long*)ip) = u.i; \
}
#endif /*0*/
800 | | |
801 | | int |
802 | | NCZ_swapatomicdata(size_t datalen, void* data, int typesize) |
803 | 0 | { |
804 | 0 | int stat = NC_NOERR; |
805 | 0 | int i; |
806 | |
|
807 | 0 | assert(datalen % typesize == 0); |
808 | |
|
809 | 0 | if(typesize == 1) goto done; |
810 | | |
811 | | /*(typesize > 1)*/ |
812 | 0 | for(i=0;i<datalen;) { |
813 | 0 | char* p = ((char*)data) + i; |
814 | 0 | switch (typesize) { |
815 | 0 | case 2: swapinline16(p); break; |
816 | 0 | case 4: swapinline32(p); break; |
817 | 0 | case 8: swapinline64(p); break; |
818 | 0 | default: break; |
819 | 0 | } |
820 | 0 | i += typesize; |
821 | 0 | } |
822 | 0 | done: |
823 | 0 | return THROW(stat); |
824 | 0 | } |
825 | | |
/**
@internal Deep copy a vector of strings.
@param len - [in] number of entries; 0 means vec is NULL terminated
             (envv style) and the entries are counted
@param vec - [in] the vector to copy
@return a newly allocated, NULL terminated vector holding a copy of each
        string (a NULL entry inside an explicit length stays NULL);
        the caller frees, e.g. with NCZ_freestringvec
@return NULL if vec is NULL or memory runs out; nothing is leaked
*/
char**
NCZ_clonestringvec(size_t len, const char** vec)
{
    char** clone = NULL;
    size_t i;

    if(vec == NULL) return NULL;
    if(len == 0) { /* Figure out size as envv vector */
        const char** p;
        for(p=vec;*p;p++) len++;
    }
    clone = malloc(sizeof(char*) * (1+len));
    if(clone == NULL) return NULL;
    for(i=0;i<len;i++) {
        char* s = NULL;
        if(vec[i] != NULL) {
            size_t nbytes = strlen(vec[i]) + 1; /* include the terminator */
            if((s = malloc(nbytes)) == NULL) goto fail;
            memcpy(s,vec[i],nbytes);
        }
        clone[i] = s;
    }
    clone[len] = NULL;
    return clone;

fail:
    /* Release the copies made so far, then the vector itself */
    while(i > 0) free(clone[--i]);
    free(clone);
    return NULL;
}
846 | | |
/**
@internal Free a vector of strings together with the strings it holds.
@param len - [in] number of entries; 0 means vec is NULL terminated
             (envv style) and the entries are counted
@param vec - [in] the vector to free; NULL is ignored
*/
void
NCZ_freestringvec(size_t len, char** vec)
{
    char** cursor;
    char** end;

    if(vec == NULL)
        return;
    if(len == 0) {
        /* Figure out size as envv vector */
        for(cursor=vec;*cursor != NULL;cursor++)
            len++;
    }
    end = vec + len;
    for(cursor=vec;cursor<end;cursor++)
        free(*cursor); /* free(NULL) is a no-op, so empty slots are fine */
    free(vec);
}
861 | | |
862 | | int |
863 | | NCZ_ischunkname(const char* name,char dimsep) |
864 | 0 | { |
865 | 0 | int stat = NC_NOERR; |
866 | 0 | const char* p; |
867 | 0 | if(strchr("0123456789",name[0])== NULL) |
868 | 0 | stat = NC_ENCZARR; |
869 | 0 | else for(p=name;*p;p++) { |
870 | 0 | if(*p != dimsep && strchr("0123456789",*p) == NULL) /* approximate */ |
871 | 0 | {stat = NC_ENCZARR; break;} |
872 | 0 | } |
873 | 0 | return stat; |
874 | 0 | } |
875 | | |
876 | | char* |
877 | | NCZ_chunkpath(struct ChunkKey key) |
878 | 0 | { |
879 | 0 | size_t plen = nulllen(key.varkey)+1+nulllen(key.chunkkey); |
880 | 0 | char* path = (char*)malloc(plen+1); |
881 | | |
882 | 0 | if(path == NULL) return NULL; |
883 | 0 | path[0] = '\0'; |
884 | 0 | strlcat(path,key.varkey,plen+1); |
885 | 0 | strlcat(path,"/",plen+1); |
886 | 0 | strlcat(path,key.chunkkey,plen+1); |
887 | 0 | return path; |
888 | 0 | } |
889 | | |
890 | | int |
891 | | NCZ_reclaim_fill_value(NC_VAR_INFO_T* var) |
892 | 0 | { |
893 | 0 | int stat = NC_NOERR; |
894 | 0 | if(var->fill_value) { |
895 | 0 | int tid = var->type_info->hdr.id; |
896 | 0 | stat = NC_reclaim_data_all(var->container->nc4_info->controller,tid,var->fill_value,1); |
897 | 0 | var->fill_value = NULL; |
898 | 0 | } |
899 | | /* Reclaim any existing fill_chunk */ |
900 | 0 | if(!stat) stat = NCZ_reclaim_fill_chunk(((NCZ_VAR_INFO_T*)var->format_var_info)->cache); |
901 | 0 | return stat; |
902 | 0 | } |
903 | | |
904 | | int |
905 | | NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp) |
906 | 0 | { |
907 | 0 | int stat = NC_NOERR; |
908 | 0 | int tid = var->type_info->hdr.id; |
909 | 0 | void* dst = NULL; |
910 | |
|
911 | 0 | if(var->fill_value) { |
912 | 0 | if((stat = NC_copy_data_all(var->container->nc4_info->controller,tid,var->fill_value,1,&dst))) goto done; |
913 | 0 | } |
914 | 0 | if(dstp) {*dstp = dst; dst = NULL;} |
915 | 0 | done: |
916 | 0 | if(dst) (void)NC_reclaim_data_all(var->container->nc4_info->controller,tid,dst,1); |
917 | 0 | return stat; |
918 | 0 | } |
919 | | |
920 | | |
921 | | /* Get max str len for a variable or grp */ |
922 | | /* Has side effect of setting values in the |
923 | | internal data structures */ |
924 | | int |
925 | | NCZ_get_maxstrlen(NC_OBJ* obj) |
926 | 0 | { |
927 | 0 | int maxstrlen = 0; |
928 | 0 | assert(obj->sort == NCGRP || obj->sort == NCVAR); |
929 | 0 | if(obj->sort == NCGRP) { |
930 | 0 | NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)obj; |
931 | 0 | NC_FILE_INFO_T* file = grp->nc4_info; |
932 | 0 | NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; |
933 | 0 | if(zfile->default_maxstrlen == 0) |
934 | 0 | zfile->default_maxstrlen = NCZ_MAXSTR_DEFAULT; |
935 | 0 | maxstrlen = zfile->default_maxstrlen; |
936 | 0 | } else { /*(obj->sort == NCVAR)*/ |
937 | 0 | NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)obj; |
938 | 0 | NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; |
939 | 0 | if(zvar->maxstrlen == 0) |
940 | 0 | zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var->container); |
941 | 0 | maxstrlen = zvar->maxstrlen; |
942 | 0 | } |
943 | 0 | return maxstrlen; |
944 | 0 | } |
945 | | |
/**
 * Convert an array of fixed-width string cells into an array of
 * separately allocated, NUL-terminated C strings.
 *
 * @param fixed     count consecutive cells, each maxstrlen bytes wide
 * @param charp     output vector of count entries; every entry is
 *                  overwritten.  A cell whose first byte is NUL yields a
 *                  NULL entry; any other cell yields a malloc'd string of
 *                  maxstrlen bytes plus a terminating NUL.  The caller
 *                  owns (and must free) the returned strings.
 * @param count     number of cells / output entries
 * @param maxstrlen width of one cell in bytes
 * @return NC_NOERR on success;
 *         NC_EINVAL if maxstrlen is negative (all entries are NULL);
 *         NC_ENOMEM if an allocation fails (strings allocated so far are
 *         released and all entries are NULL).
 */
int
NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen)
{
    size_t i;
    size_t width;
    const unsigned char* p = fixed;

    if(count == 0) return NC_NOERR;

    /* Put the output into a known state before anything can fail */
    memset((void*)charp,0,sizeof(char*)*count);

    /* A negative width would turn into a huge size_t below */
    if(maxstrlen < 0) return NC_EINVAL;
    /* Zero-width cells hold no bytes at all: do not peek at p[0] */
    if(maxstrlen == 0) return NC_NOERR;
    width = (size_t)maxstrlen;

    for(i=0;i<count;i++,p+=width) {
        char* s;
        if(p[0] == '\0') continue; /* empty cell => NULL entry */
        s = malloc(width+1); /* +1 to ensure null terminated */
        if(s == NULL) goto fail;
        memcpy(s,p,width);
        s[width] = '\0';
        charp[i] = s;
    }
    return NC_NOERR;

fail:
    /* Do not hand back a half-built vector */
    while(i > 0) {
        i--;
        free(charp[i]);
        charp[i] = NULL;
    }
    return NC_ENOMEM;
}
967 | | |
/**
 * Pack an array of C strings into consecutive fixed-width cells.
 *
 * @param charp     count string pointers; a NULL pointer produces an
 *                  all-zero cell
 * @param fixed     destination buffer of at least count*maxstrlen bytes
 * @param count     number of strings / cells
 * @param maxstrlen width of one cell in bytes
 * @return NC_NOERR on success; NC_EINVAL if maxstrlen is negative
 *         (fixed is left untouched)
 *
 * Each cell is zero padded.  A string of maxstrlen or more characters is
 * truncated to maxstrlen bytes, in which case the cell carries no
 * terminating NUL.
 */
int
NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen)
{
    size_t i;
    size_t width;
    unsigned char* p = fixed;

    /* A negative width would convert to a huge unsigned size below */
    if(maxstrlen < 0) return NC_EINVAL;
    width = (size_t)maxstrlen;
    if(count == 0 || width == 0) return NC_NOERR; /* nothing to write */

    memset(fixed,0,width*count); /* clear target; also covers NULL entries */
    for(i=0;i<count;i++,p+=width) {
        size_t len;
        if(charp[i] == NULL) continue;
        len = strlen(charp[i]);
        if(len > width) len = width; /* truncate to the cell width */
        memcpy(p,charp[i],len);
    }
    return NC_NOERR;
}
986 | | |
987 | | /* |
988 | | Wrap NC_copy_data, but take string value into account when overwriting |
989 | | */ |
990 | | int |
991 | | NCZ_copy_data(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, const void* memory, size_t count, int reading, void* copy) |
992 | 0 | { |
993 | 0 | int stat = NC_NOERR; |
994 | 0 | NC_TYPE_INFO_T* xtype = var->type_info; |
995 | 0 | if(xtype->hdr.id == NC_STRING && !reading) { |
996 | 0 | size_t i; |
997 | 0 | char** scopy = (char**)copy; |
998 | | /* Reclaim any string fill values in copy */ |
999 | 0 | for(i=0;i<count;i++) { |
1000 | 0 | nullfree(scopy[i]); |
1001 | 0 | scopy[i] = NULL; |
1002 | 0 | } |
1003 | 0 | } |
1004 | 0 | stat = NC_copy_data(file->controller,xtype->hdr.id,memory,count,copy); |
1005 | 0 | return stat; |
1006 | 0 | } |
1007 | | |
#if 0
/* Recursive helper (currently compiled out).
   Returns 0 for a dict, null or undefined node and for an array found
   below the top level; returns 1 for any other node, and for a top-level
   array all of whose elements pass the same check. */
static int
checksimplejson(NCjson* json, int depth)
{
    int i;
    const int sort = NCJsort(json);

    if(sort == NCJ_DICT || sort == NCJ_NULL || sort == NCJ_UNDEF)
	return 0;
    if(sort != NCJ_ARRAY)
	return 1;

    /* e.g. [...,[...],...] or [...,{...},...] */
    if(depth > 0) return 0;
    for(i=0;i < NCJarraylength(json);i++) {
	NCjson* elem = NCJith(json,i);
	if(!checksimplejson(elem,depth+1)) return 0;
    }
    return 1;
}
#endif
1032 | | |
1033 | | /* Return 1 if the attribute will be stored as a complex JSON valued attribute; return 0 otherwise */ |
1034 | | int |
1035 | | NCZ_iscomplexjson(const NCjson* json, nc_type typehint) |
1036 | 0 | { |
1037 | 0 | int i, stat = 0; |
1038 | |
|
1039 | 0 | switch (NCJsort(json)) { |
1040 | 0 | case NCJ_ARRAY: |
1041 | | /* If the typehint is NC_CHAR, then always treat it as complex */ |
1042 | 0 | if(typehint == NC_CHAR) {stat = 1; goto done;} |
1043 | | /* Otherwise see if it is a simple vector of atomic values */ |
1044 | 0 | for(i=0;i < NCJarraylength(json);i++) { |
1045 | 0 | NCjson* j = NCJith(json,i); |
1046 | 0 | if(!NCJisatomic(j)) {stat = 1; goto done;} |
1047 | 0 | } |
1048 | 0 | break; |
1049 | 0 | case NCJ_DICT: |
1050 | 0 | case NCJ_NULL: |
1051 | 0 | case NCJ_UNDEF: |
1052 | 0 | stat = 1; goto done; |
1053 | 0 | default: break; |
1054 | 0 | } |
1055 | 0 | done: |
1056 | 0 | return stat; |
1057 | 0 | } |