/src/netcdf-c/libdispatch/dinfermodel.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * @file |
3 | | * |
4 | | * Infer as much as possible from the omode + path. |
5 | | * Rewrite the path to a canonical form. |
6 | | * |
7 | | * Copyright 2018 University Corporation for Atmospheric |
8 | | * Research/Unidata. See COPYRIGHT file for more info. |
9 | | */ |
10 | | |
11 | | #include "config.h" |
12 | | #include <stdlib.h> |
13 | | #ifdef HAVE_UNISTD_H |
14 | | #include <unistd.h> |
15 | | #endif |
16 | | #ifdef HAVE_SYS_TYPES_H |
17 | | #include <sys/types.h> |
18 | | #endif |
19 | | |
20 | | #include "ncdispatch.h" |
21 | | #include "ncpathmgr.h" |
22 | | #include "netcdf_mem.h" |
23 | | #include "fbits.h" |
24 | | #include "ncbytes.h" |
25 | | #include "nclist.h" |
26 | | #include "nclog.h" |
27 | | #include "ncrc.h" |
28 | | #ifdef ENABLE_BYTERANGE |
29 | | #include "nchttp.h" |
30 | | #ifdef ENABLE_S3_SDK |
31 | | #include "ncs3sdk.h" |
32 | | #endif |
33 | | #endif |
34 | | |
/* Duplicate a string with strdup(), passing a NULL argument through unchanged.
   The argument is evaluated twice, so it must be free of side effects. */
#ifndef nulldup
#define nulldup(x) ((x)?strdup(x):(x))
#endif
38 | | |
39 | | #undef DEBUG |
40 | | |
41 | | /* If Defined, then use only stdio for all magic number io; |
42 | | otherwise use stdio or mpio as required. |
43 | | */ |
44 | | #undef USE_STDIO |
45 | | |
/**
Store info for open/read/close of
file when searching for magic numbers.

NOTE(review): the routines that populate and consume this struct
(openmagic/readmagic/closemagic) are only declared in this part of the
file, so the per-field notes below are inferred from names and types
and should be confirmed against those routines.
*/
struct MagicFile {
    const char* path;      /* presumably the path/URL being probed */
    struct NCURI* uri;     /* presumably the parsed form of path when it is a URL */
    int omode;             /* presumably the open mode flags */
    NCmodel* model;        /* presumably the model being inferred */
    long long filelen;     /* presumably the total length of the file */
    int use_parallel;      /* presumably non-zero when parallel IO was requested */
    int iss3;              /* presumably non-zero when the target is an S3 object */
    void* parameters; /* !NULL if inmemory && !diskless */
    FILE* fp;
#ifdef USE_PARALLEL
    MPI_File fh;
#endif
#ifdef ENABLE_BYTERANGE
    char* curlurl; /* url to use with CURLOPT_SET_URL */
    NC_HTTP_STATE* state;
#ifdef ENABLE_S3_SDK
    NCS3INFO s3;
    void* s3client;
    char* errmsg;
#endif
#endif
};
73 | | |
/** @internal Magic number for HDF5 files. To be consistent with
 * H5Fis_hdf5, use the complete HDF5 magic number */
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";

/* True once an implementation has been chosen for the model;
   an impl value of 0 is treated as "not yet decided". */
#define modelcomplete(model) ((model)->impl != 0)
79 | | |
#ifdef DEBUG
/* Flush both stdout and stderr (debug builds only). */
static void dbgflush(void)
{
    fflush(stdout);
    fflush(stderr);
}

/* Intentionally empty; invoked by check() for every non-NC_NOERR status.
   NOTE(review): presumably exists as a debugger breakpoint target -- confirm. */
static void
fail(int err)
{
    return;
}

/* Pass err through unchanged, calling fail() first when it is an error. */
static int
check(int err)
{
    if(err != NC_NOERR)
        fail(err);
    return err;
}
#else
/* Non-debug builds: check() is the identity. */
#define check(err) (err)
#endif
103 | | |
/*
Define a table of "mode=" string values
from which the implementation can be inferred.
Note that only cases that can currently
take URLs are included.

processmodearg() compares each mode argument against the tag with
strcmp(); on a match it stores impl into the model, and stores format
only when format is non-zero (a zero format leaves the model's format
untouched). The table is terminated by an entry with a NULL tag.
*/
static struct FORMATMODES {
    const char* tag;
    const int impl; /* NC_FORMATX_XXX value */
    const int format; /* NC_FORMAT_XXX value */
} formatmodes[] = {
{"dap2",NC_FORMATX_DAP2,NC_FORMAT_CLASSIC},
{"dap4",NC_FORMATX_DAP4,NC_FORMAT_NETCDF4},
{"netcdf-3",NC_FORMATX_NC3,0}, /* Might be e.g. cdf5 */
{"classic",NC_FORMATX_NC3,0}, /* ditto */
{"netcdf-4",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
{"enhanced",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
{"udf0",NC_FORMATX_UDF0,NC_FORMAT_NETCDF4},
{"udf1",NC_FORMATX_UDF1,NC_FORMAT_NETCDF4},
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
{NULL,0},
};
128 | | |
/*
Replace top-level name with defkey=defvalue.

A fragment that consists of a bare name (no "=value") and matches
`name` is rewritten by processmacros() using `defkey` and the
NULL-terminated `defvalues` array. Lookup stops at the first matching
name, so each name must appear only once; the table is terminated by
an entry whose name is NULL.
*/
static const struct MACRODEF {
    char* name;
    char* defkey;
    char* defvalues[4];
} macrodefs[] = {
{"zarr","mode",{"nczarr","zarr",NULL}},
{"dap2","mode",{"dap2",NULL}},
{"dap4","mode",{"dap4",NULL}},
{"s3","mode",{"s3","nczarr",NULL}},
{"bytes","mode",{"bytes",NULL}},
{"xarray","mode",{"zarr", NULL}},
{"noxarray","mode",{"nczarr", "noxarray", NULL}},
{NULL,NULL,{NULL}}
};
145 | | |
/* Mode inferences: if mode contains key, then add the inference and infer again.
   Keys are compared case-insensitively (see inferone()); the table is
   terminated by an entry whose key is NULL. */
static const struct MODEINFER {
    char* key;
    char* inference;
} modeinferences[] = {
{"zarr","nczarr"},
{"xarray","zarr"},
{"noxarray","nczarr"},
{NULL,NULL}
};
156 | | |
/* Mode negations: if mode contains key, then remove all occurrences of the inference and repeat.
   Both the key and the value to remove are compared case-insensitively
   (see negateone()); the table is terminated by an entry whose key is NULL. */
static const struct MODEINFER modenegations[] = {
{"bytes","nczarr"}, /* bytes negates (nc)zarr */
{"bytes","zarr"},
{"noxarray","xarray"},
{NULL,NULL}
};
164 | | |
/* Map FORMATX to readability to get magic number.
   readable is 1 when the implementation's files can be probed for a
   magic number and 0 otherwise.
   NOTE(review): the consumer (isreadable()) is not visible in this part
   of the file; the {0,0} entry is presumably the terminator -- confirm. */
static struct Readable {
    int impl;
    int readable;
} readable[] = {
{NC_FORMATX_NC3,1},
{NC_FORMATX_NC_HDF5,1},
{NC_FORMATX_NC_HDF4,1},
{NC_FORMATX_PNETCDF,1},
{NC_FORMATX_DAP2,0},
{NC_FORMATX_DAP4,0},
{NC_FORMATX_UDF0,0},
{NC_FORMATX_UDF1,0},
{NC_FORMATX_NCZARR,0}, /* eventually make readable */
{0,0},
};
181 | | |
/* Define the known URL protocols and their interpretation.
   processuri() matches the URL's protocol against `protocol` with
   strcmp(); a URL whose protocol is not listed is rejected. When
   `substitute` is non-NULL it replaces the URL's protocol, and when
   `fragments` is non-NULL it is split on '&' into key=value pairs that
   are added to the fragment list ahead of the URL's own fragments. */
static struct NCPROTOCOLLIST {
    const char* protocol;
    const char* substitute;
    const char* fragments; /* arbitrary fragment arguments */
} ncprotolist[] = {
    {"http",NULL,NULL},
    {"https",NULL,NULL},
    {"file",NULL,NULL},
    {"dods","http","mode=dap2"},
    {"dap4","http","mode=dap4"},
    {"s3","s3","mode=s3"},
    {NULL,NULL,NULL} /* Terminate search */
};
196 | | |
/* Forward */

/* Model inference from flags / file content, and URL + fragment handling */
static int NC_omodeinfer(int useparallel, int omode, NCmodel*);
static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri);
static int processuri(const char* path, NCURI** urip, NClist* fraglist);
static int processmacros(NClist** fraglistp);
static char* envvlist2string(NClist* pairs, const char*);
static void set_default_mode(int* cmodep);
static int parseonchar(const char* s, int ch, NClist* segments);

/* Magic number probing */
static int openmagic(struct MagicFile* file);
static int readmagic(struct MagicFile* file, long pos, char* magic);
static int closemagic(struct MagicFile* file);
static int NC_interpret_magic_number(char* magic, NCmodel* model);
#ifdef DEBUG
static void printmagic(const char* tag, char* magic,struct MagicFile*);
static void printlist(NClist* list, const char* tag);
#endif
static int isreadable(NCURI*,NCmodel*);

/* String/list utilities and "mode" fragment manipulation */
static char* list2string(NClist*);
static int parsepair(const char* pair, char** keyp, char** valuep);
static NClist* parsemode(const char* modeval);
static const char* getmodekey(const NClist* envv);
static int replacemode(NClist* envv, const char* newval);
static int inferone(const char* mode, NClist* newmodes);
static int negateone(const char* mode, NClist* modes);
222 | | |
223 | | /* |
224 | | If the path looks like a URL, then parse it, reformat it. |
225 | | */ |
226 | | static int |
227 | | processuri(const char* path, NCURI** urip, NClist* fraglenv) |
228 | 34 | { |
229 | 34 | int stat = NC_NOERR; |
230 | 34 | int found = 0; |
231 | 34 | NClist* tmp = NULL; |
232 | 34 | struct NCPROTOCOLLIST* protolist; |
233 | 34 | NCURI* uri = NULL; |
234 | 34 | size_t pathlen = strlen(path); |
235 | 34 | char* str = NULL; |
236 | 34 | const char** ufrags; |
237 | 34 | const char** p; |
238 | | |
239 | 34 | if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;} |
240 | | |
241 | | /* Defaults */ |
242 | 34 | if(urip) *urip = NULL; |
243 | | |
244 | 34 | ncuriparse(path,&uri); |
245 | 34 | if(uri == NULL) goto done; /* not url */ |
246 | | |
247 | | /* Look up the protocol */ |
248 | 0 | for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) { |
249 | 0 | if(strcmp(uri->protocol,protolist->protocol) == 0) { |
250 | 0 | found = 1; |
251 | 0 | break; |
252 | 0 | } |
253 | 0 | } |
254 | 0 | if(!found) |
255 | 0 | {stat = NC_EINVAL; goto done;} /* unrecognized URL form */ |
256 | | |
257 | | /* process the corresponding fragments for that protocol */ |
258 | 0 | if(protolist->fragments != NULL) { |
259 | 0 | int i; |
260 | 0 | tmp = nclistnew(); |
261 | 0 | if((stat = parseonchar(protolist->fragments,'&',tmp))) goto done; |
262 | 0 | for(i=0;i<nclistlength(tmp);i++) { |
263 | 0 | char* key=NULL; |
264 | 0 | char* value=NULL; |
265 | 0 | if((stat = parsepair(nclistget(tmp,i),&key,&value))) goto done; |
266 | 0 | if(value == NULL) value = strdup(""); |
267 | 0 | nclistpush(fraglenv,key); |
268 | 0 | nclistpush(fraglenv,value); |
269 | 0 | } |
270 | 0 | nclistfreeall(tmp); tmp = NULL; |
271 | 0 | } |
272 | | |
273 | | /* Substitute the protocol in any case */ |
274 | 0 | if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute); |
275 | | |
276 | | /* capture the fragments of the url */ |
277 | 0 | ufrags = ncurifragmentparams(uri); |
278 | 0 | if(ufrags != NULL) { |
279 | 0 | for(p=ufrags;*p;p+=2) { |
280 | 0 | const char* key = p[0]; |
281 | 0 | const char* value = p[1]; |
282 | 0 | nclistpush(fraglenv,nulldup(key)); |
283 | 0 | value = (value==NULL?"":value); |
284 | 0 | nclistpush(fraglenv,strdup(value)); |
285 | 0 | } |
286 | 0 | } |
287 | 0 | if(urip) { |
288 | 0 | *urip = uri; |
289 | 0 | uri = NULL; |
290 | 0 | } |
291 | |
|
292 | 34 | done: |
293 | 34 | nclistfreeall(tmp); |
294 | 34 | nullfree(str); |
295 | 34 | if(uri != NULL) ncurifree(uri); |
296 | 34 | return check(stat); |
297 | 0 | } |
298 | | |
299 | | /* Split a key=value pair */ |
300 | | static int |
301 | | parsepair(const char* pair, char** keyp, char** valuep) |
302 | 0 | { |
303 | 0 | const char* p; |
304 | 0 | char* key = NULL; |
305 | 0 | char* value = NULL; |
306 | |
|
307 | 0 | if(pair == NULL) |
308 | 0 | return NC_EINVAL; /* empty pair */ |
309 | 0 | if(pair[0] == '\0' || pair[0] == '=') |
310 | 0 | return NC_EINVAL; /* no key */ |
311 | 0 | p = strchr(pair,'='); |
312 | 0 | if(p == NULL) { |
313 | 0 | value = NULL; |
314 | 0 | key = strdup(pair); |
315 | 0 | } else { |
316 | 0 | ptrdiff_t len = (p-pair); |
317 | 0 | if((key = malloc(len+1))==NULL) return NC_ENOMEM; |
318 | 0 | memcpy(key,pair,len); |
319 | 0 | key[len] = '\0'; |
320 | 0 | if(p[1] == '\0') |
321 | 0 | value = NULL; |
322 | 0 | else |
323 | 0 | value = strdup(p+1); |
324 | 0 | } |
325 | 0 | if(keyp) {*keyp = key; key = NULL;}; |
326 | 0 | if(valuep) {*valuep = value; value = NULL;}; |
327 | 0 | nullfree(key); |
328 | 0 | nullfree(value); |
329 | 0 | return NC_NOERR; |
330 | 0 | } |
331 | | |
#if 0
/* Disabled code: splits modestr at commas and pushes each piece onto list.
   The body is the same algorithm as parseonchar() with the separator
   fixed to ','. */
static int
parseurlmode(const char* modestr, NClist* list)
{
    int stat = NC_NOERR;
    const char* p = NULL;
    const char* endp = NULL;

    if(modestr == NULL || *modestr == '\0') goto done;

    /* Split modestr at the commas or EOL */
    p = modestr;
    for(;;) {
        char* s;
        ptrdiff_t slen;
        endp = strchr(p,',');
        if(endp == NULL) endp = p + strlen(p);
        slen = (endp - p);
        if((s = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
        memcpy(s,p,slen);
        s[slen] = '\0';
        nclistpush(list,s);
        if(*endp == '\0') break;
        p = endp+1;
    }

done:
    return check(stat);
}
#endif
362 | | |
363 | | /* Split a string at a given char */ |
364 | | static int |
365 | | parseonchar(const char* s, int ch, NClist* segments) |
366 | 0 | { |
367 | 0 | int stat = NC_NOERR; |
368 | 0 | const char* p = NULL; |
369 | 0 | const char* endp = NULL; |
370 | |
|
371 | 0 | if(s == NULL || *s == '\0') goto done; |
372 | | |
373 | 0 | p = s; |
374 | 0 | for(;;) { |
375 | 0 | char* q; |
376 | 0 | ptrdiff_t slen; |
377 | 0 | endp = strchr(p,ch); |
378 | 0 | if(endp == NULL) endp = p + strlen(p); |
379 | 0 | slen = (endp - p); |
380 | 0 | if((q = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;} |
381 | 0 | memcpy(q,p,slen); |
382 | 0 | q[slen] = '\0'; |
383 | 0 | nclistpush(segments,q); |
384 | 0 | if(*endp == '\0') break; |
385 | 0 | p = endp+1; |
386 | 0 | } |
387 | | |
388 | 0 | done: |
389 | 0 | return check(stat); |
390 | 0 | } |
391 | | |
392 | | /* Convert a key,value envv pairlist into a delimited string*/ |
393 | | static char* |
394 | | envvlist2string(NClist* envv, const char* delim) |
395 | 0 | { |
396 | 0 | int i; |
397 | 0 | NCbytes* buf = NULL; |
398 | 0 | char* result = NULL; |
399 | |
|
400 | 0 | if(envv == NULL || nclistlength(envv) == 0) return NULL; |
401 | 0 | buf = ncbytesnew(); |
402 | 0 | for(i=0;i<nclistlength(envv);i+=2) { |
403 | 0 | const char* key = nclistget(envv,i); |
404 | 0 | const char* val = nclistget(envv,i+1); |
405 | 0 | if(key == NULL || strlen(key) == 0) continue; |
406 | 0 | assert(val != NULL); |
407 | 0 | if(i > 0) ncbytescat(buf,"&"); |
408 | 0 | ncbytescat(buf,key); |
409 | 0 | if(val != NULL && val[0] != '\0') { |
410 | 0 | ncbytescat(buf,"="); |
411 | 0 | ncbytescat(buf,val); |
412 | 0 | } |
413 | 0 | } |
414 | 0 | result = ncbytesextract(buf); |
415 | 0 | ncbytesfree(buf); |
416 | 0 | return result; |
417 | 0 | } |
418 | | |
419 | | /* Convert a list into a comma'd string */ |
420 | | static char* |
421 | | list2string(NClist* list) |
422 | 0 | { |
423 | 0 | int i; |
424 | 0 | NCbytes* buf = NULL; |
425 | 0 | char* result = NULL; |
426 | |
|
427 | 0 | if(list == NULL || nclistlength(list)==0) return strdup(""); |
428 | 0 | buf = ncbytesnew(); |
429 | 0 | for(i=0;i<nclistlength(list);i++) { |
430 | 0 | const char* m = nclistget(list,i); |
431 | 0 | if(m == NULL || strlen(m) == 0) continue; |
432 | 0 | if(i > 0) ncbytescat(buf,","); |
433 | 0 | ncbytescat(buf,m); |
434 | 0 | } |
435 | 0 | result = ncbytesextract(buf); |
436 | 0 | ncbytesfree(buf); |
437 | 0 | if(result == NULL) result = strdup(""); |
438 | 0 | return result; |
439 | 0 | } |
440 | | |
441 | | /* Given a mode= argument, fill in the impl */ |
442 | | static int |
443 | | processmodearg(const char* arg, NCmodel* model) |
444 | 0 | { |
445 | 0 | int stat = NC_NOERR; |
446 | 0 | struct FORMATMODES* format = formatmodes; |
447 | 0 | for(;format->tag;format++) { |
448 | 0 | if(strcmp(format->tag,arg)==0) { |
449 | 0 | model->impl = format->impl; |
450 | 0 | if(format->format != 0) model->format = format->format; |
451 | 0 | } |
452 | 0 | } |
453 | 0 | return check(stat); |
454 | 0 | } |
455 | | |
456 | | /* Given an envv fragment list, do macro replacement */ |
457 | | static int |
458 | | processmacros(NClist** fraglenvp) |
459 | 0 | { |
460 | 0 | int stat = NC_NOERR; |
461 | 0 | const struct MACRODEF* macros = NULL; |
462 | 0 | NClist* fraglenv = NULL; |
463 | 0 | NClist* expanded = NULL; |
464 | |
|
465 | 0 | if(fraglenvp == NULL || nclistlength(*fraglenvp) == 0) goto done; |
466 | 0 | fraglenv = *fraglenvp; |
467 | 0 | expanded = nclistnew(); |
468 | 0 | while(nclistlength(fraglenv) > 0) { |
469 | 0 | int found = 0; |
470 | 0 | char* key = NULL; |
471 | 0 | char* value = NULL; |
472 | 0 | key = nclistremove(fraglenv,0); /* remove from changing front */ |
473 | 0 | value = nclistremove(fraglenv,0); /* remove from changing front */ |
474 | 0 | if(strlen(value) == 0) { /* must be a singleton */ |
475 | 0 | for(macros=macrodefs;macros->name;macros++) { |
476 | 0 | if(strcmp(macros->name,key)==0) { |
477 | 0 | char* const * p; |
478 | 0 | nclistpush(expanded,strdup(macros->defkey)); |
479 | 0 | for(p=macros->defvalues;*p;p++) |
480 | 0 | nclistpush(expanded,strdup(*p)); |
481 | 0 | found = 1; |
482 | 0 | break; |
483 | 0 | } |
484 | 0 | } |
485 | 0 | } |
486 | 0 | if(!found) {/* pass thru */ |
487 | 0 | nclistpush(expanded,strdup(key)); |
488 | 0 | nclistpush(expanded,strdup(value)); |
489 | 0 | } |
490 | 0 | nullfree(key); |
491 | 0 | nullfree(value); |
492 | 0 | } |
493 | 0 | *fraglenvp = expanded; expanded = NULL; |
494 | |
|
495 | 0 | done: |
496 | 0 | nclistfreeall(expanded); |
497 | 0 | nclistfreeall(fraglenv); |
498 | 0 | return check(stat); |
499 | 0 | } |
500 | | |
501 | | /* Process mode flag inferences */ |
502 | | static int |
503 | | processinferences(NClist* fraglenv) |
504 | 0 | { |
505 | 0 | int stat = NC_NOERR; |
506 | 0 | const char* modeval = NULL; |
507 | 0 | NClist* modes = NULL; |
508 | 0 | NClist* newmodes = nclistnew(); |
509 | 0 | int i,inferred = 0; |
510 | 0 | char* newmodeval = NULL; |
511 | |
|
512 | 0 | if(fraglenv == NULL || nclistlength(fraglenv) == 0) goto done; |
513 | | |
514 | | /* Get "mode" entry */ |
515 | 0 | if((modeval = getmodekey(fraglenv))==NULL) goto done; |
516 | | |
517 | | /* Get the mode as list */ |
518 | 0 | modes = parsemode(modeval); |
519 | | |
520 | | /* Repeatedly walk the mode list until no more new positive inferences */ |
521 | 0 | do { |
522 | 0 | for(i=0;i<nclistlength(modes);i++) { |
523 | 0 | const char* mode = nclistget(modes,i); |
524 | 0 | inferred = inferone(mode,newmodes); |
525 | 0 | nclistpush(newmodes,strdup(mode)); /* keep key */ |
526 | 0 | if(!inferred) nclistpush(newmodes,strdup(mode)); |
527 | 0 | } |
528 | 0 | } while(inferred); |
529 | | |
530 | | /* Remove negative inferences */ |
531 | 0 | for(i=0;i<nclistlength(modes);i++) { |
532 | 0 | const char* mode = nclistget(modes,i); |
533 | 0 | inferred = negateone(mode,newmodes); |
534 | 0 | } |
535 | | |
536 | | /* Store new mode value */ |
537 | 0 | if((newmodeval = list2string(newmodes))== NULL) |
538 | 0 | {stat = NC_ENOMEM; goto done;} |
539 | 0 | if((stat=replacemode(fraglenv,newmodeval))) goto done; |
540 | 0 | modeval = NULL; |
541 | |
|
542 | 0 | done: |
543 | 0 | nullfree(newmodeval); |
544 | 0 | nclistfreeall(modes); |
545 | 0 | nclistfreeall(newmodes); |
546 | 0 | return check(stat); |
547 | 0 | } |
548 | | |
549 | | static int |
550 | | negateone(const char* mode, NClist* newmodes) |
551 | 0 | { |
552 | 0 | const struct MODEINFER* tests = modenegations; |
553 | 0 | int changed = 0; |
554 | 0 | for(;tests->key;tests++) { |
555 | 0 | int i; |
556 | 0 | if(strcasecmp(tests->key,mode)==0) { |
557 | | /* Find and remove all instances of the inference value */ |
558 | 0 | for(i=nclistlength(newmodes)-1;i>=0;i--) { |
559 | 0 | char* candidate = nclistget(newmodes,i); |
560 | 0 | if(strcasecmp(candidate,tests->inference)==0) { |
561 | 0 | nclistremove(newmodes,i); |
562 | 0 | nullfree(candidate); |
563 | 0 | changed = 1; |
564 | 0 | } |
565 | 0 | } |
566 | 0 | } |
567 | 0 | } |
568 | 0 | return changed; |
569 | 0 | } |
570 | | |
571 | | static int |
572 | | inferone(const char* mode, NClist* newmodes) |
573 | 0 | { |
574 | 0 | const struct MODEINFER* tests = modeinferences; |
575 | 0 | int changed = 0; |
576 | 0 | for(;tests->key;tests++) { |
577 | 0 | if(strcasecmp(tests->key,mode)==0) { |
578 | | /* Append the inferred mode; dups removed later */ |
579 | 0 | nclistpush(newmodes,strdup(tests->inference)); |
580 | 0 | changed = 1; |
581 | 0 | } |
582 | 0 | } |
583 | 0 | return changed; |
584 | 0 | } |
585 | | |
586 | | static int |
587 | | mergekey(NClist** valuesp) |
588 | 0 | { |
589 | 0 | int i,j; |
590 | 0 | int stat = NC_NOERR; |
591 | 0 | NClist* values = *valuesp; |
592 | 0 | NClist* allvalues = nclistnew(); |
593 | 0 | NClist* newvalues = nclistnew(); |
594 | 0 | char* value = NULL; |
595 | |
|
596 | 0 | for(i=0;i<nclistlength(values);i++) { |
597 | 0 | char* val1 = nclistget(values,i); |
598 | | /* split on commas and put pieces into allvalues */ |
599 | 0 | if((stat=parseonchar(val1,',',allvalues))) goto done; |
600 | 0 | } |
601 | | /* Remove duplicates and "" */ |
602 | 0 | while(nclistlength(allvalues) > 0) { |
603 | 0 | value = nclistremove(allvalues,0); |
604 | 0 | if(strlen(value) == 0) { |
605 | 0 | nullfree(value); value = NULL; |
606 | 0 | } else { |
607 | 0 | for(j=0;j<nclistlength(newvalues);j++) { |
608 | 0 | char* candidate = nclistget(newvalues,j); |
609 | 0 | if(strcasecmp(candidate,value)==0) |
610 | 0 | {nullfree(value); value = NULL; break;} |
611 | 0 | } |
612 | 0 | } |
613 | 0 | if(value != NULL) {nclistpush(newvalues,value); value = NULL;} |
614 | 0 | } |
615 | | /* Make sure to have at least 1 value */ |
616 | 0 | if(nclistlength(newvalues)==0) nclistpush(newvalues,strdup("")); |
617 | 0 | *valuesp = values; values = NULL; |
618 | |
|
619 | 0 | done: |
620 | 0 | nclistfree(allvalues); |
621 | 0 | nclistfreeall(values); |
622 | 0 | nclistfreeall(newvalues); |
623 | 0 | return check(stat); |
624 | 0 | } |
625 | | |
626 | | static int |
627 | | lcontains(NClist* l, const char* key0) |
628 | 0 | { |
629 | 0 | int i; |
630 | 0 | for(i=0;i<nclistlength(l);i++) { |
631 | 0 | const char* key1 = nclistget(l,i); |
632 | 0 | if(strcasecmp(key0,key1)==0) return 1; |
633 | 0 | } |
634 | 0 | return 0; |
635 | 0 | } |
636 | | |
637 | | /* Warning values should not use nclistfreeall */ |
638 | | static void |
639 | | collectvaluesbykey(NClist* fraglenv, const char* key, NClist* values) |
640 | 0 | { |
641 | 0 | int i; |
642 | | /* collect all the values with the same key (including this one) */ |
643 | 0 | for(i=0;i<nclistlength(fraglenv);i+=2) { |
644 | 0 | const char* key2 = nclistget(fraglenv,i); |
645 | 0 | if(strcasecmp(key,key2)==0) { |
646 | 0 | const char* value2 = nclistget(fraglenv,i+1); |
647 | 0 | nclistpush(values,value2); value2 = NULL; |
648 | 0 | } |
649 | 0 | } |
650 | 0 | } |
651 | | |
652 | | /* Warning allkeys should not use nclistfreeall */ |
653 | | static void |
654 | | collectallkeys(NClist* fraglenv, NClist* allkeys) |
655 | 0 | { |
656 | 0 | int i; |
657 | | /* collect all the distinct keys */ |
658 | 0 | for(i=0;i<nclistlength(fraglenv);i+=2) { |
659 | 0 | char* key = nclistget(fraglenv,i); |
660 | 0 | if(!lcontains(allkeys,key)) { |
661 | 0 | nclistpush(allkeys,key); |
662 | 0 | } |
663 | 0 | } |
664 | 0 | } |
665 | | |
666 | | /* Given a fragment envv list, coalesce duplicate keys and remove duplicate values*/ |
667 | | static int |
668 | | cleanfragments(NClist** fraglenvp) |
669 | 0 | { |
670 | 0 | int i,stat = NC_NOERR; |
671 | 0 | NClist* fraglenv = NULL; |
672 | 0 | NClist* tmp = NULL; |
673 | 0 | NClist* allkeys = NULL; |
674 | 0 | NClist* newlist = NULL; |
675 | 0 | NCbytes* buf = NULL; |
676 | 0 | char* key = NULL; |
677 | 0 | char* value = NULL; |
678 | |
|
679 | 0 | if(fraglenvp == NULL || nclistlength(*fraglenvp) == 0) return NC_NOERR; |
680 | 0 | fraglenv = *fraglenvp; /* take control of this list */ |
681 | 0 | *fraglenvp = NULL; |
682 | 0 | newlist = nclistnew(); |
683 | 0 | buf = ncbytesnew(); |
684 | 0 | allkeys = nclistnew(); |
685 | 0 | tmp = nclistnew(); |
686 | | |
687 | | /* collect all unique keys */ |
688 | 0 | collectallkeys(fraglenv,allkeys); |
689 | | /* Collect all values for same key across all fragments */ |
690 | 0 | for(i=0;i<nclistlength(allkeys);i++) { |
691 | 0 | key = nclistget(allkeys,i); |
692 | 0 | collectvaluesbykey(fraglenv,key,tmp); |
693 | | /* merge the key values, remove duplicate */ |
694 | 0 | if((stat=mergekey(&tmp))) goto done; |
695 | | /* Construct key,value pair and insert into newlist */ |
696 | 0 | key = strdup(key); |
697 | 0 | nclistpush(newlist,key); |
698 | 0 | value = list2string(tmp); |
699 | 0 | nclistpush(newlist,value); |
700 | 0 | nclistclear(tmp); |
701 | 0 | } |
702 | 0 | *fraglenvp = newlist; newlist = NULL; |
703 | 0 | done: |
704 | 0 | nclistfree(allkeys); |
705 | 0 | nclistfree(tmp); |
706 | 0 | ncbytesfree(buf); |
707 | 0 | nclistfreeall(fraglenv); |
708 | 0 | nclistfreeall(newlist); |
709 | 0 | return check(stat); |
710 | 0 | } |
711 | | |
712 | | /* process non-mode fragment keys in case they hold significance; currently not */ |
713 | | static int |
714 | | processfragmentkeys(const char* key, const char* value, NCmodel* model) |
715 | 0 | { |
716 | 0 | return NC_NOERR; |
717 | 0 | } |
718 | | |
719 | | /* |
720 | | Infer from the mode + useparallel |
721 | | only call if iscreate or file is not easily readable. |
722 | | */ |
723 | | static int |
724 | | NC_omodeinfer(int useparallel, int cmode, NCmodel* model) |
725 | 34 | { |
726 | 34 | int stat = NC_NOERR; |
727 | | |
728 | | /* If no format flags are set, then use default */ |
729 | 34 | if(!fIsSet(cmode,NC_FORMAT_ALL)) |
730 | 34 | set_default_mode(&cmode); |
731 | | |
732 | | /* Process the cmode; may override some already set flags. The |
733 | | * user-defined formats must be checked first. They may choose to |
734 | | * use some of the other flags, like NC_NETCDF4, so we must first |
735 | | * check NC_UDF0 and NC_UDF1 before checking for any other |
736 | | * flag. */ |
737 | 34 | if(fIsSet(cmode,(NC_UDF0|NC_UDF1))) { |
738 | 0 | model->format = NC_FORMAT_NETCDF4; |
739 | 0 | if(fIsSet(cmode,NC_UDF0)) { |
740 | 0 | model->impl = NC_FORMATX_UDF0; |
741 | 0 | } else { |
742 | 0 | model->impl = NC_FORMATX_UDF1; |
743 | 0 | } |
744 | 0 | goto done; |
745 | 0 | } |
746 | | |
747 | 34 | if(fIsSet(cmode,NC_64BIT_OFFSET)) { |
748 | 0 | model->impl = NC_FORMATX_NC3; |
749 | 0 | model->format = NC_FORMAT_64BIT_OFFSET; |
750 | 0 | goto done; |
751 | 0 | } |
752 | | |
753 | 34 | if(fIsSet(cmode,NC_64BIT_DATA)) { |
754 | 0 | model->impl = NC_FORMATX_NC3; |
755 | 0 | model->format = NC_FORMAT_64BIT_DATA; |
756 | 0 | goto done; |
757 | 0 | } |
758 | | |
759 | 34 | if(fIsSet(cmode,NC_NETCDF4)) { |
760 | 0 | model->impl = NC_FORMATX_NC4; |
761 | 0 | if(fIsSet(cmode,NC_CLASSIC_MODEL)) |
762 | 0 | model->format = NC_FORMAT_NETCDF4_CLASSIC; |
763 | 0 | else |
764 | 0 | model->format = NC_FORMAT_NETCDF4; |
765 | 0 | goto done; |
766 | 0 | } |
767 | | |
768 | | /* Default to classic model */ |
769 | 34 | model->format = NC_FORMAT_CLASSIC; |
770 | 34 | model->impl = NC_FORMATX_NC3; |
771 | | |
772 | 34 | done: |
773 | | /* Apply parallel flag */ |
774 | 34 | if(useparallel) { |
775 | 0 | if(model->impl == NC_FORMATX_NC3) |
776 | 0 | model->impl = NC_FORMATX_PNETCDF; |
777 | 0 | } |
778 | 34 | return check(stat); |
779 | 34 | } |
780 | | |
781 | | /* |
782 | | If the mode flags do not necessarily specify the |
783 | | format, then default it by adding in appropriate flags. |
784 | | */ |
785 | | |
786 | | static void |
787 | | set_default_mode(int* modep) |
788 | 34 | { |
789 | 34 | int mode = *modep; |
790 | 34 | int dfaltformat; |
791 | | |
792 | 34 | dfaltformat = nc_get_default_format(); |
793 | 34 | switch (dfaltformat) { |
794 | 0 | case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break; |
795 | 0 | case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break; |
796 | 0 | case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break; |
797 | 0 | case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break; |
798 | 34 | case NC_FORMAT_CLASSIC: /* fall thru */ |
799 | 34 | default: break; /* default to classic */ |
800 | 34 | } |
801 | 34 | *modep = mode; /* final result */ |
802 | 34 | } |
803 | | |
804 | | /**************************************************/ |
805 | | /* |
806 | | Infer model for this dataset using some |
807 | | combination of cmode, path, and reading the dataset. |
808 | | See the documentation in docs/internal.dox. |
809 | | |
810 | | @param path |
811 | | @param omode |
812 | | @param iscreate |
813 | | @param useparallel |
814 | | @param params |
815 | | @param model |
816 | | @param newpathp |
817 | | */ |
818 | | |
819 | | int |
820 | | NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp) |
821 | 34 | { |
822 | 34 | int i,stat = NC_NOERR; |
823 | 34 | NCURI* uri = NULL; |
824 | 34 | int omode = *omodep; |
825 | 34 | NClist* fraglenv = nclistnew(); |
826 | 34 | NClist* modeargs = nclistnew(); |
827 | 34 | char* sfrag = NULL; |
828 | 34 | const char* modeval = NULL; |
829 | 34 | char* abspath = NULL; |
830 | | |
831 | | /* Phase 1: |
832 | | 1. convert special protocols to http|https |
833 | | 2. begin collecting fragments |
834 | | */ |
835 | 34 | if((stat = processuri(path, &uri, fraglenv))) goto done; |
836 | | |
837 | 34 | if(uri != NULL) { |
838 | | #ifdef DEBUG |
839 | | printlist(fraglenv,"processuri"); |
840 | | #endif |
841 | | |
842 | | /* Phase 2: Expand macros and add to fraglenv */ |
843 | 0 | if((stat = processmacros(&fraglenv))) goto done; |
844 | | #ifdef DEBUG |
845 | | printlist(fraglenv,"processmacros"); |
846 | | #endif |
847 | | |
848 | | /* Cleanup the fragment list */ |
849 | 0 | if((stat = cleanfragments(&fraglenv))) goto done; |
850 | | |
851 | | /* Phase 2a: Expand mode inferences and add to fraglenv */ |
852 | 0 | if((stat = processinferences(fraglenv))) goto done; |
853 | | #ifdef DEBUG |
854 | | printlist(fraglenv,"processinferences"); |
855 | | #endif |
856 | | |
857 | | /* Phase 3: coalesce duplicate fragment keys and remove duplicate values */ |
858 | 0 | if((stat = cleanfragments(&fraglenv))) goto done; |
859 | | #ifdef DEBUG |
860 | | printlist(fraglenv,"cleanfragments"); |
861 | | #endif |
862 | | |
863 | | /* Phase 4: Rebuild the url fragment and rebuilt the url */ |
864 | 0 | sfrag = envvlist2string(fraglenv,"&"); |
865 | 0 | nclistfreeall(fraglenv); fraglenv = NULL; |
866 | | #ifdef DEBUG |
867 | | fprintf(stderr,"frag final: %s\n",sfrag); |
868 | | #endif |
869 | 0 | ncurisetfragments(uri,sfrag); |
870 | 0 | nullfree(sfrag); sfrag = NULL; |
871 | | |
872 | | /* If s3, then rebuild the url */ |
873 | 0 | if(NC_iss3(uri)) { |
874 | 0 | NCURI* newuri = NULL; |
875 | 0 | if((stat = NC_s3urlrebuild(uri,&newuri,NULL,NULL))) goto done; |
876 | 0 | ncurifree(uri); |
877 | 0 | uri = newuri; |
878 | 0 | } else if(strcmp(uri->protocol,"file")==0) { |
879 | | /* convert path to absolute */ |
880 | 0 | char* canon = NULL; |
881 | 0 | abspath = NCpathabsolute(uri->path); |
882 | 0 | if((stat = NCpathcanonical(abspath,&canon))) goto done; |
883 | 0 | nullfree(abspath); |
884 | 0 | abspath = canon; canon = NULL; |
885 | 0 | if((stat = ncurisetpath(uri,abspath))) goto done; |
886 | 0 | } |
887 | | |
888 | | /* rebuild the path */ |
889 | 0 | if(newpathp) { |
890 | 0 | *newpathp = ncuribuild(uri,NULL,NULL,NCURIALL); |
891 | | #ifdef DEBUG |
892 | | fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr); |
893 | | #endif |
894 | 0 | } |
895 | | |
896 | | /* Phase 5: Process the mode key to see if we can tell the formatx */ |
897 | 0 | modeval = ncurifragmentlookup(uri,"mode"); |
898 | 0 | if(modeval != NULL) { |
899 | 0 | if((stat = parseonchar(modeval,',',modeargs))) goto done; |
900 | 0 | for(i=0;i<nclistlength(modeargs);i++) { |
901 | 0 | const char* arg = nclistget(modeargs,i); |
902 | 0 | if((stat=processmodearg(arg,model))) goto done; |
903 | 0 | } |
904 | 0 | } |
905 | | |
906 | | /* Phase 6: Process the non-mode keys to see if we can tell the formatx */ |
907 | 0 | if(!modelcomplete(model)) { |
908 | 0 | const char** p = ncurifragmentparams(uri); /* envv format */ |
909 | 0 | if(p != NULL) { |
910 | 0 | for(;*p;p+=2) { |
911 | 0 | const char* key = p[0]; |
912 | 0 | const char* value = p[1];; |
913 | 0 | if((stat=processfragmentkeys(key,value,model))) goto done; |
914 | 0 | } |
915 | 0 | } |
916 | 0 | } |
917 | | |
918 | | /* Phase 7: Special cases: if this is a URL and model.impl is still not defined */ |
919 | | /* Phase7a: Default is DAP2 */ |
920 | 0 | if(!modelcomplete(model)) { |
921 | 0 | model->impl = NC_FORMATX_DAP2; |
922 | 0 | model->format = NC_FORMAT_NC3; |
923 | 0 | } |
924 | |
|
925 | 34 | } else {/* Not URL */ |
926 | 34 | if(*newpathp) *newpathp = NULL; |
927 | 34 | } |
928 | | |
929 | | /* Phase 8: mode inference from mode flags */ |
930 | | /* The modeargs did not give us a model (probably not a URL). |
931 | | So look at the combination of mode flags and the useparallel flag */ |
932 | 34 | if(!modelcomplete(model)) { |
933 | 34 | if((stat = NC_omodeinfer(useparallel,omode,model))) goto done; |
934 | 34 | } |
935 | | |
936 | | /* Phase 9: Infer from file content, if possible; |
937 | | this has highest precedence, so it may override |
938 | | previous decisions. Note that we do this last |
939 | | because we need previously determined model info |
940 | | to guess if this file is readable. |
941 | | */ |
942 | 34 | if(!iscreate && isreadable(uri,model)) { |
943 | | /* Ok, we need to try to read the file */ |
944 | 34 | if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done; |
945 | 34 | } |
946 | | |
947 | | /* Need a decision */ |
948 | 29 | if(!modelcomplete(model)) |
949 | 0 | {stat = NC_ENOTNC; goto done;} |
950 | | |
951 | | /* Force flag consistency */ |
952 | 29 | switch (model->impl) { |
953 | 0 | case NC_FORMATX_NC4: |
954 | 0 | case NC_FORMATX_NC_HDF4: |
955 | 0 | case NC_FORMATX_DAP4: |
956 | 0 | case NC_FORMATX_UDF0: |
957 | 0 | case NC_FORMATX_UDF1: |
958 | 0 | case NC_FORMATX_NCZARR: |
959 | 0 | omode |= NC_NETCDF4; |
960 | 0 | if(model->format == NC_FORMAT_NETCDF4_CLASSIC) |
961 | 0 | omode |= NC_CLASSIC_MODEL; |
962 | 0 | break; |
963 | 29 | case NC_FORMATX_NC3: |
964 | 29 | omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */ |
965 | 29 | if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET; |
966 | 27 | else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA; |
967 | 29 | break; |
968 | 0 | case NC_FORMATX_PNETCDF: |
969 | 0 | omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */ |
970 | 0 | if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET; |
971 | 0 | else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA; |
972 | 0 | break; |
973 | 0 | case NC_FORMATX_DAP2: |
974 | 0 | omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL); |
975 | 0 | break; |
976 | 0 | default: |
977 | 0 | {stat = NC_ENOTNC; goto done;} |
978 | 29 | } |
979 | | |
980 | 34 | done: |
981 | 34 | nullfree(sfrag); |
982 | 34 | nullfree(abspath); |
983 | 34 | ncurifree(uri); |
984 | 34 | nclistfreeall(modeargs); |
985 | 34 | nclistfreeall(fraglenv); |
986 | 34 | *omodep = omode; /* in/out */ |
987 | 34 | return check(stat); |
988 | 29 | } |
989 | | |
990 | | static int |
991 | | isreadable(NCURI* uri, NCmodel* model) |
992 | 34 | { |
993 | 34 | int canread = 0; |
994 | 34 | struct Readable* r; |
995 | | /* Step 1: Look up the implementation */ |
996 | 34 | for(r=readable;r->impl;r++) { |
997 | 34 | if(model->impl == r->impl) {canread = r->readable; break;} |
998 | 34 | } |
999 | | /* Step 2: check for bytes mode */ |
1000 | 34 | if(!canread && NC_testmode(uri,"bytes") && (model->impl == NC_FORMATX_NC4 || model->impl == NC_FORMATX_NC_HDF5)) |
1001 | 0 | canread = 1; |
1002 | 34 | return canread; |
1003 | 34 | } |
1004 | | |
1005 | | #if 0 |
/**
 * Return a freshly allocated copy of s, treating NULL as the empty string.
 * The caller frees the result; NULL is returned only if allocation fails.
 */
static char*
emptyify(char* s)
{
    /* Duplicate exactly once: the previous code strdup'ed "" and then
       strdup'ed that copy, leaking the first allocation. */
    return strdup(s == NULL ? "" : s);
}
1012 | | |
/**
 * Map the empty string to NULL; any other input (NULL included)
 * is returned unchanged.
 */
static const char*
nullify(const char* s)
{
    return (s == NULL || s[0] != '\0') ? s : NULL;
}
1020 | | #endif |
1021 | | |
1022 | | /**************************************************/ |
1023 | | /**************************************************/ |
1024 | | /** |
1025 | | * Provide a hidden interface to allow utilities |
1026 | | * to check if a given path name is really a url. |
1027 | | * If not, put null in basenamep, else put basename of the url path |
1028 | | * minus any extension into basenamep; caller frees. |
1029 | | * Return 1 if it looks like a url, 0 otherwise. |
1030 | | */ |
1031 | | |
1032 | | int |
1033 | | nc__testurl(const char* path0, char** basenamep) |
1034 | 0 | { |
1035 | 0 | NCURI* uri = NULL; |
1036 | 0 | int ok = 0; |
1037 | 0 | char* path = NULL; |
1038 | |
|
1039 | 0 | if(!ncuriparse(path0,&uri)) { |
1040 | 0 | char* p; |
1041 | 0 | char* q; |
1042 | 0 | path = strdup(uri->path); |
1043 | 0 | if(path == NULL||strlen(path)==0) goto done; |
1044 | 0 | p = strrchr(path, '/'); |
1045 | 0 | if(p == NULL) p = path; else p++; |
1046 | 0 | q = strrchr(p,'.'); |
1047 | 0 | if(q != NULL) *q = '\0'; |
1048 | 0 | if(strlen(p) == 0) goto done; |
1049 | 0 | if(basenamep) |
1050 | 0 | *basenamep = strdup(p); |
1051 | 0 | ok = 1; |
1052 | 0 | } |
1053 | 0 | done: |
1054 | 0 | ncurifree(uri); |
1055 | 0 | nullfree(path); |
1056 | 0 | return ok; |
1057 | 0 | } |
1058 | | |
1059 | | /**************************************************/ |
1060 | | /* Envv list utilities */ |
1061 | | |
1062 | | static const char* |
1063 | | getmodekey(const NClist* envv) |
1064 | 0 | { |
1065 | 0 | int i; |
1066 | | /* Get "mode" entry */ |
1067 | 0 | for(i=0;i<nclistlength(envv);i+=2) { |
1068 | 0 | char* key = NULL; |
1069 | 0 | key = nclistget(envv,i); |
1070 | 0 | if(strcasecmp(key,"mode")==0) |
1071 | 0 | return nclistget(envv,i+1); |
1072 | 0 | } |
1073 | 0 | return NULL; |
1074 | 0 | } |
1075 | | |
1076 | | static int |
1077 | | replacemode(NClist* envv, const char* newval) |
1078 | 0 | { |
1079 | 0 | int i; |
1080 | | /* Get "mode" entry */ |
1081 | 0 | for(i=0;i<nclistlength(envv);i+=2) { |
1082 | 0 | char* key = NULL; |
1083 | 0 | char* val = NULL; |
1084 | 0 | key = nclistget(envv,i); |
1085 | 0 | if(strcasecmp(key,"mode")==0) { |
1086 | 0 | val = nclistget(envv,i+1); |
1087 | 0 | nclistset(envv,i+1,strdup(newval)); |
1088 | 0 | nullfree(val); |
1089 | 0 | return NC_NOERR; |
1090 | 0 | } |
1091 | 0 | } |
1092 | 0 | return NC_EINVAL; |
1093 | 0 | } |
1094 | | |
1095 | | static NClist* |
1096 | | parsemode(const char* modeval) |
1097 | 0 | { |
1098 | 0 | NClist* modes = nclistnew(); |
1099 | 0 | if(modeval) |
1100 | 0 | (void)parseonchar(modeval,',',modes);/* split on commas */ |
1101 | 0 | return modes; |
1102 | 0 | } |
1103 | | |
1104 | | /**************************************************/ |
1105 | | /** |
1106 | | * @internal Given an existing file, figure out its format and return |
1107 | | * that format value (NC_FORMATX_XXX) in model arg. Assume any path |
1108 | | * conversion was already performed at a higher level. |
1109 | | * |
1110 | | * @param path File name. |
1111 | | * @param flags |
1112 | | * @param use_parallel |
1113 | | * @param parameters |
1114 | | * @param model Pointer that gets the model to use for the dispatch table. |
1115 | | * @param version Pointer that gets version of the file. |
1116 | | * |
1117 | | * @return ::NC_NOERR No error. |
1118 | | * @author Dennis Heimbigner |
1119 | | */ |
1120 | | static int |
1121 | | check_file_type(const char *path, int omode, int use_parallel, |
1122 | | void *parameters, NCmodel* model, NCURI* uri) |
1123 | 34 | { |
1124 | 34 | char magic[NC_MAX_MAGIC_NUMBER_LEN]; |
1125 | 34 | int status = NC_NOERR; |
1126 | 34 | struct MagicFile magicinfo; |
1127 | | #ifdef _WIN32 |
1128 | | NC* nc = NULL; |
1129 | | #endif |
1130 | | |
1131 | 34 | memset((void*)&magicinfo,0,sizeof(magicinfo)); |
1132 | | |
1133 | | #ifdef _WIN32 /* including MINGW */ |
1134 | | /* Windows does not handle well multiple handles to the same file. |
1135 | | So if file is already open/created, then find it and just get the |
1136 | | model from that. */ |
1137 | | if((nc = find_in_NCList_by_name(path)) != NULL) { |
1138 | | int format = 0; |
1139 | | /* Get the model from this NC */ |
1140 | | if((status = nc_inq_format_extended(nc->ext_ncid,&format,NULL))) goto done; |
1141 | | model->impl = format; |
1142 | | if((status = nc_inq_format(nc->ext_ncid,&format))) goto done; |
1143 | | model->format = format; |
1144 | | goto done; |
1145 | | } |
1146 | | #endif |
1147 | | |
1148 | 34 | magicinfo.path = path; /* do not free */ |
1149 | 34 | magicinfo.uri = uri; /* do not free */ |
1150 | 34 | magicinfo.omode = omode; |
1151 | 34 | magicinfo.model = model; /* do not free */ |
1152 | 34 | magicinfo.parameters = parameters; /* do not free */ |
1153 | | #ifdef USE_STDIO |
1154 | | magicinfo.use_parallel = 0; |
1155 | | #else |
1156 | 34 | magicinfo.use_parallel = use_parallel; |
1157 | 34 | #endif |
1158 | | |
1159 | 34 | if((status = openmagic(&magicinfo))) goto done; |
1160 | | |
1161 | | /* Verify we have a large enough file */ |
1162 | 34 | if(magicinfo.filelen < (unsigned long long)MAGIC_NUMBER_LEN) |
1163 | 0 | {status = NC_ENOTNC; goto done;} |
1164 | 34 | if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) { |
1165 | 0 | status = NC_ENOTNC; |
1166 | 0 | goto done; |
1167 | 0 | } |
1168 | | |
1169 | | /* Look at the magic number */ |
1170 | 34 | if(NC_interpret_magic_number(magic,model) == NC_NOERR |
1171 | 34 | && model->format != 0) { |
1172 | 29 | if (use_parallel && (model->format == NC_FORMAT_NC3 || model->impl == NC_FORMATX_NC3)) |
1173 | | /* this is called from nc_open_par() and file is classic */ |
1174 | 0 | model->impl = NC_FORMATX_PNETCDF; |
1175 | 29 | goto done; /* found something */ |
1176 | 29 | } |
1177 | | |
1178 | | /* Remaining case when implementation is an HDF5 file; |
1179 | | search forward at starting at 512 |
1180 | | and doubling to see if we have HDF5 magic number */ |
1181 | 5 | { |
1182 | 5 | long pos = 512L; |
1183 | 59 | for(;;) { |
1184 | 59 | if((pos+MAGIC_NUMBER_LEN) > magicinfo.filelen) |
1185 | 5 | {status = NC_ENOTNC; goto done;} |
1186 | 54 | if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR) |
1187 | 0 | {status = NC_ENOTNC; goto done; } |
1188 | 54 | NC_interpret_magic_number(magic,model); |
1189 | 54 | if(model->impl == NC_FORMATX_NC4) break; |
1190 | | /* double and try again */ |
1191 | 54 | pos = 2*pos; |
1192 | 54 | } |
1193 | 5 | } |
1194 | 34 | done: |
1195 | 34 | closemagic(&magicinfo); |
1196 | 34 | return check(status); |
1197 | 5 | } |
1198 | | |
1199 | | /** |
1200 | | \internal |
1201 | | \ingroup datasets |
1202 | | Provide open, read and close for use when searching for magic numbers |
1203 | | */ |
1204 | | static int |
1205 | | openmagic(struct MagicFile* file) |
1206 | 34 | { |
1207 | 34 | int status = NC_NOERR; |
1208 | | |
1209 | 34 | if(fIsSet(file->omode,NC_INMEMORY)) { |
1210 | | /* Get its length */ |
1211 | 34 | NC_memio* meminfo = (NC_memio*)file->parameters; |
1212 | 34 | assert(meminfo != NULL); |
1213 | 34 | file->filelen = (long long)meminfo->size; |
1214 | | #ifdef ENABLE_BYTERANGE |
1215 | | } else if(file->uri != NULL) { |
1216 | | #ifdef ENABLE_S3_SDK |
1217 | | /* If this is an S3 URL, then handle specially */ |
1218 | | if(NC_iss3(file->uri)) { |
1219 | | if((status = NC_s3urlprocess(file->uri,&file->s3))) goto done; |
1220 | | if((file->s3client = NC_s3sdkcreateclient(&file->s3))==NULL) {status = NC_EURL; goto done;} |
1221 | | if((status = NC_s3sdkinfo(file->s3client,file->s3.bucket,file->s3.rootkey,&file->filelen,&file->errmsg))) |
1222 | | goto done; |
1223 | | file->iss3 = 1; |
1224 | | } else |
1225 | | #endif |
1226 | | { |
1227 | | /* Construct a URL minus any fragment */ |
1228 | | file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC); |
1229 | | /* Open the curl handle */ |
1230 | | if((status=nc_http_init(&file->state))) goto done; |
1231 | | if((status=nc_http_size(file->state,file->curlurl,&file->filelen))) goto done; |
1232 | | } |
1233 | | #endif /*BYTERANGE*/ |
1234 | 34 | } else { |
1235 | | #ifdef USE_PARALLEL |
1236 | | if (file->use_parallel) { |
1237 | | int retval; |
1238 | | MPI_Offset size; |
1239 | | assert(file->parameters != NULL); |
1240 | | if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm, |
1241 | | (char*)file->path,MPI_MODE_RDONLY, |
1242 | | ((NC_MPI_INFO*)file->parameters)->info, |
1243 | | &file->fh)) != MPI_SUCCESS) { |
1244 | | #ifdef MPI_ERR_NO_SUCH_FILE |
1245 | | int errorclass; |
1246 | | MPI_Error_class(retval, &errorclass); |
1247 | | if (errorclass == MPI_ERR_NO_SUCH_FILE) |
1248 | | #ifdef NC_ENOENT |
1249 | | status = NC_ENOENT; |
1250 | | #else |
1251 | | status = errno; |
1252 | | #endif |
1253 | | else |
1254 | | #endif |
1255 | | status = NC_EPARINIT; |
1256 | | file->fh = MPI_FILE_NULL; |
1257 | | goto done; |
1258 | | } |
1259 | | /* Get its length */ |
1260 | | if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS) |
1261 | | {status = NC_EPARINIT; goto done;} |
1262 | | file->filelen = (long long)size; |
1263 | | } else |
1264 | | #endif /* USE_PARALLEL */ |
1265 | 0 | { |
1266 | 0 | if (file->path == NULL || strlen(file->path) == 0) |
1267 | 0 | {status = NC_EINVAL; goto done;} |
1268 | 0 | file->fp = NCfopen(file->path, "r"); |
1269 | 0 | if(file->fp == NULL) |
1270 | 0 | {status = errno; goto done;} |
1271 | | /* Get its length */ |
1272 | 0 | { |
1273 | 0 | int fd = fileno(file->fp); |
1274 | | #ifdef _WIN32 |
1275 | | __int64 len64 = _filelengthi64(fd); |
1276 | | if(len64 < 0) |
1277 | | {status = errno; goto done;} |
1278 | | file->filelen = (long long)len64; |
1279 | | #else |
1280 | 0 | off_t size; |
1281 | 0 | size = lseek(fd, 0, SEEK_END); |
1282 | 0 | if(size == -1) |
1283 | 0 | {status = errno; goto done;} |
1284 | 0 | file->filelen = (long long)size; |
1285 | 0 | #endif |
1286 | 0 | } |
1287 | 0 | rewind(file->fp); |
1288 | 0 | } |
1289 | 0 | } |
1290 | 34 | done: |
1291 | 34 | return check(status); |
1292 | 34 | } |
1293 | | |
1294 | | static int |
1295 | | readmagic(struct MagicFile* file, long pos, char* magic) |
1296 | 88 | { |
1297 | 88 | int status = NC_NOERR; |
1298 | 88 | NCbytes* buf = ncbytesnew(); |
1299 | | |
1300 | 88 | memset(magic,0,MAGIC_NUMBER_LEN); |
1301 | 88 | if(fIsSet(file->omode,NC_INMEMORY)) { |
1302 | 88 | char* mempos; |
1303 | 88 | NC_memio* meminfo = (NC_memio*)file->parameters; |
1304 | 88 | if((pos + MAGIC_NUMBER_LEN) > meminfo->size) |
1305 | 0 | {status = NC_EINMEMORY; goto done;} |
1306 | 88 | mempos = ((char*)meminfo->memory) + pos; |
1307 | 88 | memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN); |
1308 | | #ifdef DEBUG |
1309 | | printmagic("XXX: readmagic",magic,file); |
1310 | | #endif |
1311 | | #ifdef ENABLE_BYTERANGE |
1312 | | } else if(file->uri != NULL) { |
1313 | | fileoffset_t start = (size_t)pos; |
1314 | | fileoffset_t count = MAGIC_NUMBER_LEN; |
1315 | | #ifdef ENABLE_S3_SDK |
1316 | | if(file->iss3) { |
1317 | | if((status = NC_s3sdkread(file->s3client,file->s3.bucket,file->s3.rootkey,start,count,(void*)magic,&file->errmsg))) |
1318 | | {goto done;} |
1319 | | } |
1320 | | else |
1321 | | #endif |
1322 | | { |
1323 | | status = nc_http_read(file->state, file->curlurl, start, count, buf); |
1324 | | if (status == NC_NOERR) { |
1325 | | if (ncbyteslength(buf) != count) |
1326 | | status = NC_EINVAL; |
1327 | | else |
1328 | | memcpy(magic, ncbytescontents(buf), count); |
1329 | | } |
1330 | | } |
1331 | | #endif |
1332 | 88 | } else { |
1333 | | #ifdef USE_PARALLEL |
1334 | | if (file->use_parallel) { |
1335 | | MPI_Status mstatus; |
1336 | | int retval; |
1337 | | if((retval = MPI_File_read_at_all(file->fh, pos, magic, |
1338 | | MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS) |
1339 | | {status = NC_EPARINIT; goto done;} |
1340 | | } |
1341 | | else |
1342 | | #endif /* USE_PARALLEL */ |
1343 | 0 | { /* Ordinary read */ |
1344 | 0 | long i; |
1345 | 0 | i = fseek(file->fp, pos, SEEK_SET); |
1346 | 0 | if (i < 0) { status = errno; goto done; } |
1347 | 0 | ncbytessetlength(buf, 0); |
1348 | 0 | if ((status = NC_readfileF(file->fp, buf, MAGIC_NUMBER_LEN))) goto done; |
1349 | 0 | memcpy(magic, ncbytescontents(buf), MAGIC_NUMBER_LEN); |
1350 | 0 | } |
1351 | 0 | } |
1352 | | |
1353 | 88 | done: |
1354 | 88 | ncbytesfree(buf); |
1355 | 88 | if(file && file->fp) clearerr(file->fp); |
1356 | 88 | return check(status); |
1357 | 88 | } |
1358 | | |
1359 | | /** |
1360 | | * Close the file opened to check for magic number. |
1361 | | * |
1362 | | * @param file pointer to the MagicFile struct for this open file. |
1363 | | * @returns NC_NOERR for success |
1364 | | * @returns NC_EPARINIT if there was a problem closing file with MPI |
1365 | | * (parallel builds only). |
1366 | | * @author Dennis Heimbigner |
1367 | | */ |
1368 | | static int |
1369 | | closemagic(struct MagicFile* file) |
1370 | 34 | { |
1371 | 34 | int status = NC_NOERR; |
1372 | | |
1373 | 34 | if(fIsSet(file->omode,NC_INMEMORY)) { |
1374 | | /* noop */ |
1375 | | #ifdef ENABLE_BYTERANGE |
1376 | | } else if(file->uri != NULL) { |
1377 | | #ifdef ENABLE_S3_SDK |
1378 | | if(file->iss3) { |
1379 | | NC_s3sdkclose(file->s3client, &file->s3, 0, &file->errmsg); |
1380 | | NC_s3clear(&file->s3); |
1381 | | nullfree(file->errmsg); |
1382 | | } else |
1383 | | #endif |
1384 | | { |
1385 | | status = nc_http_close(file->state); |
1386 | | nullfree(file->curlurl); |
1387 | | } |
1388 | | #endif |
1389 | 34 | } else { |
1390 | | #ifdef USE_PARALLEL |
1391 | | if (file->use_parallel) { |
1392 | | int retval; |
1393 | | if(file->fh != MPI_FILE_NULL |
1394 | | && (retval = MPI_File_close(&file->fh)) != MPI_SUCCESS) |
1395 | | {status = NC_EPARINIT; return status;} |
1396 | | } else |
1397 | | #endif |
1398 | 0 | { |
1399 | 0 | if(file->fp) fclose(file->fp); |
1400 | 0 | } |
1401 | 0 | } |
1402 | 34 | return status; |
1403 | 34 | } |
1404 | | |
1405 | | /*! |
1406 | | Interpret the magic number found in the header of a netCDF file. |
1407 | | This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags. |
1408 | | |
1409 | | @param[in] magic Pointer to a character array with the magic number block. |
1410 | | @param[out] model Pointer to an integer to hold the corresponding netCDF type. |
1411 | | @param[out] version Pointer to an integer to hold the corresponding netCDF version. |
1412 | | @returns NC_NOERR if a legitimate file type found |
1413 | | @returns NC_ENOTNC otherwise |
1414 | | |
1415 | | \internal |
1416 | | \ingroup datasets |
1417 | | |
1418 | | */ |
1419 | | static int |
1420 | | NC_interpret_magic_number(char* magic, NCmodel* model) |
1421 | 88 | { |
1422 | 88 | int status = NC_NOERR; |
1423 | | /* Look at the magic number */ |
1424 | 88 | #ifdef USE_NETCDF4 |
1425 | 88 | if (strlen(UDF0_magic_number) && !strncmp(UDF0_magic_number, magic, |
1426 | 0 | strlen(UDF0_magic_number))) |
1427 | 0 | { |
1428 | 0 | model->impl = NC_FORMATX_UDF0; |
1429 | 0 | model->format = NC_FORMAT_NETCDF4; |
1430 | 0 | goto done; |
1431 | 0 | } |
1432 | 88 | if (strlen(UDF1_magic_number) && !strncmp(UDF1_magic_number, magic, |
1433 | 0 | strlen(UDF1_magic_number))) |
1434 | 0 | { |
1435 | 0 | model->impl = NC_FORMATX_UDF1; |
1436 | 0 | model->format = NC_FORMAT_NETCDF4; |
1437 | 0 | goto done; |
1438 | 0 | } |
1439 | 88 | #endif /* USE_NETCDF4 */ |
1440 | | |
1441 | | /* Use the complete magic number string for HDF5 */ |
1442 | 88 | if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) { |
1443 | 0 | model->impl = NC_FORMATX_NC4; |
1444 | 0 | model->format = NC_FORMAT_NETCDF4; |
1445 | 0 | goto done; |
1446 | 0 | } |
1447 | 88 | if(magic[0] == '\016' && magic[1] == '\003' |
1448 | 88 | && magic[2] == '\023' && magic[3] == '\001') { |
1449 | 0 | model->impl = NC_FORMATX_NC_HDF4; |
1450 | 0 | model->format = NC_FORMAT_NETCDF4; |
1451 | 0 | goto done; |
1452 | 0 | } |
1453 | 88 | if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') { |
1454 | 29 | if(magic[3] == '\001') { |
1455 | 8 | model->impl = NC_FORMATX_NC3; |
1456 | 8 | model->format = NC_FORMAT_CLASSIC; |
1457 | 8 | goto done; |
1458 | 8 | } |
1459 | 21 | if(magic[3] == '\002') { |
1460 | 2 | model->impl = NC_FORMATX_NC3; |
1461 | 2 | model->format = NC_FORMAT_64BIT_OFFSET; |
1462 | 2 | goto done; |
1463 | 2 | } |
1464 | 19 | if(magic[3] == '\005') { |
1465 | 19 | model->impl = NC_FORMATX_NC3; |
1466 | 19 | model->format = NC_FORMAT_64BIT_DATA; |
1467 | 19 | goto done; |
1468 | 19 | } |
1469 | 19 | } |
1470 | | /* No match */ |
1471 | 59 | status = NC_ENOTNC; |
1472 | 59 | goto done; |
1473 | | |
1474 | 88 | done: |
1475 | 88 | return check(status); |
1476 | 88 | } |
1477 | | |
1478 | | #ifdef DEBUG |
1479 | | static void |
1480 | | printmagic(const char* tag, char* magic, struct MagicFile* f) |
1481 | | { |
1482 | | int i; |
1483 | | fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel); |
1484 | | for(i=0;i<MAGIC_NUMBER_LEN;i++) { |
1485 | | unsigned int c = (unsigned int)magic[i]; |
1486 | | c = c & 0x000000FF; |
1487 | | if(c == '\n') |
1488 | | fprintf(stderr," 0x%0x/'\\n'",c); |
1489 | | else if(c == '\r') |
1490 | | fprintf(stderr," 0x%0x/'\\r'",c); |
1491 | | else if(c < ' ') |
1492 | | fprintf(stderr," 0x%0x/'?'",c); |
1493 | | else |
1494 | | fprintf(stderr," 0x%0x/'%c'",c,c); |
1495 | | } |
1496 | | fprintf(stderr,"\n"); |
1497 | | fflush(stderr); |
1498 | | } |
1499 | | |
1500 | | static void |
1501 | | printlist(NClist* list, const char* tag) |
1502 | | { |
1503 | | int i; |
1504 | | fprintf(stderr,"%s:",tag); |
1505 | | for(i=0;i<nclistlength(list);i++) |
1506 | | fprintf(stderr," %s",(char*)nclistget(list,i)); |
1507 | | fprintf(stderr,"\n"); |
1508 | | dbgflush(); |
1509 | | } |
1510 | | |
1511 | | |
1512 | | #endif |