/src/gdal/netcdf-c-4.7.4/libdispatch/dinfermodel.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * @file |
3 | | * |
4 | | * Infer as much as possible from the omode + path. |
5 | | * Rewrite the path to a canonical form. |
6 | | * |
7 | | * Copyright 2018 University Corporation for Atmospheric |
8 | | * Research/Unidata. See COPYRIGHT file for more info. |
9 | | */ |
10 | | |
#include "config.h"
#include <limits.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif

#include "ncdispatch.h"
#include "ncwinpath.h"
#include "netcdf_mem.h"
#include "fbits.h"
#include "ncbytes.h"
#include "nclist.h"
#include "nclog.h"
#ifdef ENABLE_BYTERANGE
#include "nchttp.h"
#endif
30 | | |
31 | | #undef DEBUG |
32 | | |
33 | | /* If Defined, then use only stdio for all magic number io; |
34 | | otherwise use stdio or mpio as required. |
35 | | */ |
36 | | #undef USE_STDIO |
37 | | |
/**
Store the state needed to open/read/close a
file when searching for magic numbers.
check_file_type() zero-fills one of these, sets the borrowed
fields, and hands it to openmagic/readmagic/closemagic.
*/
struct MagicFile {
    const char* path;       /* file name to probe; borrowed, never freed here */
    struct NCURI* uri;      /* parsed URL or NULL; borrowed; when ENABLE_BYTERANGE
                               is defined a non-NULL value selects the HTTP path */
    int omode;              /* open mode flags; tested for NC_INMEMORY */
    NCmodel* model;         /* model being inferred; borrowed */
    fileoffset_t filelen;   /* total length of the data source; set by openmagic */
    int use_parallel;       /* non-zero => use MPI-IO (forced to 0 under USE_STDIO) */
    void* parameters; /* !NULL if inmemory && !diskless; cast to NC_memio* for
                         NC_INMEMORY and to NC_MPI_INFO* for parallel opens */
    FILE* fp;               /* stdio stream for the plain-file case */
#ifdef USE_PARALLEL
    MPI_File fh;            /* MPI-IO handle for the parallel case */
#endif
#ifdef ENABLE_BYTERANGE
    void* curl; /* avoid need to include curl.h */
    char* curlurl; /* url to use with CURLOPT_SET_URL */
#endif
};
59 | | |
/** @internal Magic number for HDF5 files. To be consistent with
 * H5Fis_hdf5, use the complete HDF5 magic number.
 * NC_interpret_magic_number() compares it with memcmp over
 * sizeof(HDF5_SIGNATURE) bytes, so it is treated as raw bytes,
 * not as a C string. */
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";

/* True once an implementation (NC_FORMATX_XXX) has been chosen for the model */
#define modelcomplete(model) ((model)->impl != 0)
65 | | |
#ifdef DEBUG
/* Debug aid: push out anything buffered on stdout and stderr. */
static void
dbgflush(void)
{
    fflush(stdout);
    fflush(stderr);
}

/* Debug aid: does nothing; exists as a convenient place for a breakpoint. */
static void
fail(int err)
{
    (void)err;
}

/* Debug aid: route every non-success status through fail(), then
   hand the status back unchanged. */
static int
check(int err)
{
    if(err == NC_NOERR)
        return err;
    fail(err);
    return err;
}
#else
/* Without DEBUG, check() is the identity on its argument. */
#define check(err) (err)
#endif
89 | | |
/*
Define a table of "mode=" string values
from which the implementation can be inferred.
Note that only cases that can currently
take URLs are included.
The table is scanned by processmodearg() with an exact (case-sensitive)
strcmp on the tag and is terminated by an entry whose tag is NULL.
*/
static struct FORMATMODES {
    const char* tag;
    const int impl; /* NC_FORMATX_XXX value */
} formatmodes[] = {
{"dap2",NC_FORMATX_DAP2},
{"dap4",NC_FORMATX_DAP4},
{"netcdf-3",NC_FORMATX_NC3},
{"classic",NC_FORMATX_NC3},
{"netcdf-4",NC_FORMATX_NC4},
{"enhanced",NC_FORMATX_NC4},
{"udf0",NC_FORMATX_UDF0},
{"udf1",NC_FORMATX_UDF1},
{"zarr",NC_FORMATX_ZARR},
{NULL,0},
};
111 | | |
/* Define the legal singleton mode tags;
   these should also appear in the above mode table.
   A singleton is a fragment key given without a value that is treated
   as a mode argument (see issingleton() and processuri()).
   NOTE(review): "bytes" is listed here but has no entry in formatmodes;
   confirm whether that is intentional. */
static const char* modesingles[] = {
    "dap2", "dap4", "bytes", "zarr", NULL
};
117 | | |
/* Map FORMATX to readability to get magic number.
   isreadable() looks up model->impl here; readable==1 means
   NC_infermodel() may go on to probe the file content for a magic number.
   The table ends at the first entry whose impl is 0, and an impl that is
   not listed is treated as not readable. */
static struct Readable {
    int impl;     /* NC_FORMATX_XXX value */
    int readable; /* 1 => content can be read to look for a magic number */
} readable[] = {
{NC_FORMATX_NC3,1},
{NC_FORMATX_NC_HDF5,1},
{NC_FORMATX_NC_HDF4,1},
{NC_FORMATX_PNETCDF,1},
{NC_FORMATX_DAP2,0},
{NC_FORMATX_DAP4,0},
{NC_FORMATX_UDF0,0},
{NC_FORMATX_UDF1,0},
{NC_FORMATX_ZARR,0},
{0,0},
};
134 | | |
/* Define the known URL protocols and their interpretation.
   processuri() matches the parsed URL's protocol against this table
   with strcmp and rejects URLs whose protocol is not listed. */
static struct NCPROTOCOLLIST {
    const char* protocol;   /* protocol as written in the URL */
    const char* substitute; /* if non-NULL, protocol to rewrite the URL to */
    const char* mode;       /* if non-NULL, mode argument implied by the protocol */
} ncprotolist[] = {
    {"http",NULL,NULL},
    {"https",NULL,NULL},
    {"file",NULL,NULL},
    {"dods","http","dap2"},
    {"dap4","http","dap4"},
    {NULL,NULL,NULL} /* Terminate search */
};
148 | | |
149 | | /* Forward */ |
150 | | static int NC_omodeinfer(int useparallel, int omode, NCmodel*); |
151 | | static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri); |
152 | | static int parseurlmode(const char* modestr, NClist* list); |
153 | | static int processuri(const char* path, NCURI** urip, char** newpathp, NClist* modeargs); |
154 | | static char* list2string(NClist* modelist); |
155 | | static char* envv2string(NClist* envv); |
156 | | static int issingleton(const char* tag);; |
157 | | static void set_default_mode(int* cmodep); |
158 | | |
159 | | static int openmagic(struct MagicFile* file); |
160 | | static int readmagic(struct MagicFile* file, long pos, char* magic); |
161 | | static int closemagic(struct MagicFile* file); |
162 | | static int NC_interpret_magic_number(char* magic, NCmodel* model); |
163 | | #ifdef DEBUG |
164 | | static void printmagic(const char* tag, char* magic,struct MagicFile*); |
165 | | #endif |
166 | | static int isreadable(NCmodel*); |
167 | | |
168 | | |
169 | | /* |
170 | | If the path looks like a URL, then parse it, reformat it, |
171 | | and compute the mode= flags. Then return it and the the reformatted path. |
172 | | */ |
173 | | static int |
174 | | processuri(const char* path, NCURI** urip, char** newpathp, NClist* modeargs) |
175 | 0 | { |
176 | 0 | int i,j,stat = NC_NOERR; |
177 | 0 | int found = 0; |
178 | 0 | const char** fragp = NULL; |
179 | 0 | NClist* fraglist = NULL; |
180 | 0 | struct NCPROTOCOLLIST* protolist; |
181 | 0 | NCURI* uri = NULL; |
182 | 0 | size_t pathlen = strlen(path); |
183 | 0 | char* str = NULL; |
184 | |
|
185 | 0 | if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;} |
186 | | |
187 | | /* Defaults */ |
188 | 0 | if(newpathp) *newpathp = NULL; |
189 | 0 | if(urip) *urip = NULL; |
190 | |
|
191 | 0 | if(ncuriparse(path,&uri)) goto done; /* not url */ |
192 | | |
193 | | /* Look up the protocol */ |
194 | 0 | for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) { |
195 | 0 | if(strcmp(uri->protocol,protolist->protocol) == 0) { |
196 | 0 | found = 1; |
197 | 0 | break; |
198 | 0 | } |
199 | 0 | } |
200 | 0 | if(!found) |
201 | 0 | {stat = NC_EINVAL; goto done;} /* unrecognized URL form */ |
202 | | |
203 | | /* process the corresponding mode arg */ |
204 | 0 | if(protolist->mode != NULL) |
205 | 0 | nclistpush(modeargs,strdup(protolist->mode)); |
206 | | |
207 | | /* Substitute the protocol in any case */ |
208 | 0 | if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute); |
209 | | |
210 | | /* Iterate over the url fragment parameters and collect, |
211 | | but remove mode= proto= and protocol= */ |
212 | 0 | fraglist = nclistnew(); |
213 | 0 | for(fragp=ncurifragmentparams(uri);fragp && *fragp;fragp+=2) { |
214 | 0 | int elide = 0; |
215 | 0 | const char* name = fragp[0]; |
216 | 0 | const char* value = fragp[1]; |
217 | 0 | if(strcmp(name,"protocol")==0 |
218 | 0 | || strcmp(name,"proto")==0) { /* for back compatibility */ |
219 | 0 | nclistpush(modeargs,strdup(value)); |
220 | 0 | elide = 1; |
221 | 0 | } else if(strcmp(name,"mode")==0) { |
222 | | /* Capture the list of mode arguments */ |
223 | 0 | if((stat = parseurlmode(value,modeargs))) goto done; |
224 | 0 | elide = 1; |
225 | 0 | } else if(issingleton(name) && (value == NULL || strlen(value)==0)) { |
226 | 0 | nclistpush(modeargs,strdup(name)); |
227 | 0 | elide = 1; |
228 | 0 | } /*else ignore*/ |
229 | 0 | if(!elide) { |
230 | | /* Copy over */ |
231 | 0 | nclistpush(fraglist,strdup(name)); |
232 | 0 | if(value == NULL) value = ""; |
233 | 0 | nclistpush(fraglist,strdup(value)); |
234 | 0 | } |
235 | 0 | } |
236 | | |
237 | | /* At this point modeargs should contain all mode-like args from the URL */ |
238 | | |
239 | | /* Remove duplicates */ |
240 | 0 | for(i=nclistlength(modeargs)-1;i>=0;i--) { |
241 | 0 | const char* mode = nclistget(modeargs,i); |
242 | 0 | for(j=0;j<i;j++) { |
243 | 0 | const char* other = nclistget(modeargs,i); |
244 | 0 | if(strcasecmp(mode,other)==0) { |
245 | 0 | nclistremove(modeargs,i); /* duplicate */ |
246 | 0 | break; |
247 | 0 | } |
248 | 0 | } |
249 | 0 | } |
250 | | |
251 | | /* Convert the modelist to a new mode= fragment */ |
252 | 0 | if(nclistlength(modeargs) > 0) { |
253 | 0 | str = list2string(modeargs); |
254 | | /* Re-insert mode into fraglist */ |
255 | 0 | nclistinsert(fraglist,0,str); |
256 | 0 | nclistinsert(fraglist,0,strdup("mode")); |
257 | 0 | } |
258 | | |
259 | | /* Convert frag list to a string */ |
260 | 0 | str = envv2string(fraglist); |
261 | 0 | ncurisetfragments(uri,str); |
262 | | |
263 | | /* Rebuild the path (including fragment)*/ |
264 | 0 | if(newpathp) |
265 | 0 | *newpathp = ncuribuild(uri,NULL,NULL,NCURIALL); |
266 | 0 | if(urip) { |
267 | 0 | *urip = uri; |
268 | 0 | uri = NULL; |
269 | 0 | } |
270 | | #ifdef DEBUG |
271 | | fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr); |
272 | | #endif |
273 | |
|
274 | 0 | done: |
275 | 0 | nclistfreeall(fraglist); |
276 | 0 | nullfree(str); |
277 | 0 | if(uri != NULL) ncurifree(uri); |
278 | 0 | return check(stat); |
279 | 0 | } |
280 | | |
281 | | /* Parse a mode string at the commas */ |
282 | | static int |
283 | | parseurlmode(const char* modestr, NClist* list) |
284 | 0 | { |
285 | 0 | int stat = NC_NOERR; |
286 | 0 | const char* p = NULL; |
287 | 0 | const char* endp = NULL; |
288 | |
|
289 | 0 | if(modestr == NULL || *modestr == '\0') goto done; |
290 | | |
291 | | /* Split modestr at the commas or EOL */ |
292 | 0 | p = modestr; |
293 | 0 | for(;;) { |
294 | 0 | char* s; |
295 | 0 | ptrdiff_t slen; |
296 | 0 | endp = strchr(p,','); |
297 | 0 | if(endp == NULL) endp = p + strlen(p); |
298 | 0 | slen = (endp - p); |
299 | 0 | if((s = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;} |
300 | 0 | memcpy(s,p,slen); |
301 | 0 | s[slen] = '\0'; |
302 | 0 | nclistpush(list,s); |
303 | 0 | if(*endp == '\0') break; |
304 | 0 | p = endp+1; |
305 | 0 | } |
306 | | |
307 | 0 | done: |
308 | 0 | return check(stat); |
309 | 0 | } |
310 | | |
311 | | /* Convert an envv into a comma'd string*/ |
312 | | static char* |
313 | | envv2string(NClist* envv) |
314 | 0 | { |
315 | 0 | int i; |
316 | 0 | NCbytes* buf = NULL; |
317 | 0 | char* result = NULL; |
318 | |
|
319 | 0 | if(envv == NULL || nclistlength(envv) == 0) return NULL; |
320 | 0 | buf = ncbytesnew(); |
321 | 0 | for(i=0;i<nclistlength(envv);i+=2) { |
322 | 0 | const char* key = nclistget(envv,i); |
323 | 0 | const char* val = nclistget(envv,i+1); |
324 | 0 | if(key == NULL || strlen(key) == 0) continue; |
325 | 0 | if(val == NULL) val = ""; |
326 | 0 | if(i > 0) ncbytescat(buf,"&"); |
327 | 0 | ncbytescat(buf,key); |
328 | 0 | ncbytescat(buf,"="); |
329 | 0 | ncbytescat(buf,val); |
330 | 0 | } |
331 | 0 | result = ncbytesextract(buf); |
332 | 0 | ncbytesfree(buf); |
333 | 0 | return result; |
334 | 0 | } |
335 | | |
336 | | /* Convert a list into a comma'd string */ |
337 | | static char* |
338 | | list2string(NClist* modelist) |
339 | 0 | { |
340 | 0 | int i; |
341 | 0 | NCbytes* buf = NULL; |
342 | 0 | char* result = NULL; |
343 | |
|
344 | 0 | if(modelist == NULL || nclistlength(modelist)==0) return NULL; |
345 | 0 | buf = ncbytesnew(); |
346 | 0 | for(i=0;i<nclistlength(modelist);i++) { |
347 | 0 | const char* m = nclistget(modelist,i); |
348 | 0 | if(m == NULL || strlen(m) == 0) continue; |
349 | 0 | if(i > 0) ncbytescat(buf,","); |
350 | 0 | ncbytescat(buf,m); |
351 | 0 | } |
352 | 0 | result = ncbytesextract(buf); |
353 | 0 | ncbytesfree(buf); |
354 | 0 | return result; |
355 | 0 | } |
356 | | |
357 | | /* Given a mode= argument, fill in the impl and possibly mode flags */ |
358 | | static int |
359 | | processmodearg(const char* arg, NCmodel* model) |
360 | 0 | { |
361 | 0 | int stat = NC_NOERR; |
362 | 0 | struct FORMATMODES* format = formatmodes; |
363 | 0 | for(;format->tag;format++) { |
364 | 0 | if(strcmp(format->tag,arg)==0) { |
365 | 0 | model->impl = format->impl; |
366 | 0 | } |
367 | 0 | } |
368 | |
|
369 | 0 | return check(stat); |
370 | 0 | } |
371 | | |
372 | | /* Search singleton list */ |
373 | | static int |
374 | | issingleton(const char* tag) |
375 | 0 | { |
376 | 0 | const char** p; |
377 | 0 | for(p=modesingles;*p;p++) { |
378 | 0 | if(strcmp(*p,tag)==0) return 1; |
379 | 0 | } |
380 | 0 | return 0; |
381 | 0 | } |
382 | | |
383 | | /* |
384 | | Infer from the mode + useparallel |
385 | | only call if iscreate or file is not easily readable. |
386 | | */ |
387 | | static int |
388 | | NC_omodeinfer(int useparallel, int cmode, NCmodel* model) |
389 | 0 | { |
390 | 0 | int stat = NC_NOERR; |
391 | | |
392 | | /* If no format flags are set, then use default */ |
393 | 0 | if(!fIsSet(cmode,NC_FORMAT_ALL)) |
394 | 0 | set_default_mode(&cmode); |
395 | | |
396 | | /* Process the cmode; may override some already set flags. The |
397 | | * user-defined formats must be checked first. They may choose to |
398 | | * use some of the other flags, like NC_NETCDF4, so we must fist |
399 | | * check NC_UDF0 and NC_UDF1 before checking for any other |
400 | | * flag. */ |
401 | 0 | if(fIsSet(cmode,(NC_UDF0|NC_UDF1))) { |
402 | 0 | model->format = NC_FORMAT_NETCDF4; |
403 | 0 | if(fIsSet(cmode,NC_UDF0)) { |
404 | 0 | model->impl = NC_FORMATX_UDF0; |
405 | 0 | } else { |
406 | 0 | model->impl = NC_FORMATX_UDF1; |
407 | 0 | } |
408 | 0 | goto done; |
409 | 0 | } |
410 | | |
411 | 0 | if(fIsSet(cmode,NC_64BIT_OFFSET)) { |
412 | 0 | model->impl = NC_FORMATX_NC3; |
413 | 0 | model->format = NC_FORMAT_64BIT_OFFSET; |
414 | 0 | goto done; |
415 | 0 | } |
416 | | |
417 | 0 | if(fIsSet(cmode,NC_64BIT_DATA)) { |
418 | 0 | model->impl = NC_FORMATX_NC3; |
419 | 0 | model->format = NC_FORMAT_64BIT_DATA; |
420 | 0 | goto done; |
421 | 0 | } |
422 | | |
423 | 0 | if(fIsSet(cmode,NC_NETCDF4)) { |
424 | 0 | model->impl = NC_FORMATX_NC4; |
425 | 0 | if(fIsSet(cmode,NC_CLASSIC_MODEL)) |
426 | 0 | model->format = NC_FORMAT_NETCDF4_CLASSIC; |
427 | 0 | else |
428 | 0 | model->format = NC_FORMAT_NETCDF4; |
429 | 0 | goto done; |
430 | 0 | } |
431 | | |
432 | | /* Default to classic model */ |
433 | 0 | model->format = NC_FORMAT_CLASSIC; |
434 | 0 | model->impl = NC_FORMATX_NC3; |
435 | |
|
436 | 0 | done: |
437 | | /* Apply parallel flag */ |
438 | 0 | if(useparallel) { |
439 | 0 | if(model->impl == NC_FORMATX_NC3) |
440 | 0 | model->impl = NC_FORMATX_PNETCDF; |
441 | 0 | } |
442 | 0 | return check(stat); |
443 | 0 | } |
444 | | |
445 | | /* |
446 | | If the mode flags do not necessarily specify the |
447 | | format, then default it by adding in appropriate flags. |
448 | | */ |
449 | | |
450 | | static void |
451 | | set_default_mode(int* modep) |
452 | 0 | { |
453 | 0 | int mode = *modep; |
454 | 0 | int dfaltformat; |
455 | |
|
456 | 0 | dfaltformat = nc_get_default_format(); |
457 | 0 | switch (dfaltformat) { |
458 | 0 | case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break; |
459 | 0 | case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break; |
460 | 0 | case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break; |
461 | 0 | case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break; |
462 | 0 | case NC_FORMAT_CLASSIC: /* fall thru */ |
463 | 0 | default: break; /* default to classic */ |
464 | 0 | } |
465 | 0 | *modep = mode; /* final result */ |
466 | 0 | } |
467 | | |
468 | | /**************************************************/ |
469 | | /* |
470 | | Infer model for this dataset using some |
471 | | combination of cmode, path, and reading the dataset. |
472 | | See the documentation in docs/internal.dox. |
473 | | |
474 | | @param path |
475 | | @param omode |
476 | | @param iscreate |
477 | | @param useparallel |
478 | | @param params |
479 | | @param model |
480 | | @param newpathp |
481 | | |
482 | | */ |
483 | | |
484 | | int |
485 | | NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp) |
486 | 0 | { |
487 | 0 | int i,stat = NC_NOERR; |
488 | 0 | char* newpath = NULL; |
489 | 0 | NCURI* uri = NULL; |
490 | 0 | int omode = *omodep; |
491 | 0 | NClist* modeargs = nclistnew(); |
492 | | |
493 | | /* Phase 1: Reformat the uri to canonical form; store canonical form |
494 | | into newpath. Return the "mode=" list in modeargs */ |
495 | 0 | if((stat = processuri(path, &uri, &newpath, modeargs))) goto done; |
496 | 0 | if(newpath == NULL) newpath = strdup(path); /* No change */ |
497 | | |
498 | | /* Phase 2: Process the modeargs list to see if we can tell the formatx */ |
499 | | /* Note that if the path was not a URL, then modeargs will be empty list*/ |
500 | 0 | for(i=0;i<nclistlength(modeargs);i++) { |
501 | 0 | const char* arg = nclistget(modeargs,i); |
502 | 0 | if((stat=processmodearg(arg,model))) goto done; |
503 | 0 | } |
504 | | |
505 | | /* Phase 2.5: Special case: if this is a URL, and there are no mode args |
506 | | and model.impl is still not defined, default to DAP2 */ |
507 | 0 | if(uri != NULL && nclistlength(modeargs) == 0 && !modelcomplete(model)) { |
508 | 0 | model->impl = NC_FORMATX_DAP2; |
509 | 0 | model->format = NC_FORMAT_NC3; |
510 | 0 | } |
511 | | |
512 | | /* Phase 3: mode inference from mode flags */ |
513 | | /* The modeargs did not give us a model (probably not a URL). |
514 | | So look at the combination of mode flags and the useparallel flag */ |
515 | 0 | if(!modelcomplete(model)) { |
516 | 0 | if((stat = NC_omodeinfer(useparallel,omode,model))) goto done; |
517 | 0 | } |
518 | | |
519 | | /* Phase 4: Infer from file content, if possible; |
520 | | this has highest precedence, so it may override |
521 | | previous decisions. Note that we do this last |
522 | | because we need previously determined model info |
523 | | to guess if this file is readable. |
524 | | */ |
525 | 0 | if(!iscreate && isreadable(model)) { |
526 | | /* Ok, we need to try to read the file */ |
527 | 0 | if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done; |
528 | 0 | } |
529 | | |
530 | | /* Need a decision */ |
531 | 0 | if(!modelcomplete(model)) |
532 | 0 | {stat = NC_ENOTNC; goto done;} |
533 | | |
534 | | /* Force flag consistency */ |
535 | 0 | switch (model->impl) { |
536 | 0 | case NC_FORMATX_NC4: |
537 | 0 | case NC_FORMATX_NC_HDF4: |
538 | 0 | case NC_FORMATX_DAP4: |
539 | 0 | case NC_FORMATX_UDF0: |
540 | 0 | case NC_FORMATX_UDF1: |
541 | 0 | omode |= NC_NETCDF4; |
542 | 0 | if(model->format == NC_FORMAT_NETCDF4_CLASSIC) |
543 | 0 | omode |= NC_CLASSIC_MODEL; |
544 | 0 | break; |
545 | 0 | case NC_FORMATX_NC3: |
546 | 0 | omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */ |
547 | 0 | if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET; |
548 | 0 | else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA; |
549 | 0 | break; |
550 | 0 | case NC_FORMATX_PNETCDF: |
551 | 0 | omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */ |
552 | 0 | if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET; |
553 | 0 | else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA; |
554 | 0 | break; |
555 | 0 | case NC_FORMATX_DAP2: |
556 | 0 | omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL); |
557 | 0 | break; |
558 | 0 | default: |
559 | 0 | {stat = NC_ENOTNC; goto done;} |
560 | 0 | } |
561 | | |
562 | 0 | done: |
563 | 0 | if(uri) ncurifree(uri); |
564 | 0 | nclistfreeall(modeargs); |
565 | 0 | if(stat == NC_NOERR && newpathp) {*newpathp = newpath; newpath = NULL;} |
566 | 0 | nullfree(newpath); |
567 | 0 | *omodep = omode; /* in/out */ |
568 | 0 | return check(stat); |
569 | 0 | } |
570 | | |
571 | | static int |
572 | | isreadable(NCmodel* model) |
573 | 0 | { |
574 | 0 | struct Readable* r; |
575 | | /* Look up the protocol */ |
576 | 0 | for(r=readable;r->impl;r++) { |
577 | 0 | if(model->impl == r->impl) return r->readable; |
578 | 0 | } |
579 | 0 | return 0; |
580 | 0 | } |
581 | | |
582 | | /**************************************************/ |
#if 0
/* Disabled code, kept for reference only (never compiled). */
/* return 1 if path looks like a url; 0 otherwise */
int
NC_testurl(const char* path)
{
    int isurl = 0;
    NCURI* tmpurl = NULL;

    if(path == NULL) return 0;

    /* Ok, try to parse as a url */
    if(ncuriparse(path,&tmpurl)==NCU_OK) {
        /* Do some extra testing to make sure this really is a url */
        /* Look for a known/accepted protocol */
        struct NCPROTOCOLLIST* protolist;
        for(protolist=ncprotolist;protolist->protocol;protolist++) {
            if(strcmp(tmpurl->protocol,protolist->protocol) == 0) {
                isurl=1;
                break;
            }
        }
        /* the parsed url was only needed for the protocol test */
        ncurifree(tmpurl);
        return isurl;
    }
    return 0;
}
#endif
610 | | |
611 | | /**************************************************/ |
612 | | /** |
613 | | * Provide a hidden interface to allow utilities |
614 | | * to check if a given path name is really a url. |
615 | | * If not, put null in basenamep, else put basename of the url |
616 | | * minus any extension into basenamep; caller frees. |
617 | | * Return 1 if it looks like a url, 0 otherwise. |
618 | | */ |
619 | | |
620 | | int |
621 | | nc__testurl(const char* path, char** basenamep) |
622 | 0 | { |
623 | 0 | NCURI* uri; |
624 | 0 | int ok = 0; |
625 | 0 | if(!ncuriparse(path,&uri)) { |
626 | 0 | char* slash = (uri->path == NULL ? NULL : strrchr(uri->path, '/')); |
627 | 0 | char* dot; |
628 | 0 | if(slash == NULL) slash = (char*)path; else slash++; |
629 | 0 | slash = nulldup(slash); |
630 | 0 | if(slash == NULL) |
631 | 0 | dot = NULL; |
632 | 0 | else |
633 | 0 | dot = strrchr(slash, '.'); |
634 | 0 | if(dot != NULL && dot != slash) *dot = '\0'; |
635 | 0 | if(basenamep) |
636 | 0 | *basenamep=slash; |
637 | 0 | else if(slash) |
638 | 0 | free(slash); |
639 | 0 | ncurifree(uri); |
640 | 0 | ok = 1; |
641 | 0 | } |
642 | 0 | return ok; |
643 | 0 | } |
644 | | |
645 | | |
646 | | |
647 | | /**************************************************/ |
648 | | /** |
649 | | * @internal Given an existing file, figure out its format and return |
650 | | * that format value (NC_FORMATX_XXX) in model arg. Assume any path |
651 | | * conversion was already performed at a higher level. |
652 | | * |
653 | | * @param path File name. |
654 | | * @param flags |
655 | | * @param use_parallel |
656 | | * @param parameters |
657 | | * @param model Pointer that gets the model to use for the dispatch table. |
658 | | * @param version Pointer that gets version of the file. |
659 | | * |
660 | | * @return ::NC_NOERR No error. |
661 | | * @author Dennis Heimbigner |
662 | | */ |
663 | | static int |
664 | | check_file_type(const char *path, int omode, int use_parallel, |
665 | | void *parameters, NCmodel* model, NCURI* uri) |
666 | 0 | { |
667 | 0 | char magic[NC_MAX_MAGIC_NUMBER_LEN]; |
668 | 0 | int status = NC_NOERR; |
669 | 0 | struct MagicFile magicinfo; |
670 | |
|
671 | 0 | memset((void*)&magicinfo,0,sizeof(magicinfo)); |
672 | 0 | magicinfo.path = path; /* do not free */ |
673 | 0 | magicinfo.uri = uri; /* do not free */ |
674 | 0 | magicinfo.omode = omode; |
675 | 0 | magicinfo.model = model; /* do not free */ |
676 | 0 | magicinfo.parameters = parameters; /* do not free */ |
677 | | #ifdef USE_STDIO |
678 | | magicinfo.use_parallel = 0; |
679 | | #else |
680 | 0 | magicinfo.use_parallel = use_parallel; |
681 | 0 | #endif |
682 | |
|
683 | 0 | if((status = openmagic(&magicinfo))) goto done; |
684 | | |
685 | | /* Verify we have a large enough file */ |
686 | 0 | if(magicinfo.filelen < MAGIC_NUMBER_LEN) |
687 | 0 | {status = NC_ENOTNC; goto done;} |
688 | 0 | if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) { |
689 | 0 | status = NC_ENOTNC; |
690 | 0 | goto done; |
691 | 0 | } |
692 | | |
693 | | /* Look at the magic number */ |
694 | 0 | if(NC_interpret_magic_number(magic,model) == NC_NOERR |
695 | 0 | && model->format != 0) { |
696 | 0 | if (model->format == NC_FORMAT_NC3 && use_parallel) |
697 | | /* this is called from nc_open_par() and file is classic */ |
698 | 0 | model->impl = NC_FORMATX_PNETCDF; |
699 | 0 | goto done; /* found something */ |
700 | 0 | } |
701 | | |
702 | | /* Remaining case when implementation is an HDF5 file; |
703 | | search forward at starting at 512 |
704 | | and doubling to see if we have HDF5 magic number */ |
705 | 0 | { |
706 | 0 | long pos = 512L; |
707 | 0 | for(;;) { |
708 | 0 | if((pos+MAGIC_NUMBER_LEN) > magicinfo.filelen) |
709 | 0 | {status = NC_ENOTNC; goto done;} |
710 | 0 | if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR) |
711 | 0 | {status = NC_ENOTNC; goto done; } |
712 | 0 | NC_interpret_magic_number(magic,model); |
713 | 0 | if(model->impl == NC_FORMATX_NC4) break; |
714 | | /* double and try again */ |
715 | 0 | pos = 2*pos; |
716 | 0 | } |
717 | 0 | } |
718 | 0 | done: |
719 | 0 | closemagic(&magicinfo); |
720 | 0 | return check(status); |
721 | 0 | } |
722 | | |
723 | | /** |
724 | | \internal |
725 | | \ingroup datasets |
726 | | Provide open, read and close for use when searching for magic numbers |
727 | | */ |
728 | | static int |
729 | | openmagic(struct MagicFile* file) |
730 | 0 | { |
731 | 0 | int status = NC_NOERR; |
732 | |
|
733 | 0 | if(fIsSet(file->omode,NC_INMEMORY)) { |
734 | | /* Get its length */ |
735 | 0 | NC_memio* meminfo = (NC_memio*)file->parameters; |
736 | 0 | assert(meminfo != NULL); |
737 | 0 | file->filelen = (long long)meminfo->size; |
738 | | #ifdef ENABLE_BYTERANGE |
739 | | } else if(file->uri != NULL) { |
740 | | /* Construct a URL minus any fragment */ |
741 | | file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC); |
742 | | /* Open the curl handle */ |
743 | | if((status=nc_http_open(file->curlurl,&file->curl,&file->filelen))) goto done; |
744 | | #endif |
745 | 0 | } else { |
746 | | #ifdef USE_PARALLEL |
747 | | if (file->use_parallel) { |
748 | | int retval; |
749 | | MPI_Offset size; |
750 | | assert(file->parameters != NULL); |
751 | | if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm, |
752 | | (char*)file->path,MPI_MODE_RDONLY, |
753 | | ((NC_MPI_INFO*)file->parameters)->info, |
754 | | &file->fh)) != MPI_SUCCESS) { |
755 | | #ifdef MPI_ERR_NO_SUCH_FILE |
756 | | int errorclass; |
757 | | MPI_Error_class(retval, &errorclass); |
758 | | if (errorclass == MPI_ERR_NO_SUCH_FILE) |
759 | | #ifdef NC_ENOENT |
760 | | status = NC_ENOENT; |
761 | | #else |
762 | | status = errno; |
763 | | #endif |
764 | | else |
765 | | #endif |
766 | | status = NC_EPARINIT; |
767 | | goto done; |
768 | | } |
769 | | /* Get its length */ |
770 | | if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS) |
771 | | {status = NC_EPARINIT; goto done;} |
772 | | file->filelen = (long long)size; |
773 | | } else |
774 | | #endif /* USE_PARALLEL */ |
775 | 0 | { |
776 | 0 | if(file->path == NULL || strlen(file->path)==0) |
777 | 0 | {status = NC_EINVAL; goto done;} |
778 | | |
779 | | #ifdef _WIN32 |
780 | | file->fp = NCfopen(file->path, "rb"); |
781 | | #else |
782 | 0 | file->fp = NCfopen(file->path, "r"); |
783 | 0 | #endif |
784 | 0 | if(file->fp == NULL) |
785 | 0 | {status = errno; goto done;} |
786 | | /* Get its length */ |
787 | 0 | { |
788 | 0 | int fd = fileno(file->fp); |
789 | | #ifdef _WIN32 |
790 | | __int64 len64 = _filelengthi64(fd); |
791 | | if(len64 < 0) |
792 | | {status = errno; goto done;} |
793 | | file->filelen = (long long)len64; |
794 | | #else |
795 | 0 | off_t size; |
796 | 0 | size = lseek(fd, 0, SEEK_END); |
797 | 0 | if(size == -1) |
798 | 0 | {status = errno; goto done;} |
799 | 0 | file->filelen = (long long)size; |
800 | 0 | #endif |
801 | 0 | } |
802 | 0 | rewind(file->fp); |
803 | 0 | } |
804 | 0 | } |
805 | 0 | done: |
806 | 0 | return check(status); |
807 | 0 | } |
808 | | |
809 | | static int |
810 | | readmagic(struct MagicFile* file, long pos, char* magic) |
811 | 0 | { |
812 | 0 | int status = NC_NOERR; |
813 | 0 | memset(magic,0,MAGIC_NUMBER_LEN); |
814 | 0 | if(fIsSet(file->omode,NC_INMEMORY)) { |
815 | 0 | char* mempos; |
816 | 0 | NC_memio* meminfo = (NC_memio*)file->parameters; |
817 | 0 | if((pos + MAGIC_NUMBER_LEN) > meminfo->size) |
818 | 0 | {status = NC_EINMEMORY; goto done;} |
819 | 0 | mempos = ((char*)meminfo->memory) + pos; |
820 | 0 | memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN); |
821 | | #ifdef DEBUG |
822 | | printmagic("XXX: readmagic",magic,file); |
823 | | #endif |
824 | | #ifdef ENABLE_BYTERANGE |
825 | | } else if(file->uri != NULL) { |
826 | | NCbytes* buf = ncbytesnew(); |
827 | | fileoffset_t start = (size_t)pos; |
828 | | fileoffset_t count = MAGIC_NUMBER_LEN; |
829 | | status = nc_http_read(file->curl,file->curlurl,start,count,buf); |
830 | | if(status == NC_NOERR) { |
831 | | if(ncbyteslength(buf) != count) |
832 | | status = NC_EINVAL; |
833 | | else |
834 | | memcpy(magic,ncbytescontents(buf),count); |
835 | | } |
836 | | ncbytesfree(buf); |
837 | | #endif |
838 | 0 | } else { |
839 | | #ifdef USE_PARALLEL |
840 | | if (file->use_parallel) { |
841 | | MPI_Status mstatus; |
842 | | int retval; |
843 | | if((retval = MPI_File_read_at_all(file->fh, pos, magic, |
844 | | MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS) |
845 | | {status = NC_EPARINIT; goto done;} |
846 | | } else |
847 | | #endif /* USE_PARALLEL */ |
848 | 0 | { |
849 | 0 | int count; |
850 | 0 | int i = fseek(file->fp,pos,SEEK_SET); |
851 | 0 | if(i < 0) |
852 | 0 | {status = errno; goto done;} |
853 | 0 | for(i=0;i<MAGIC_NUMBER_LEN;) {/* make sure to read proper # of bytes */ |
854 | 0 | count=fread(&magic[i],1,(size_t)(MAGIC_NUMBER_LEN-i),file->fp); |
855 | 0 | if(count == 0 || ferror(file->fp)) |
856 | 0 | {status = errno; goto done;} |
857 | 0 | i += count; |
858 | 0 | } |
859 | 0 | } |
860 | 0 | } |
861 | | |
862 | 0 | done: |
863 | 0 | if(file && file->fp) clearerr(file->fp); |
864 | 0 | return check(status); |
865 | 0 | } |
866 | | |
867 | | /** |
868 | | * Close the file opened to check for magic number. |
869 | | * |
870 | | * @param file pointer to the MagicFile struct for this open file. |
871 | | * @returns NC_NOERR for success |
872 | | * @returns NC_EPARINIT if there was a problem closing file with MPI |
873 | | * (parallel builds only). |
874 | | * @author Dennis Heimbigner |
875 | | */ |
876 | | static int |
877 | | closemagic(struct MagicFile* file) |
878 | 0 | { |
879 | 0 | int status = NC_NOERR; |
880 | 0 | if(fIsSet(file->omode,NC_INMEMORY)) { |
881 | | /* noop */ |
882 | | #ifdef ENABLE_BYTERANGE |
883 | | } else if(file->uri != NULL) { |
884 | | status = nc_http_close(file->curl); |
885 | | nullfree(file->curlurl); |
886 | | #endif |
887 | 0 | } else { |
888 | | #ifdef USE_PARALLEL |
889 | | if (file->use_parallel) { |
890 | | int retval; |
891 | | if((retval = MPI_File_close(&file->fh)) != MPI_SUCCESS) |
892 | | {status = NC_EPARINIT; return status;} |
893 | | } else |
894 | | #endif |
895 | 0 | { |
896 | 0 | if(file->fp) fclose(file->fp); |
897 | 0 | } |
898 | 0 | } |
899 | 0 | return status; |
900 | 0 | } |
901 | | |
902 | | /*! |
903 | | Interpret the magic number found in the header of a netCDF file. |
904 | | This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags. |
905 | | |
906 | | @param[in] magic Pointer to a character array with the magic number block. |
907 | | @param[out] model Pointer to an integer to hold the corresponding netCDF type. |
908 | | @param[out] version Pointer to an integer to hold the corresponding netCDF version. |
909 | | @returns NC_NOERR if a legitimate file type found |
910 | | @returns NC_ENOTNC otherwise |
911 | | |
912 | | \internal |
913 | | \ingroup datasets |
914 | | |
915 | | */ |
916 | | static int |
917 | | NC_interpret_magic_number(char* magic, NCmodel* model) |
918 | 0 | { |
919 | 0 | int status = NC_NOERR; |
920 | | /* Look at the magic number */ |
921 | 0 | #ifdef USE_NETCDF4 |
922 | 0 | if (strlen(UDF0_magic_number) && !strncmp(UDF0_magic_number, magic, |
923 | 0 | strlen(UDF0_magic_number))) |
924 | 0 | { |
925 | 0 | model->impl = NC_FORMATX_UDF0; |
926 | 0 | model->format = NC_FORMAT_NETCDF4; |
927 | 0 | goto done; |
928 | 0 | } |
929 | 0 | if (strlen(UDF1_magic_number) && !strncmp(UDF1_magic_number, magic, |
930 | 0 | strlen(UDF1_magic_number))) |
931 | 0 | { |
932 | 0 | model->impl = NC_FORMATX_UDF1; |
933 | 0 | model->format = NC_FORMAT_NETCDF4; |
934 | 0 | goto done; |
935 | 0 | } |
936 | 0 | #endif /* USE_NETCDF4 */ |
937 | | |
938 | | /* Use the complete magic number string for HDF5 */ |
939 | 0 | if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) { |
940 | 0 | model->impl = NC_FORMATX_NC4; |
941 | 0 | model->format = NC_FORMAT_NETCDF4; |
942 | 0 | goto done; |
943 | 0 | } |
944 | 0 | if(magic[0] == '\016' && magic[1] == '\003' |
945 | 0 | && magic[2] == '\023' && magic[3] == '\001') { |
946 | 0 | model->impl = NC_FORMATX_NC_HDF4; |
947 | 0 | model->format = NC_FORMAT_NETCDF4; |
948 | 0 | goto done; |
949 | 0 | } |
950 | 0 | if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') { |
951 | 0 | if(magic[3] == '\001') { |
952 | 0 | model->impl = NC_FORMATX_NC3; |
953 | 0 | model->format = NC_FORMAT_CLASSIC; |
954 | 0 | goto done; |
955 | 0 | } |
956 | 0 | if(magic[3] == '\002') { |
957 | 0 | model->impl = NC_FORMATX_NC3; |
958 | 0 | model->format = NC_FORMAT_64BIT_OFFSET; |
959 | 0 | goto done; |
960 | 0 | } |
961 | 0 | if(magic[3] == '\005') { |
962 | 0 | model->impl = NC_FORMATX_NC3; |
963 | 0 | model->format = NC_FORMAT_64BIT_DATA; |
964 | 0 | goto done; |
965 | 0 | } |
966 | 0 | } |
967 | | /* No match */ |
968 | 0 | status = NC_ENOTNC; |
969 | 0 | goto done; |
970 | | |
971 | 0 | done: |
972 | 0 | return check(status); |
973 | 0 | } |
974 | | |
#ifdef DEBUG
/* Debug aid: write the tag, the parallel flag and the first
   MAGIC_NUMBER_LEN bytes of magic to stderr, each byte as hex plus a
   printable rendering ('?' for other control characters). */
static void
printmagic(const char* tag, char* magic, struct MagicFile* f)
{
    int k;

    fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel);
    for(k=0;k<MAGIC_NUMBER_LEN;k++) {
        /* keep only the low 8 bits so a signed char prints as one byte */
        unsigned int byte = ((unsigned int)magic[k]) & 0x000000FF;
        switch (byte) {
        case '\n':
            fprintf(stderr," 0x%0x/'\\n'",byte);
            break;
        case '\r':
            fprintf(stderr," 0x%0x/'\\r'",byte);
            break;
        default:
            if(byte < ' ')
                fprintf(stderr," 0x%0x/'?'",byte);
            else
                fprintf(stderr," 0x%0x/'%c'",byte,byte);
            break;
        }
    }
    fprintf(stderr,"\n");
    fflush(stderr);
}
#endif