Coverage Report

Created: 2023-05-28 06:42

/src/netcdf-c/libdispatch/dinfermodel.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * @file
3
 *
4
 * Infer as much as possible from the omode + path.
5
 * Rewrite the path to a canonical form.
6
 *
7
 * Copyright 2018 University Corporation for Atmospheric
8
 * Research/Unidata. See COPYRIGHT file for more info.
9
*/
10
11
#include "config.h"
12
#include <stdlib.h>
13
#ifdef HAVE_UNISTD_H
14
#include <unistd.h>
15
#endif
16
#ifdef HAVE_SYS_TYPES_H
17
#include <sys/types.h>
18
#endif
19
20
#include "ncdispatch.h"
21
#include "ncpathmgr.h"
22
#include "netcdf_mem.h"
23
#include "fbits.h"
24
#include "ncbytes.h"
25
#include "nclist.h"
26
#include "nclog.h"
27
#include "ncrc.h"
28
#include "nchttp.h"
29
#ifdef ENABLE_S3
30
#include "ncs3sdk.h"
31
#endif
32
33
#ifndef nulldup
34
 #define nulldup(x) ((x)?strdup(x):(x))
35
#endif
36
37
#undef DEBUG
38
39
/* If Defined, then use only stdio for all magic number io;
40
   otherwise use stdio or mpio as required.
41
 */
42
#undef USE_STDIO
43
44
/**
45
Sort info for open/read/close of
46
file when searching for magic numbers
47
*/
48
struct MagicFile {
49
    const char* path;
50
    struct NCURI* uri;
51
    int omode;
52
    NCmodel* model;
53
    long long filelen;
54
    int use_parallel;
55
    int iss3;
56
    void* parameters; /* !NULL if inmemory && !diskless */
57
    FILE* fp;
58
#ifdef USE_PARALLEL
59
    MPI_File fh;
60
#endif
61
    char* curlurl; /* url to use with CURLOPT_SET_URL */
62
    NC_HTTP_STATE* state;
63
#ifdef ENABLE_S3
64
    NCS3INFO s3;
65
    void* s3client;
66
    char* errmsg;
67
#endif
68
};
69
70
/** @internal Magic number for HDF5 files. To be consistent with
71
 * H5Fis_hdf5, use the complete HDF5 magic number */
72
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";
73
74
218
#define modelcomplete(model) ((model)->impl != 0)
75
76
#ifdef DEBUG
77
static void dbgflush(void)
78
{
79
    fflush(stdout);
80
    fflush(stderr);
81
}
82
83
static void
84
fail(int err)
85
{
86
    return;
87
}
88
89
static int
90
check(int err)
91
{
92
    if(err != NC_NOERR)
93
  fail(err);
94
    return err;
95
}
96
#else
97
980
#define check(err) (err)
98
#endif
99
100
/*
101
Define a table of "mode=" string values
102
from which the implementation can be inferred.
103
Note that only cases that can currently
104
take URLs are included.
105
*/
106
static struct FORMATMODES {
107
    const char* tag;
108
    const int impl; /* NC_FORMATX_XXX value */
109
    const int format; /* NC_FORMAT_XXX value */
110
} formatmodes[] = {
111
{"dap2",NC_FORMATX_DAP2,NC_FORMAT_CLASSIC},
112
{"dap4",NC_FORMATX_DAP4,NC_FORMAT_NETCDF4},
113
{"netcdf-3",NC_FORMATX_NC3,0}, /* Might be e.g. cdf5 */
114
{"classic",NC_FORMATX_NC3,0}, /* ditto */
115
{"netcdf-4",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
116
{"enhanced",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
117
{"udf0",NC_FORMATX_UDF0,0},
118
{"udf1",NC_FORMATX_UDF1,0},
119
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
120
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
121
{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
122
{NULL,0},
123
};
124
125
/*
Replace a top-level (value-less) fragment name with defkey=defvalues.
Lookup stops at the first entry whose name matches, so every name
must appear exactly once; defvalues is NULL terminated.
*/
static const struct MACRODEF {
    char* name;
    char* defkey;
    char* defvalues[4];
} macrodefs[] = {
{"zarr","mode",{"nczarr","zarr",NULL}},
{"dap2","mode",{"dap2",NULL}},
{"dap4","mode",{"dap4",NULL}},
{"s3","mode",{"s3","nczarr",NULL}},
{"bytes","mode",{"bytes",NULL}},
{"xarray","mode",{"zarr", NULL}},
{"noxarray","mode",{"nczarr", "noxarray", NULL}},
{NULL,NULL,{NULL}}
};
141
142
/*
Mode inferences: when the mode list contains `key`, `inference` is added.
Warning: construct this list carefully to avoid infinite inference chains.
To (mostly) guard against that, an attempt to infer a value that is
already present is ignored, which effectively requires the inference
graph to be a DAG with no cycles.
You have been warned.
*/
static const struct MODEINFER {
    char* key;
    char* inference;
} modeinferences[] = {
    {"zarr",     "nczarr"},
    {"xarray",   "zarr"},
    {"noxarray", "nczarr"},
    {"noxarray", "zarr"},
    {NULL,       NULL}
};
161
162
/* Mode negations: if mode contains key, then remove all occurrences of the inference and repeat */
163
static const struct MODEINFER modenegations[] = {
164
{"bytes","nczarr"}, /* bytes negates (nc)zarr */
165
{"bytes","zarr"},
166
{"noxarray","xarray"},
167
{NULL,NULL}
168
};
169
170
/* Map FORMATX to readability to get magic number */
171
static struct Readable {
172
    int impl;
173
    int readable;
174
} readable[] = {
175
{NC_FORMATX_NC3,1},
176
{NC_FORMATX_NC_HDF5,1},
177
{NC_FORMATX_NC_HDF4,1},
178
{NC_FORMATX_PNETCDF,1},
179
{NC_FORMATX_DAP2,0},
180
{NC_FORMATX_DAP4,0},
181
{NC_FORMATX_UDF0,1},
182
{NC_FORMATX_UDF1,1},
183
{NC_FORMATX_NCZARR,0}, /* eventually make readable */
184
{0,0},
185
};
186
187
/* The URL protocols that are recognized, the protocol each one is
   rewritten to (if any), and the fragment text each one implies */
static struct NCPROTOCOLLIST {
    const char* protocol;
    const char* substitute;
    const char* fragments; /* arbitrary fragment arguments */
} ncprotolist[] = {
    {"http",  NULL,   NULL},
    {"https", NULL,   NULL},
    {"file",  NULL,   NULL},
    {"dods",  "http", "mode=dap2"},
    {"dap4",  "http", "mode=dap4"},
    {"s3",    "s3",   "mode=s3"},
    {NULL,    NULL,   NULL} /* Terminate search */
};
201
202
/* Forward */
203
static int NC_omodeinfer(int useparallel, int omode, NCmodel*);
204
static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri);
205
static int processuri(const char* path, NCURI** urip, NClist* fraglist);
206
static int processmacros(NClist** fraglistp);
207
static char* envvlist2string(NClist* pairs, const char*);
208
static void set_default_mode(int* cmodep);
209
static int parseonchar(const char* s, int ch, NClist* segments);
210
static int mergelist(NClist** valuesp);
211
212
static int openmagic(struct MagicFile* file);
213
static int readmagic(struct MagicFile* file, long pos, char* magic);
214
static int closemagic(struct MagicFile* file);
215
static int NC_interpret_magic_number(char* magic, NCmodel* model);
216
#ifdef DEBUG
217
static void printmagic(const char* tag, char* magic,struct MagicFile*);
218
static void printlist(NClist* list, const char* tag);
219
#endif
220
static int isreadable(NCURI*,NCmodel*);
221
static char* list2string(NClist*);
222
static int parsepair(const char* pair, char** keyp, char** valuep);
223
static NClist* parsemode(const char* modeval);
224
static const char* getmodekey(const NClist* envv);
225
static int replacemode(NClist* envv, const char* newval);
226
static void infernext(NClist* current, NClist* next);
227
static int negateone(const char* mode, NClist* modes);
228
static void cleanstringlist(NClist* strs, int caseinsensitive);
229
230
/*
231
If the path looks like a URL, then parse it, reformat it.
232
*/
233
static int
234
processuri(const char* path, NCURI** urip, NClist* fraglenv)
235
114
{
236
114
    int stat = NC_NOERR;
237
114
    int found = 0;
238
114
    NClist* tmp = NULL;
239
114
    struct NCPROTOCOLLIST* protolist;
240
114
    NCURI* uri = NULL;
241
114
    size_t pathlen = strlen(path);
242
114
    char* str = NULL;
243
114
    const char** ufrags;
244
114
    const char** p;
245
246
114
    if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;}
247
248
    /* Defaults */
249
114
    if(urip) *urip = NULL;
250
251
114
    ncuriparse(path,&uri);
252
114
    if(uri == NULL) goto done; /* not url */
253
254
    /* Look up the protocol */
255
0
    for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) {
256
0
        if(strcmp(uri->protocol,protolist->protocol) == 0) {
257
0
      found = 1;
258
0
      break;
259
0
  }
260
0
    }
261
0
    if(!found)
262
0
  {stat = NC_EINVAL; goto done;} /* unrecognized URL form */
263
264
    /* process the corresponding fragments for that protocol */
265
0
    if(protolist->fragments != NULL) {
266
0
  int i;
267
0
  tmp = nclistnew();
268
0
  if((stat = parseonchar(protolist->fragments,'&',tmp))) goto done;
269
0
  for(i=0;i<nclistlength(tmp);i++) {
270
0
      char* key=NULL;
271
0
          char* value=NULL;
272
0
      if((stat = parsepair(nclistget(tmp,i),&key,&value))) goto done;
273
0
      if(value == NULL) value = strdup("");
274
0
      nclistpush(fraglenv,key);
275
0
          nclistpush(fraglenv,value);
276
0
  }
277
0
  nclistfreeall(tmp); tmp = NULL;
278
0
    }
279
280
    /* Substitute the protocol in any case */
281
0
    if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute);
282
283
    /* capture the fragments of the url */
284
0
    ufrags = ncurifragmentparams(uri);
285
0
    if(ufrags != NULL) {
286
0
        for(p=ufrags;*p;p+=2) {
287
0
      const char* key = p[0];
288
0
      const char* value = p[1];
289
0
      nclistpush(fraglenv,nulldup(key));
290
0
      value = (value==NULL?"":value);
291
0
      nclistpush(fraglenv,strdup(value));
292
0
  }
293
0
    }
294
0
    if(urip) {
295
0
  *urip = uri;
296
0
  uri = NULL;
297
0
    }
298
299
114
done:
300
114
    nclistfreeall(tmp);
301
114
    nullfree(str);
302
114
    if(uri != NULL) ncurifree(uri);
303
114
    return check(stat);
304
0
}
305
306
/* Split a key=value pair */
307
static int
308
parsepair(const char* pair, char** keyp, char** valuep)
309
0
{
310
0
    const char* p;
311
0
    char* key = NULL;
312
0
    char* value = NULL;
313
314
0
    if(pair == NULL)
315
0
        return NC_EINVAL; /* empty pair */
316
0
    if(pair[0] == '\0' || pair[0] == '=')
317
0
        return NC_EINVAL; /* no key */
318
0
    p = strchr(pair,'=');
319
0
    if(p == NULL) {
320
0
  value = NULL;
321
0
  key = strdup(pair);
322
0
    } else {
323
0
  ptrdiff_t len = (p-pair);
324
0
  if((key = malloc(len+1))==NULL) return NC_ENOMEM;
325
0
  memcpy(key,pair,len);
326
0
  key[len] = '\0';
327
0
  if(p[1] == '\0')
328
0
      value = NULL;
329
0
  else
330
0
      value = strdup(p+1);
331
0
    }
332
0
    if(keyp) {*keyp = key; key = NULL;};
333
0
    if(valuep) {*valuep = value; value = NULL;};
334
0
    nullfree(key);
335
0
    nullfree(value);
336
0
    return NC_NOERR;
337
0
}
338
339
#if 0
340
static int
341
parseurlmode(const char* modestr, NClist* list)
342
{
343
    int stat = NC_NOERR;
344
    const char* p = NULL;
345
    const char* endp = NULL;
346
347
    if(modestr == NULL || *modestr == '\0') goto done;
348
349
    /* Split modestr at the commas or EOL */
350
    p = modestr;
351
    for(;;) {
352
  char* s;
353
  ptrdiff_t slen;
354
  endp = strchr(p,',');
355
  if(endp == NULL) endp = p + strlen(p);
356
  slen = (endp - p);
357
  if((s = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
358
  memcpy(s,p,slen);
359
  s[slen] = '\0';
360
  nclistpush(list,s);
361
  if(*endp == '\0') break;
362
  p = endp+1;
363
    }
364
365
done:
366
    return check(stat);
367
}
368
#endif
369
370
/* Split a string at a given char */
371
static int
372
parseonchar(const char* s, int ch, NClist* segments)
373
0
{
374
0
    int stat = NC_NOERR;
375
0
    const char* p = NULL;
376
0
    const char* endp = NULL;
377
378
0
    if(s == NULL || *s == '\0') goto done;
379
380
0
    p = s;
381
0
    for(;;) {
382
0
  char* q;
383
0
  ptrdiff_t slen;
384
0
  endp = strchr(p,ch);
385
0
  if(endp == NULL) endp = p + strlen(p);
386
0
  slen = (endp - p);
387
0
  if((q = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
388
0
  memcpy(q,p,slen);
389
0
  q[slen] = '\0';
390
0
  nclistpush(segments,q);
391
0
  if(*endp == '\0') break;
392
0
  p = endp+1;
393
0
    }
394
395
0
done:
396
0
    return check(stat);
397
0
}
398
399
/* Convert a key,value envv pairlist into a delimited string*/
400
static char*
401
envvlist2string(NClist* envv, const char* delim)
402
0
{
403
0
    int i;
404
0
    NCbytes* buf = NULL;
405
0
    char* result = NULL;
406
407
0
    if(envv == NULL || nclistlength(envv) == 0) return NULL;
408
0
    buf = ncbytesnew();
409
0
    for(i=0;i<nclistlength(envv);i+=2) {
410
0
  const char* key = nclistget(envv,i);
411
0
  const char* val = nclistget(envv,i+1);
412
0
  if(key == NULL || strlen(key) == 0) continue;
413
0
  assert(val != NULL);
414
0
  if(i > 0) ncbytescat(buf,"&");
415
0
  ncbytescat(buf,key);
416
0
  if(val != NULL && val[0] != '\0') {
417
0
      ncbytescat(buf,"=");
418
0
      ncbytescat(buf,val);
419
0
  }
420
0
    }
421
0
    result = ncbytesextract(buf);
422
0
    ncbytesfree(buf);
423
0
    return result;
424
0
}
425
426
/* Given a mode= argument, fill in the impl */
427
static int
428
processmodearg(const char* arg, NCmodel* model)
429
0
{
430
0
    int stat = NC_NOERR;
431
0
    struct FORMATMODES* format = formatmodes;
432
0
    for(;format->tag;format++) {
433
0
  if(strcmp(format->tag,arg)==0) {
434
0
            model->impl = format->impl;
435
0
      if(format->format != 0) model->format = format->format;
436
0
  }
437
0
    }
438
0
    return check(stat);
439
0
}
440
441
/* Given an envv fragment list, do macro replacement */
442
static int
443
processmacros(NClist** fraglenvp)
444
0
{
445
0
    int stat = NC_NOERR;
446
0
    const struct MACRODEF* macros = NULL;
447
0
    NClist*  fraglenv = NULL;
448
0
    NClist* expanded = NULL;
449
450
0
    if(fraglenvp == NULL || nclistlength(*fraglenvp) == 0) goto done;
451
0
    fraglenv = *fraglenvp;
452
0
    expanded = nclistnew();
453
0
    while(nclistlength(fraglenv) > 0) {
454
0
  int found = 0;
455
0
  char* key = NULL;
456
0
  char* value = NULL;
457
0
  key = nclistremove(fraglenv,0); /* remove from changing front */
458
0
  value = nclistremove(fraglenv,0); /* remove from changing front */
459
0
  if(strlen(value) == 0) { /* must be a singleton  */
460
0
            for(macros=macrodefs;macros->name;macros++) {
461
0
                if(strcmp(macros->name,key)==0) {
462
0
        char* const * p;
463
0
        nclistpush(expanded,strdup(macros->defkey));
464
0
        for(p=macros->defvalues;*p;p++) 
465
0
      nclistpush(expanded,strdup(*p));
466
0
        found = 1;        
467
0
        break;
468
0
          }
469
0
      }
470
0
  }
471
0
  if(!found) {/* pass thru */
472
0
      nclistpush(expanded,strdup(key));
473
0
          nclistpush(expanded,strdup(value));
474
0
  }
475
0
  nullfree(key);
476
0
  nullfree(value);
477
0
    }
478
0
    *fraglenvp = expanded; expanded = NULL;
479
480
0
done:
481
0
    nclistfreeall(expanded);
482
0
    nclistfreeall(fraglenv);
483
0
    return check(stat);
484
0
}
485
486
/* Process mode flag inferences */
487
static int
488
processinferences(NClist* fraglenv)
489
0
{
490
0
    int stat = NC_NOERR;
491
0
    const char* modeval = NULL;
492
0
    NClist* newmodes = nclistnew();
493
0
    NClist* currentmodes = NULL;
494
0
    NClist* nextmodes = nclistnew();
495
0
    int i;
496
0
    char* newmodeval = NULL;
497
498
0
    if(fraglenv == NULL || nclistlength(fraglenv) == 0) goto done;
499
500
    /* Get "mode" entry */
501
0
    if((modeval = getmodekey(fraglenv))==NULL) goto done;
502
503
    /* Get the mode as list */
504
0
    currentmodes = parsemode(modeval);
505
506
#ifdef DEBUG
507
    printlist(currentmodes,"processinferences: initial mode list");
508
#endif
509
510
    /* Do what amounts to breadth first inferencing down the inference DAG. */
511
512
0
    for(;;) {
513
0
        NClist* tmp = NULL;
514
        /* Compute the next set of inferred modes */
515
#ifdef DEBUG
516
printlist(currentmodes,"processinferences: current mode list");
517
#endif
518
0
        infernext(currentmodes,nextmodes);
519
#ifdef DEBUG
520
printlist(nextmodes,"processinferences: next mode list");
521
#endif
522
        /* move current modes into list of newmodes */
523
0
        for(i=0;i<nclistlength(currentmodes);i++) {
524
0
      nclistpush(newmodes,nclistget(currentmodes,i));
525
0
  }
526
0
        nclistsetlength(currentmodes,0); /* clear current mode list */
527
0
        if(nclistlength(nextmodes) == 0) break; /* nothing more to do */
528
#ifdef DEBUG
529
printlist(newmodes,"processinferences: new mode list");
530
#endif
531
  /* Swap current and next */
532
0
        tmp = currentmodes;
533
0
  currentmodes = nextmodes;
534
0
  nextmodes = tmp;
535
0
        tmp = NULL;
536
0
    }
537
    /* cleanup any unused elements in currenmodes */
538
0
    nclistclearall(currentmodes);
539
540
    /* Ensure no duplicates */
541
0
    cleanstringlist(newmodes,1);
542
543
#ifdef DEBUG
544
    printlist(newmodes,"processinferences: final inferred mode list");
545
#endif
546
547
   /* Remove negative inferences */
548
0
   for(i=0;i<nclistlength(newmodes);i++) {
549
0
  const char* mode = nclistget(newmodes,i);
550
0
  negateone(mode,newmodes);
551
0
    }
552
553
    /* Store new mode value */
554
0
    if((newmodeval = list2string(newmodes))== NULL)
555
0
  {stat = NC_ENOMEM; goto done;}        
556
0
    if((stat=replacemode(fraglenv,newmodeval))) goto done;
557
0
    modeval = NULL;
558
559
0
done:
560
0
    nullfree(newmodeval);
561
0
    nclistfreeall(newmodes);
562
0
    nclistfreeall(currentmodes);
563
0
    nclistfreeall(nextmodes);
564
0
    return check(stat);
565
0
}
566
567
568
static int
569
negateone(const char* mode, NClist* newmodes)
570
0
{
571
0
    const struct MODEINFER* tests = modenegations;
572
0
    int changed = 0;
573
0
    for(;tests->key;tests++) {
574
0
  int i;
575
0
  if(strcasecmp(tests->key,mode)==0) {
576
      /* Find and remove all instances of the inference value */
577
0
      for(i=nclistlength(newmodes)-1;i>=0;i--) {
578
0
    char* candidate = nclistget(newmodes,i);
579
0
    if(strcasecmp(candidate,tests->inference)==0) {
580
0
        nclistremove(newmodes,i);
581
0
        nullfree(candidate);
582
0
              changed = 1;
583
0
    }
584
0
      }
585
0
        }
586
0
    }
587
0
    return changed;
588
0
}
589
590
static void
591
infernext(NClist* current, NClist* next)
592
0
{
593
0
    int i;
594
0
    for(i=0;i<nclistlength(current);i++) {
595
0
        const struct MODEINFER* tests = NULL;
596
0
  const char* cur = nclistget(current,i);
597
0
        for(tests=modeinferences;tests->key;tests++) {
598
0
      if(strcasecmp(tests->key,cur)==0) {
599
          /* Append the inferred mode unless dup */
600
0
    if(!nclistmatch(next,tests->inference,1))
601
0
              nclistpush(next,strdup(tests->inference));
602
0
      }
603
0
        }
604
0
    }
605
0
}
606
607
/*
608
Given a list of strings, remove nulls and duplicates
609
*/
610
static int
611
mergelist(NClist** valuesp)
612
0
{
613
0
    int i,j;
614
0
    int stat = NC_NOERR;
615
0
    NClist* values = *valuesp;
616
0
    NClist* allvalues = nclistnew();
617
0
    NClist* newvalues = nclistnew();
618
0
    char* value = NULL;
619
620
0
    for(i=0;i<nclistlength(values);i++) {
621
0
  char* val1 = nclistget(values,i);
622
  /* split on commas and put pieces into allvalues */
623
0
  if((stat=parseonchar(val1,',',allvalues))) goto done;
624
0
    }
625
    /* Remove duplicates and "" */
626
0
    while(nclistlength(allvalues) > 0) {
627
0
  value = nclistremove(allvalues,0);
628
0
  if(strlen(value) == 0) {
629
0
      nullfree(value); value = NULL;
630
0
  } else {
631
0
      for(j=0;j<nclistlength(newvalues);j++) {
632
0
          char* candidate = nclistget(newvalues,j);
633
0
          if(strcasecmp(candidate,value)==0)
634
0
              {nullfree(value); value = NULL; break;}
635
0
       }
636
0
  }
637
0
  if(value != NULL) {nclistpush(newvalues,value); value = NULL;}
638
0
    }
639
    /* Make sure to have at least 1 value */
640
0
    if(nclistlength(newvalues)==0) nclistpush(newvalues,strdup(""));
641
0
    *valuesp = values; values = NULL;
642
643
0
done:
644
0
    nclistfree(allvalues);
645
0
    nclistfreeall(values);
646
0
    nclistfreeall(newvalues);
647
0
    return check(stat);
648
0
}
649
650
static int
651
lcontains(NClist* l, const char* key0)
652
0
{
653
0
    int i;
654
0
    for(i=0;i<nclistlength(l);i++) {
655
0
        const char* key1 = nclistget(l,i);
656
0
  if(strcasecmp(key0,key1)==0) return 1;
657
0
    }
658
0
    return 0;
659
0
}
660
661
/* Warning values should not use nclistfreeall */
662
static void
663
collectvaluesbykey(NClist* fraglenv, const char* key, NClist* values)
664
0
{
665
0
    int i;
666
    /* collect all the values with the same key (including this one) */
667
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
668
0
        const char* key2 = nclistget(fraglenv,i);
669
0
        if(strcasecmp(key,key2)==0) {
670
0
      const char* value2 = nclistget(fraglenv,i+1);
671
0
      nclistpush(values,value2); value2 = NULL;
672
0
  }
673
0
    }
674
0
}
675
676
/* Warning allkeys should not use nclistfreeall */
677
static void
678
collectallkeys(NClist* fraglenv, NClist* allkeys)
679
0
{
680
0
    int i;
681
    /* collect all the distinct keys */
682
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
683
0
  char* key = nclistget(fraglenv,i);
684
0
  if(!lcontains(allkeys,key)) {
685
0
      nclistpush(allkeys,key);
686
0
  }
687
0
    }
688
0
}
689
690
/* Given a fragment envv list, coalesce duplicate keys and remove duplicate values*/
691
static int
692
cleanfragments(NClist** fraglenvp)
693
0
{
694
0
    int i,stat = NC_NOERR;
695
0
    NClist*  fraglenv = NULL;
696
0
    NClist* tmp = NULL;
697
0
    NClist* allkeys = NULL;
698
0
    NClist* newlist = NULL;
699
0
    NCbytes* buf = NULL;
700
0
    char* key = NULL;
701
0
    char* value = NULL;
702
703
0
    if(fraglenvp == NULL || nclistlength(*fraglenvp) == 0) return NC_NOERR;
704
0
    fraglenv = *fraglenvp; /* take control of this list */
705
0
    *fraglenvp = NULL;
706
0
    newlist = nclistnew();
707
0
    buf = ncbytesnew();
708
0
    allkeys = nclistnew();
709
0
    tmp = nclistnew();
710
711
    /* collect all unique keys */
712
0
    collectallkeys(fraglenv,allkeys);
713
    /* Collect all values for same key across all fragment pairs */
714
0
    for(i=0;i<nclistlength(allkeys);i++) {
715
0
  key = nclistget(allkeys,i);
716
0
  collectvaluesbykey(fraglenv,key,tmp);
717
  /* merge the key values, remove duplicate */
718
0
  if((stat=mergelist(&tmp))) goto done;
719
        /* Construct key,value pair and insert into newlist */
720
0
  key = strdup(key);
721
0
  nclistpush(newlist,key);
722
0
  value = list2string(tmp);
723
0
  nclistpush(newlist,value);
724
0
  nclistclear(tmp);
725
0
    }
726
0
    *fraglenvp = newlist; newlist = NULL;
727
0
done:
728
0
    nclistfree(allkeys);
729
0
    nclistfree(tmp);
730
0
    ncbytesfree(buf);
731
0
    nclistfreeall(fraglenv);
732
0
    nclistfreeall(newlist);
733
0
    return check(stat);
734
0
}
735
736
/* process non-mode fragment keys in case they hold significance; currently not */
737
static int
738
processfragmentkeys(const char* key, const char* value, NCmodel* model)
739
0
{
740
0
    return NC_NOERR;
741
0
}
742
743
/*
744
Infer from the mode + useparallel
745
only call if iscreate or file is not easily readable.
746
*/
747
static int
748
NC_omodeinfer(int useparallel, int cmode, NCmodel* model)
749
114
{
750
114
    int stat = NC_NOERR;
751
752
    /* If no format flags are set, then use default */
753
114
    if(!fIsSet(cmode,NC_FORMAT_ALL))
754
114
  set_default_mode(&cmode);
755
756
    /* Process the cmode; may override some already set flags. The
757
     * user-defined formats must be checked first. They may choose to
758
     * use some of the other flags, like NC_NETCDF4, so we must first
759
     * check NC_UDF0 and NC_UDF1 before checking for any other
760
     * flag. */
761
114
    if(fIsSet(cmode, NC_UDF0)  || fIsSet(cmode, NC_UDF1))
762
0
    {
763
0
        if(fIsSet(cmode, NC_UDF0))
764
0
        {
765
0
      model->impl = NC_FORMATX_UDF0;
766
0
  } else {
767
0
      model->impl = NC_FORMATX_UDF1;
768
0
  }
769
0
        if(fIsSet(cmode,NC_64BIT_OFFSET)) 
770
0
        {
771
0
            model->format = NC_FORMAT_64BIT_OFFSET;
772
0
        }
773
0
        else if(fIsSet(cmode,NC_64BIT_DATA))
774
0
        {
775
0
            model->format = NC_FORMAT_64BIT_DATA;
776
0
        }
777
0
        else if(fIsSet(cmode,NC_NETCDF4))
778
0
        {
779
0
            if(fIsSet(cmode,NC_CLASSIC_MODEL))
780
0
                model->format = NC_FORMAT_NETCDF4_CLASSIC;
781
0
            else
782
0
                model->format = NC_FORMAT_NETCDF4;
783
0
        }
784
0
        if(! model->format)
785
0
            model->format = NC_FORMAT_CLASSIC;
786
0
  goto done;
787
0
    }
788
789
114
    if(fIsSet(cmode,NC_64BIT_OFFSET)) {
790
0
  model->impl = NC_FORMATX_NC3;
791
0
  model->format = NC_FORMAT_64BIT_OFFSET;
792
0
        goto done;
793
0
    }
794
795
114
    if(fIsSet(cmode,NC_64BIT_DATA)) {
796
0
  model->impl = NC_FORMATX_NC3;
797
0
  model->format = NC_FORMAT_64BIT_DATA;
798
0
        goto done;
799
0
    }
800
801
114
    if(fIsSet(cmode,NC_NETCDF4)) {
802
0
  model->impl = NC_FORMATX_NC4;
803
0
        if(fIsSet(cmode,NC_CLASSIC_MODEL))
804
0
      model->format = NC_FORMAT_NETCDF4_CLASSIC;
805
0
  else
806
0
      model->format = NC_FORMAT_NETCDF4;
807
0
        goto done;
808
0
    }
809
810
    /* Default to classic model */
811
114
    model->format = NC_FORMAT_CLASSIC;
812
114
    model->impl = NC_FORMATX_NC3;
813
814
114
done:
815
    /* Apply parallel flag */
816
114
    if(useparallel) {
817
0
        if(model->impl == NC_FORMATX_NC3)
818
0
      model->impl = NC_FORMATX_PNETCDF;
819
0
    }
820
114
    return check(stat);
821
114
}
822
823
/*
824
If the mode flags do not necessarily specify the
825
format, then default it by adding in appropriate flags.
826
*/
827
828
static void
829
set_default_mode(int* modep)
830
114
{
831
114
    int mode = *modep;
832
114
    int dfaltformat;
833
834
114
    dfaltformat = nc_get_default_format();
835
114
    switch (dfaltformat) {
836
0
    case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break;
837
0
    case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break;
838
0
    case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break;
839
0
    case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break;
840
114
    case NC_FORMAT_CLASSIC: /* fall thru */
841
114
    default: break; /* default to classic */
842
114
    }
843
114
    *modep = mode; /* final result */
844
114
}
845
846
/**************************************************/
847
/*
848
   Infer model for this dataset using some
849
   combination of cmode, path, and reading the dataset.
850
   See the documentation in docs/internal.dox.
851
852
@param path
853
@param omode
854
@param iscreate
855
@param useparallel
856
@param params
857
@param model
858
@param newpathp
859
*/
860
861
int
862
NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp)
863
114
{
864
114
    int i,stat = NC_NOERR;
865
114
    NCURI* uri = NULL;
866
114
    int omode = *omodep;
867
114
    NClist* fraglenv = nclistnew();
868
114
    NClist* modeargs = nclistnew();
869
114
    char* sfrag = NULL;
870
114
    const char* modeval = NULL;
871
114
    char* abspath = NULL;
872
873
    /* Phase 1:
874
       1. convert special protocols to http|https
875
       2. begin collecting fragments
876
    */
877
114
    if((stat = processuri(path, &uri, fraglenv))) goto done;
878
879
114
    if(uri != NULL) {
880
#ifdef DEBUG
881
  printlist(fraglenv,"processuri");
882
#endif
883
884
        /* Phase 2: Expand macros and add to fraglenv */
885
0
        if((stat = processmacros(&fraglenv))) goto done;
886
#ifdef DEBUG
887
  printlist(fraglenv,"processmacros");
888
#endif
889
890
  /* Cleanup the fragment list */
891
0
  if((stat = cleanfragments(&fraglenv))) goto done;
892
893
        /* Phase 2a: Expand mode inferences and add to fraglenv */
894
0
        if((stat = processinferences(fraglenv))) goto done;
895
#ifdef DEBUG
896
  printlist(fraglenv,"processinferences");
897
#endif
898
899
        /* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
900
0
        if((stat = cleanfragments(&fraglenv))) goto done;
901
#ifdef DEBUG
902
  printlist(fraglenv,"cleanfragments");
903
#endif
904
905
        /* Phase 4: Rebuild the url fragment and rebuilt the url */
906
0
        sfrag = envvlist2string(fraglenv,"&");
907
0
        nclistfreeall(fraglenv); fraglenv = NULL;
908
#ifdef DEBUG
909
  fprintf(stderr,"frag final: %s\n",sfrag);
910
#endif
911
0
        ncurisetfragments(uri,sfrag);
912
0
        nullfree(sfrag); sfrag = NULL;
913
914
  /* If s3, then rebuild the url */
915
0
  if(NC_iss3(uri)) {
916
0
      NCURI* newuri = NULL;
917
0
      if((stat = NC_s3urlrebuild(uri,NULL,NULL,&newuri))) goto done;
918
0
      ncurifree(uri);
919
0
      uri = newuri;
920
0
  } else if(strcmp(uri->protocol,"file")==0) {
921
            /* convert path to absolute */
922
0
      char* canon = NULL;
923
0
      abspath = NCpathabsolute(uri->path);
924
0
      if((stat = NCpathcanonical(abspath,&canon))) goto done;
925
0
      nullfree(abspath);
926
0
      abspath = canon; canon = NULL;
927
0
      if((stat = ncurisetpath(uri,abspath))) goto done;
928
0
  }
929
  
930
  /* rebuild the path */
931
0
        if(newpathp) {
932
0
            *newpathp = ncuribuild(uri,NULL,NULL,NCURIALL);
933
#ifdef DEBUG
934
      fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr);
935
#endif    
936
0
  }
937
938
        /* Phase 5: Process the mode key to see if we can tell the formatx */
939
0
        modeval = ncurifragmentlookup(uri,"mode");
940
0
        if(modeval != NULL) {
941
0
      if((stat = parseonchar(modeval,',',modeargs))) goto done;
942
0
            for(i=0;i<nclistlength(modeargs);i++) {
943
0
          const char* arg = nclistget(modeargs,i);
944
0
          if((stat=processmodearg(arg,model))) goto done;
945
0
            }
946
0
  }
947
948
        /* Phase 6: Process the non-mode keys to see if we can tell the formatx */
949
0
  if(!modelcomplete(model)) {
950
0
      const char** p = ncurifragmentparams(uri); /* envv format */
951
0
      if(p != NULL) {
952
0
          for(;*p;p+=2) {
953
0
        const char* key = p[0];
954
0
        const char* value = p[1];;
955
0
              if((stat=processfragmentkeys(key,value,model))) goto done;
956
0
          }
957
0
      }
958
0
  }
959
960
        /* Phase 7: Special cases: if this is a URL and model.impl is still not defined */
961
        /* Phase7a: Default is DAP2 */
962
0
        if(!modelcomplete(model)) {
963
0
      model->impl = NC_FORMATX_DAP2;
964
0
      model->format = NC_FORMAT_NC3;
965
0
        }
966
967
114
    } else {/* Not URL */
968
114
  if(newpathp) *newpathp = NULL;
969
114
    }
970
971
    /* Phase 8: mode inference from mode flags */
972
    /* The modeargs did not give us a model (probably not a URL).
973
       So look at the combination of mode flags and the useparallel flag */
974
114
    if(!modelcomplete(model)) {
975
114
        if((stat = NC_omodeinfer(useparallel,omode,model))) goto done;
976
114
    }
977
978
    /* Phase 9: Infer from file content, if possible;
979
       this has highest precedence, so it may override
980
       previous decisions. Note that we do this last
981
       because we need previously determined model info
982
       to guess if this file is readable.
983
    */
984
114
    if(!iscreate && isreadable(uri,model)) {
985
  /* Ok, we need to try to read the file */
986
114
  if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done;
987
114
    }
988
989
    /* Need a decision */
990
104
    if(!modelcomplete(model))
991
0
  {stat = NC_ENOTNC; goto done;}
992
993
    /* Force flag consistency */
994
104
    switch (model->impl) {
995
0
    case NC_FORMATX_NC4:
996
0
    case NC_FORMATX_NC_HDF4:
997
0
    case NC_FORMATX_DAP4:
998
0
    case NC_FORMATX_NCZARR:
999
0
  omode |= NC_NETCDF4;
1000
0
  if(model->format == NC_FORMAT_NETCDF4_CLASSIC)
1001
0
      omode |= NC_CLASSIC_MODEL;
1002
0
  break;
1003
104
    case NC_FORMATX_NC3:
1004
104
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1005
104
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1006
62
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1007
104
  break;
1008
0
    case NC_FORMATX_PNETCDF:
1009
0
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1010
0
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1011
0
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1012
0
  break;
1013
0
    case NC_FORMATX_DAP2:
1014
0
  omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL);
1015
0
  break;
1016
0
    case NC_FORMATX_UDF0:
1017
0
    case NC_FORMATX_UDF1:
1018
0
        if(model->format == NC_FORMAT_64BIT_OFFSET) 
1019
0
            omode |= NC_64BIT_OFFSET;
1020
0
        else if(model->format == NC_FORMAT_64BIT_DATA)
1021
0
            omode |= NC_64BIT_DATA;
1022
0
        else if(model->format == NC_FORMAT_NETCDF4)  
1023
0
            omode |= NC_NETCDF4;
1024
0
        else if(model->format == NC_FORMAT_NETCDF4_CLASSIC)  
1025
0
            omode |= NC_NETCDF4|NC_CLASSIC_MODEL;
1026
0
        break;
1027
0
    default:
1028
0
  {stat = NC_ENOTNC; goto done;}
1029
104
    }
1030
1031
114
done:
1032
114
    nullfree(sfrag);
1033
114
    nullfree(abspath);
1034
114
    ncurifree(uri);
1035
114
    nclistfreeall(modeargs);
1036
114
    nclistfreeall(fraglenv);
1037
114
    *omodep = omode; /* in/out */
1038
114
    return check(stat);
1039
104
}
1040
1041
static int
1042
isreadable(NCURI* uri, NCmodel* model)
1043
114
{
1044
114
    int canread = 0;
1045
114
    struct Readable* r;
1046
    /* Step 1: Look up the implementation */
1047
114
    for(r=readable;r->impl;r++) {
1048
114
  if(model->impl == r->impl) {canread = r->readable; break;}
1049
114
    }
1050
    /* Step 2: check for bytes mode */
1051
114
    if(!canread && NC_testmode(uri,"bytes") && (model->impl == NC_FORMATX_NC4 || model->impl == NC_FORMATX_NC_HDF5))
1052
0
        canread = 1;
1053
114
    return canread;
1054
114
}
1055
1056
#if 0
1057
/*
 * Return a newly allocated copy of s, treating NULL as the empty string.
 * The caller owns (and must free) the result; strdup may return NULL
 * on allocation failure.
 */
static char*
emptyify(char* s)
{
    /* Copy the literal directly: duplicating a temporary strdup("")
       a second time would leak the first copy. */
    return strdup(s == NULL ? "" : s);
}
1063
1064
/*
 * Map the empty string to NULL; any other input (including NULL)
 * is returned unchanged. Nothing is allocated.
 */
static const char*
nullify(const char* s)
{
    if(s == NULL)
        return s;
    return (*s == '\0') ? NULL : s;
}
1071
#endif
1072
1073
/**************************************************/
1074
/**************************************************/
1075
/**
1076
 * Provide a hidden interface to allow utilities
1077
 * to check if a given path name is really a url.
1078
 * If not, put null in basenamep, else put basename of the url path
1079
 * minus any extension into basenamep; caller frees.
1080
 * Return 1 if it looks like a url, 0 otherwise.
1081
 */
1082
1083
int
1084
nc__testurl(const char* path0, char** basenamep)
1085
0
{
1086
0
    NCURI* uri = NULL;
1087
0
    int ok = 0;
1088
0
    char* path = NULL;
1089
1090
0
    if(!ncuriparse(path0,&uri)) {
1091
0
  char* p;
1092
0
  char* q;
1093
0
  path = strdup(uri->path);
1094
0
  if(path == NULL||strlen(path)==0) goto done;
1095
0
        p = strrchr(path, '/');
1096
0
  if(p == NULL) p = path; else p++;
1097
0
  q = strrchr(p,'.');
1098
0
        if(q != NULL) *q = '\0';
1099
0
  if(strlen(p) == 0) goto done;
1100
0
  if(basenamep)
1101
0
            *basenamep = strdup(p);
1102
0
  ok = 1;
1103
0
    }
1104
0
done:
1105
0
    ncurifree(uri);
1106
0
    nullfree(path);
1107
0
    return ok;
1108
0
}
1109
1110
/**************************************************/
1111
/* Envv list utilities */
1112
1113
static const char*
1114
getmodekey(const NClist* envv)
1115
0
{
1116
0
    int i;
1117
    /* Get "mode" entry */
1118
0
    for(i=0;i<nclistlength(envv);i+=2) {
1119
0
  char* key = NULL;
1120
0
  key = nclistget(envv,i);
1121
0
  if(strcasecmp(key,"mode")==0)
1122
0
      return nclistget(envv,i+1);
1123
0
    }
1124
0
    return NULL;
1125
0
}
1126
1127
static int
1128
replacemode(NClist* envv, const char* newval)
1129
0
{
1130
0
    int i;
1131
    /* Get "mode" entry */
1132
0
    for(i=0;i<nclistlength(envv);i+=2) {
1133
0
  char* key = NULL;
1134
0
  char* val = NULL;
1135
0
  key = nclistget(envv,i);
1136
0
  if(strcasecmp(key,"mode")==0) {
1137
0
      val = nclistget(envv,i+1);      
1138
0
      nclistset(envv,i+1,strdup(newval));
1139
0
      nullfree(val);
1140
0
      return NC_NOERR;
1141
0
  }
1142
0
    }
1143
0
    return NC_EINVAL;
1144
0
}
1145
1146
static NClist*
1147
parsemode(const char* modeval)
1148
0
{
1149
0
    NClist* modes = nclistnew();
1150
0
    if(modeval)
1151
0
        (void)parseonchar(modeval,',',modes);/* split on commas */
1152
0
    return modes;    
1153
0
}
1154
1155
/* Convert a list into a comma'd string */
1156
static char*
1157
list2string(NClist* list)
1158
0
{
1159
0
    int i;
1160
0
    NCbytes* buf = NULL;
1161
0
    char* result = NULL;
1162
1163
0
    if(list == NULL || nclistlength(list)==0) return strdup("");
1164
0
    buf = ncbytesnew();
1165
0
    for(i=0;i<nclistlength(list);i++) {
1166
0
  const char* m = nclistget(list,i);
1167
0
  if(m == NULL || strlen(m) == 0) continue;
1168
0
  if(i > 0) ncbytescat(buf,",");
1169
0
  ncbytescat(buf,m);
1170
0
    }
1171
0
    result = ncbytesextract(buf);
1172
0
    ncbytesfree(buf);
1173
0
    if(result == NULL) result = strdup("");
1174
0
    return result;
1175
0
}
1176
1177
#if 0
1178
/* Given a comma separated string, remove duplicates; mostly used to cleanup mode list */
1179
static char* 
1180
cleancommalist(const char* commalist, int caseinsensitive)
1181
{
1182
    NClist* tmp = nclistnew();
1183
    char* newlist = NULL;
1184
    if(commalist == NULL || strlen(commalist)==0) return nulldup(commalist);
1185
    (void)parseonchar(commalist,',',tmp);/* split on commas */
1186
    cleanstringlist(tmp,caseinsensitive);
1187
    newlist = list2string(tmp);
1188
    nclistfreeall(tmp);
1189
    return newlist;
1190
}
1191
#endif
1192
1193
/* Given a list of strings, remove nulls and duplicated */
1194
static void
1195
cleanstringlist(NClist* strs, int caseinsensitive)
1196
0
{
1197
0
    int i,j;
1198
0
    if(nclistlength(strs) == 0) return;
1199
    /* Remove nulls */
1200
0
    for(i=nclistlength(strs)-1;i>=0;i--) {
1201
0
        if(nclistget(strs,i)==NULL) nclistremove(strs,i);
1202
0
    }
1203
    /* Remove duplicates*/
1204
0
    for(i=0;i<nclistlength(strs);i++) {
1205
0
        const char* value = nclistget(strs,i);
1206
  /* look ahead for duplicates */
1207
0
        for(j=nclistlength(strs)-1;j>i;j--) {
1208
0
      int match;
1209
0
            const char* candidate = nclistget(strs,j);
1210
0
            if(caseinsensitive)
1211
0
          match = (strcasecmp(value,candidate) == 0);
1212
0
      else
1213
0
    match = (strcmp(value,candidate) == 0);
1214
0
      if(match) {char* dup = nclistremove(strs,j); nullfree(dup);}
1215
0
  }
1216
0
    }
1217
0
}
1218
1219
1220
/**************************************************/
1221
/**
1222
 * @internal Given an existing file, figure out its format and return
1223
 * that format value (NC_FORMATX_XXX) in model arg. Assume any path
1224
 * conversion was already performed at a higher level.
1225
 *
1226
 * @param path File name.
1227
 * @param flags
1228
 * @param use_parallel
1229
 * @param parameters
1230
 * @param model Pointer that gets the model to use for the dispatch table.
1231
 * @param version Pointer that gets version of the file.
1232
 *
1233
 * @return ::NC_NOERR No error.
1234
 * @author Dennis Heimbigner
1235
*/
1236
static int
1237
check_file_type(const char *path, int omode, int use_parallel,
1238
       void *parameters, NCmodel* model, NCURI* uri)
1239
114
{
1240
114
    char magic[NC_MAX_MAGIC_NUMBER_LEN];
1241
114
    int status = NC_NOERR;
1242
114
    struct MagicFile magicinfo;
1243
#ifdef _WIN32
1244
    NC* nc = NULL;
1245
#endif
1246
1247
114
    memset((void*)&magicinfo,0,sizeof(magicinfo));
1248
1249
#ifdef _WIN32 /* including MINGW */
1250
    /* Windows does not handle multiple handles to the same file very well.
1251
       So if file is already open/created, then find it and just get the
1252
       model from that. */
1253
    if((nc = find_in_NCList_by_name(path)) != NULL) {
1254
  int format = 0;
1255
  /* Get the model from this NC */
1256
  if((status = nc_inq_format_extended(nc->ext_ncid,&format,NULL))) goto done;
1257
  model->impl = format;
1258
  if((status = nc_inq_format(nc->ext_ncid,&format))) goto done;
1259
  model->format = format;
1260
  goto done;
1261
    }
1262
#endif
1263
1264
114
    magicinfo.path = path; /* do not free */
1265
114
    magicinfo.uri = uri; /* do not free */
1266
114
    magicinfo.omode = omode;
1267
114
    magicinfo.model = model; /* do not free */
1268
114
    magicinfo.parameters = parameters; /* do not free */
1269
#ifdef USE_STDIO
1270
    magicinfo.use_parallel = 0;
1271
#else
1272
114
    magicinfo.use_parallel = use_parallel;
1273
114
#endif
1274
1275
114
    if((status = openmagic(&magicinfo))) goto done;
1276
1277
    /* Verify we have a large enough file */
1278
114
    if(magicinfo.filelen < (unsigned long long)MAGIC_NUMBER_LEN)
1279
0
  {status = NC_ENOTNC; goto done;}
1280
114
    if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) {
1281
0
  status = NC_ENOTNC;
1282
0
  goto done;
1283
0
    }
1284
1285
    /* Look at the magic number */
1286
114
    if(NC_interpret_magic_number(magic,model) == NC_NOERR
1287
114
  && model->format != 0) {
1288
104
        if (use_parallel && (model->format == NC_FORMAT_NC3 || model->impl == NC_FORMATX_NC3))
1289
            /* this is called from nc_open_par() and file is classic */
1290
0
            model->impl = NC_FORMATX_PNETCDF;
1291
104
        goto done; /* found something */
1292
104
    }
1293
1294
    /* Remaining case when implementation is an HDF5 file;
1295
       search forward at starting at 512
1296
       and doubling to see if we have HDF5 magic number */
1297
10
    {
1298
10
  long pos = 512L;
1299
101
        for(;;) {
1300
101
      if((pos+MAGIC_NUMBER_LEN) > magicinfo.filelen)
1301
10
    {status = NC_ENOTNC; goto done;}
1302
91
            if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR)
1303
0
          {status = NC_ENOTNC; goto done; }
1304
91
            NC_interpret_magic_number(magic,model);
1305
91
            if(model->impl == NC_FORMATX_NC4) break;
1306
      /* double and try again */
1307
91
      pos = 2*pos;
1308
91
        }
1309
10
    }
1310
114
done:
1311
114
    closemagic(&magicinfo);
1312
114
    return check(status);
1313
10
}
1314
1315
/**
1316
\internal
1317
\ingroup datasets
1318
Provide open, read and close for use when searching for magic numbers
1319
*/
1320
static int
1321
openmagic(struct MagicFile* file)
1322
114
{
1323
114
    int status = NC_NOERR;
1324
114
    if(fIsSet(file->omode,NC_INMEMORY)) {
1325
  /* Get its length */
1326
114
  NC_memio* meminfo = (NC_memio*)file->parameters;
1327
114
        assert(meminfo != NULL);
1328
114
  file->filelen = (long long)meminfo->size;
1329
114
  goto done;
1330
114
    }
1331
0
    if(file->uri != NULL) {
1332
0
#ifdef ENABLE_BYTERANGE
1333
  /* Construct a URL minus any fragment */
1334
0
        file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC);
1335
  /* Open the curl handle */
1336
0
        if((status=nc_http_open(file->curlurl, &file->state))) goto done;
1337
0
  if((status=nc_http_size(file->state,&file->filelen))) goto done;
1338
#else /*!BYTERANGE*/
1339
  {status = NC_ENOTBUILT;}
1340
#endif /*BYTERANGE*/
1341
0
  goto done;
1342
0
    } 
1343
#ifdef USE_PARALLEL
1344
    if (file->use_parallel) {
1345
  int retval;
1346
  MPI_Offset size;
1347
        assert(file->parameters != NULL);
1348
  if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm,
1349
                                   (char*)file->path,MPI_MODE_RDONLY,
1350
                                   ((NC_MPI_INFO*)file->parameters)->info,
1351
                                   &file->fh)) != MPI_SUCCESS) {
1352
#ifdef MPI_ERR_NO_SUCH_FILE
1353
      int errorclass;
1354
      MPI_Error_class(retval, &errorclass);
1355
      if (errorclass == MPI_ERR_NO_SUCH_FILE)
1356
#ifdef NC_ENOENT
1357
          status = NC_ENOENT;
1358
#else /*!NC_ENOENT*/
1359
    status = errno;
1360
#endif /*NC_ENOENT*/
1361
      else
1362
#endif /*MPI_ERR_NO_SUCH_FILE*/
1363
          status = NC_EPARINIT;
1364
      file->fh = MPI_FILE_NULL;
1365
      goto done;
1366
  }
1367
  /* Get its length */
1368
  if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS)
1369
      {status = NC_EPARINIT; goto done;}
1370
  file->filelen = (long long)size;
1371
  goto done;
1372
    }
1373
#endif /* USE_PARALLEL */
1374
0
    {
1375
0
        if (file->path == NULL || strlen(file->path) == 0)
1376
0
            {status = NC_EINVAL; goto done;}
1377
0
        file->fp = NCfopen(file->path, "r");
1378
0
        if(file->fp == NULL)
1379
0
      {status = errno; goto done;}
1380
  /* Get its length */
1381
0
  {
1382
0
      int fd = fileno(file->fp);
1383
#ifdef _WIN32
1384
      __int64 len64 = _filelengthi64(fd);
1385
      if(len64 < 0)
1386
    {status = errno; goto done;}
1387
      file->filelen = (long long)len64;
1388
#else
1389
0
      off_t size;
1390
0
      size = lseek(fd, 0, SEEK_END);
1391
0
      if(size == -1)
1392
0
    {status = errno; goto done;}
1393
0
    file->filelen = (long long)size;
1394
0
#endif
1395
0
  }
1396
0
        rewind(file->fp);
1397
0
    }
1398
114
done:
1399
114
    return check(status);
1400
0
}
1401
1402
static int
1403
readmagic(struct MagicFile* file, long pos, char* magic)
1404
205
{
1405
205
    int status = NC_NOERR;
1406
205
    NCbytes* buf = ncbytesnew();
1407
1408
205
    memset(magic,0,MAGIC_NUMBER_LEN);
1409
205
    if(fIsSet(file->omode,NC_INMEMORY)) {
1410
205
  char* mempos;
1411
205
  NC_memio* meminfo = (NC_memio*)file->parameters;
1412
205
  if((pos + MAGIC_NUMBER_LEN) > meminfo->size)
1413
0
      {status = NC_EINMEMORY; goto done;}
1414
205
  mempos = ((char*)meminfo->memory) + pos;
1415
205
  memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN);
1416
#ifdef DEBUG
1417
  printmagic("XXX: readmagic",magic,file);
1418
#endif
1419
205
    } else if(file->uri != NULL) {
1420
0
#ifdef ENABLE_BYTERANGE
1421
0
  fileoffset_t start = (size_t)pos;
1422
0
  fileoffset_t count = MAGIC_NUMBER_LEN;
1423
0
        status = nc_http_read(file->state, start, count, buf);
1424
0
        if (status == NC_NOERR) {
1425
0
            if (ncbyteslength(buf) != count)
1426
0
                status = NC_EINVAL;
1427
0
            else
1428
0
                memcpy(magic, ncbytescontents(buf), count);
1429
0
        }
1430
0
#endif
1431
0
    } else {
1432
#ifdef USE_PARALLEL
1433
        if (file->use_parallel) {
1434
      MPI_Status mstatus;
1435
      int retval;
1436
      if((retval = MPI_File_read_at_all(file->fh, pos, magic,
1437
          MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS)
1438
          {status = NC_EPARINIT; goto done;}
1439
        }
1440
        else
1441
#endif /* USE_PARALLEL */
1442
0
        { /* Ordinary read */
1443
0
            long i;
1444
0
            i = fseek(file->fp, pos, SEEK_SET);
1445
0
            if (i < 0) { status = errno; goto done; }
1446
0
            ncbytessetlength(buf, 0);
1447
0
            if ((status = NC_readfileF(file->fp, buf, MAGIC_NUMBER_LEN))) goto done;
1448
0
            memcpy(magic, ncbytescontents(buf), MAGIC_NUMBER_LEN);
1449
0
        }
1450
0
    }
1451
1452
205
done:
1453
205
    ncbytesfree(buf);
1454
205
    if(file && file->fp) clearerr(file->fp);
1455
205
    return check(status);
1456
205
}
1457
1458
/**
1459
 * Close the file opened to check for magic number.
1460
 *
1461
 * @param file pointer to the MagicFile struct for this open file.
1462
 * @returns NC_NOERR for success
1463
 * @returns NC_EPARINIT if there was a problem closing file with MPI
1464
 * (parallel builds only).
1465
 * @author Dennis Heimbigner
1466
 */
1467
static int
1468
closemagic(struct MagicFile* file)
1469
114
{
1470
114
    int status = NC_NOERR;
1471
1472
114
    if(fIsSet(file->omode,NC_INMEMORY)) {
1473
  /* noop */
1474
114
    } else if(file->uri != NULL) {
1475
0
#ifdef ENABLE_BYTERANGE
1476
0
      status = nc_http_close(file->state);
1477
0
#endif
1478
0
      nullfree(file->curlurl);
1479
0
    } else {
1480
#ifdef USE_PARALLEL
1481
        if (file->use_parallel) {
1482
      int retval;
1483
      if(file->fh != MPI_FILE_NULL
1484
         && (retval = MPI_File_close(&file->fh)) != MPI_SUCCESS)
1485
        {status = NC_EPARINIT; return status;}
1486
        } else
1487
#endif
1488
0
        {
1489
0
      if(file->fp) fclose(file->fp);
1490
0
        }
1491
0
    }
1492
114
    return status;
1493
114
}
1494
1495
/*!
1496
  Interpret the magic number found in the header of a netCDF file.
1497
  This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags.
1498
1499
  @param[in] magic Pointer to a character array with the magic number block.
1500
  @param[out] model Pointer to an integer to hold the corresponding netCDF type.
1501
  @param[out] version Pointer to an integer to hold the corresponding netCDF version.
1502
  @returns NC_NOERR if a legitimate file type found
1503
  @returns NC_ENOTNC otherwise
1504
1505
\internal
1506
\ingroup datasets
1507
1508
*/
1509
static int
1510
NC_interpret_magic_number(char* magic, NCmodel* model)
1511
205
{
1512
205
    int status = NC_NOERR;
1513
205
    int tmpimpl = 0;
1514
    /* Look at the magic number */
1515
205
    if(model->impl == NC_FORMATX_UDF0 || model->impl == NC_FORMATX_UDF1)
1516
0
        tmpimpl = model->impl;
1517
1518
    /* Use the complete magic number string for HDF5 */
1519
205
    if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) {
1520
0
  model->impl = NC_FORMATX_NC4;
1521
0
  model->format = NC_FORMAT_NETCDF4;
1522
0
  goto done;
1523
0
    }
1524
205
    if(magic[0] == '\016' && magic[1] == '\003'
1525
205
              && magic[2] == '\023' && magic[3] == '\001') {
1526
0
  model->impl = NC_FORMATX_NC_HDF4;
1527
0
  model->format = NC_FORMAT_NETCDF4;
1528
0
  goto done;
1529
0
    }
1530
205
    if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') {
1531
128
        if(magic[3] == '\001') {
1532
35
      model->impl = NC_FORMATX_NC3;
1533
35
      model->format = NC_FORMAT_CLASSIC;
1534
35
      goto done;
1535
35
  }
1536
93
        if(magic[3] == '\002') {
1537
50
      model->impl = NC_FORMATX_NC3;
1538
50
      model->format = NC_FORMAT_64BIT_OFFSET;
1539
50
      goto done;
1540
50
        }
1541
43
        if(magic[3] == '\005') {
1542
43
    model->impl = NC_FORMATX_NC3;
1543
43
    model->format = NC_FORMAT_64BIT_DATA;
1544
43
    goto done;
1545
43
  }
1546
43
     }
1547
     /* No match  */
1548
77
     if (!tmpimpl) 
1549
77
         status = NC_ENOTNC;         
1550
1551
77
     goto done;
1552
1553
205
done:
1554
     /* if model->impl was UDF0 or UDF1 on entry, make it so on exit */
1555
205
     if(tmpimpl)
1556
0
         model->impl = tmpimpl;
1557
     /* if this is a UDF magic_number update the model->impl */
1558
205
     if (strlen(UDF0_magic_number) && !strncmp(UDF0_magic_number, magic,
1559
0
                                               strlen(UDF0_magic_number)))
1560
0
     {
1561
0
         model->impl = NC_FORMATX_UDF0;
1562
0
         status = NC_NOERR;
1563
0
     }
1564
205
     if (strlen(UDF1_magic_number) && !strncmp(UDF1_magic_number, magic,
1565
0
                                               strlen(UDF1_magic_number)))
1566
0
     {
1567
0
         model->impl = NC_FORMATX_UDF1;
1568
0
         status = NC_NOERR;
1569
0
     }    
1570
1571
205
     return check(status);
1572
205
}
1573
1574
#ifdef DEBUG
1575
static void
1576
printmagic(const char* tag, char* magic, struct MagicFile* f)
1577
{
1578
    int i;
1579
    fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel);
1580
    for(i=0;i<MAGIC_NUMBER_LEN;i++) {
1581
        unsigned int c = (unsigned int)magic[i];
1582
  c = c & 0x000000FF;
1583
  if(c == '\n')
1584
      fprintf(stderr," 0x%0x/'\\n'",c);
1585
  else if(c == '\r')
1586
      fprintf(stderr," 0x%0x/'\\r'",c);
1587
  else if(c < ' ')
1588
      fprintf(stderr," 0x%0x/'?'",c);
1589
  else
1590
      fprintf(stderr," 0x%0x/'%c'",c,c);
1591
    }
1592
    fprintf(stderr,"\n");
1593
    fflush(stderr);
1594
}
1595
1596
static void
1597
printlist(NClist* list, const char* tag)
1598
{
1599
    int i;
1600
    fprintf(stderr,"%s:",tag);
1601
    for(i=0;i<nclistlength(list);i++) {
1602
        fprintf(stderr," %s",(char*)nclistget(list,i));
1603
  fprintf(stderr,"[%p]",(char*)nclistget(list,i));
1604
    }
1605
    fprintf(stderr,"\n");
1606
    dbgflush();
1607
}
1608
1609
1610
#endif