Coverage Report

Created: 2025-10-28 07:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/netcdf-c/libdispatch/dinfermodel.c
Line
Count
Source
1
/**
2
 * @file
3
 *
4
 * Infer as much as possible from the omode + path.
5
 * Rewrite the path to a canonical form.
6
 *
7
 * Copyright 2018 University Corporation for Atmospheric
8
 * Research/Unidata. See COPYRIGHT file for more info.
9
*/
10
#include "config.h"
11
#include <stddef.h>
12
#include <stdlib.h>
13
#include <string.h>
14
#ifdef HAVE_UNISTD_H
15
#include <unistd.h>
16
#endif
17
#ifdef HAVE_SYS_TYPES_H
18
#include <sys/types.h>
19
#endif
20
#ifndef _WIN32
21
#ifdef USE_HDF5
22
#include <hdf5.h>
23
#endif /* USE_HDF5 */
24
#endif /* _WIN32 */
25
#ifdef HAVE_SYS_XATTR_H
26
#include <sys/xattr.h>
27
#endif
28
29
#include "ncdispatch.h"
30
#include "ncpathmgr.h"
31
#include "netcdf_mem.h"
32
#include "fbits.h"
33
#include "ncbytes.h"
34
#include "nclist.h"
35
#include "nclog.h"
36
#include "nchttp.h"
37
#include "ncutil.h"
38
#ifdef NETCDF_ENABLE_S3
39
#include "ncs3sdk.h"
40
#endif
41
42
#ifndef nulldup
43
 #define nulldup(x) ((x)?strdup(x):(x))
44
#endif
45
46
#undef DEBUG
47
48
/* If Defined, then use only stdio for all magic number io;
49
   otherwise use stdio or mpio as required.
50
 */
51
#undef USE_STDIO
52
53
/**
54
Sort info for open/read/close of
55
file when searching for magic numbers
56
*/
57
struct MagicFile {
58
    const char* path;
59
    struct NCURI* uri;
60
    int omode;
61
    NCmodel* model;
62
    long long filelen;
63
    int use_parallel;
64
    int iss3;
65
    void* parameters; /* !NULL if inmemory && !diskless */
66
    FILE* fp;
67
#ifdef USE_PARALLEL
68
    MPI_File fh;
69
#endif
70
    char* curlurl; /* url to use with CURLOPT_SET_URL */
71
    NC_HTTP_STATE* state;
72
#ifdef NETCDF_ENABLE_S3
73
    NCS3INFO s3;
74
    void* s3client;
75
    char* errmsg;
76
#endif
77
};
78
79
/** @internal Magic number for HDF5 files. To be consistent with
80
 * H5Fis_hdf5, use the complete HDF5 magic number */
81
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";
82
83
630
#define modelcomplete(model) ((model)->impl != 0)
84
85
#ifdef DEBUG
/* Flush stdout, then stderr */
static void dbgflush(void)
{
    fflush(stdout);
    fflush(stderr);
}

/* Intentionally empty; presumably a spot for a debugger breakpoint */
static void
fail(int err)
{
    (void)err;
}

/* Pass err through unchanged, calling fail() first when it is an error */
static int
check(int err)
{
    if(err == NC_NOERR)
        return err;
    fail(err);
    return err;
}
#else
/* Without DEBUG, check() is just the identity */
#define check(err) (err)
#endif
108
109
/*
110
Define a table of "mode=" string values
111
from which the implementation can be inferred.
112
Note that only cases that can currently
113
take URLs are included.
114
*/
115
static struct FORMATMODES {
116
    const char* tag;
117
    const int impl; /* NC_FORMATX_XXX value */
118
    const int format; /* NC_FORMAT_XXX value */
119
} formatmodes[] = {
120
{"dap2",NC_FORMATX_DAP2,NC_FORMAT_CLASSIC},
121
{"dap4",NC_FORMATX_DAP4,NC_FORMAT_NETCDF4},
122
{"netcdf-3",NC_FORMATX_NC3,0}, /* Might be e.g. cdf5 */
123
{"classic",NC_FORMATX_NC3,0}, /* ditto */
124
{"netcdf-4",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
125
{"enhanced",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
126
{"udf0",NC_FORMATX_UDF0,0},
127
{"udf1",NC_FORMATX_UDF1,0},
128
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
129
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
130
{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
131
{NULL,0},
132
};
133
134
/* Replace top-level name with defkey=defvalue(s).
   defvalues is a NULL-terminated list of at most three values. */
static const struct MACRODEF {
    char* name;
    char* defkey;
    char* defvalues[4];
} macrodefs[] = {
    {"zarr",     "mode", {"nczarr", "zarr", NULL}},
    {"dap2",     "mode", {"dap2", NULL}},
    {"dap4",     "mode", {"dap4", NULL}},
    {"s3",       "mode", {"s3", "nczarr", NULL}},
    {"bytes",    "mode", {"bytes", NULL}},
    {"xarray",   "mode", {"zarr", NULL}},
    {"noxarray", "mode", {"nczarr", "noxarray", NULL}},
    /* NOTE(review): repeats the first row; processmacros stops at the
       first matching name, so this row is never reached there. */
    {"zarr",     "mode", {"nczarr", "zarr", NULL}},
    {"gs3",      "mode", {"gs3", "nczarr", NULL}}, /* Google S3 API */
    {NULL, NULL, {NULL}}
};
151
152
/*
Mode inferences: if the mode contains `key`, then add `inference`.
Warning: be careful how this list is constructed to avoid infinite inferences.
In order to (mostly) avoid that consequence, any attempt to
infer a value that is already present will be ignored.
This effectively means that the inference graph
must be a DAG and may not have cycles.
You have been warned.
*/
static const struct MODEINFER {
    char* key;
    char* inference;
} modeinferences[] = {
    {"zarr",     "nczarr"},
    {"xarray",   "zarr"},
    {"noxarray", "nczarr"},
    {"noxarray", "zarr"},
    {NULL, NULL}
};
171
172
/* Mode negations: if mode contains key, then remove all occurrences of the inference and repeat */
173
static const struct MODEINFER modenegations[] = {
174
{"bytes","nczarr"}, /* bytes negates (nc)zarr */
175
{"bytes","zarr"},
176
{"noxarray","xarray"},
177
{NULL,NULL}
178
};
179
180
/* Map FORMATX to readability to get magic number */
181
static struct Readable {
182
    int impl;
183
    int readable;
184
} readable[] = {
185
{NC_FORMATX_NC3,1},
186
{NC_FORMATX_NC_HDF5,1},
187
{NC_FORMATX_NC_HDF4,1},
188
{NC_FORMATX_PNETCDF,1},
189
{NC_FORMATX_DAP2,0},
190
{NC_FORMATX_DAP4,0},
191
{NC_FORMATX_UDF0,1},
192
{NC_FORMATX_UDF1,1},
193
{NC_FORMATX_NCZARR,0}, /* eventually make readable */
194
{0,0},
195
};
196
197
/* Define the known URL protocols and their interpretation.
   substitute: protocol to install in place of the matched one (NULL => keep it)
   fragments:  '&'-separated key=value pairs implied by the protocol */
static struct NCPROTOCOLLIST {
    const char* protocol;
    const char* substitute;
    const char* fragments; /* arbitrary fragment arguments */
} ncprotolist[] = {
    {"http",  NULL,   NULL},
    {"https", NULL,   NULL},
    {"file",  NULL,   NULL},
    {"dods",  "http", "mode=dap2"},
    {"dap4",  "http", "mode=dap4"},
    {"s3",    "s3",   "mode=s3"},
    {"gs3",   "gs3",  "mode=gs3"},
    {NULL,    NULL,   NULL} /* Terminate search */
};
212
213
/* Forward */
214
static int NC_omodeinfer(int useparallel, int omode, NCmodel*);
215
static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri);
216
static int processuri(const char* path, NCURI** urip, NClist* fraglist);
217
static int processmacros(NClist* fraglistp, NClist* expanded);
218
static char* envvlist2string(NClist* pairs, const char*);
219
static void set_default_mode(int* cmodep);
220
static int parseonchar(const char* s, int ch, NClist* segments);
221
static int mergelist(NClist** valuesp);
222
223
static int openmagic(struct MagicFile* file);
224
static int readmagic(struct MagicFile* file, size_t pos, char* magic);
225
static int closemagic(struct MagicFile* file);
226
static int NC_interpret_magic_number(char* magic, NCmodel* model);
227
#ifdef DEBUG
228
static void printmagic(const char* tag, char* magic,struct MagicFile*);
229
static void printlist(NClist* list, const char* tag);
230
#endif
231
static int isreadable(NCURI*,NCmodel*);
232
static char* list2string(NClist*);
233
static int parsepair(const char* pair, char** keyp, char** valuep);
234
static NClist* parsemode(const char* modeval);
235
static const char* getmodekey(const NClist* envv);
236
static int replacemode(NClist* envv, const char* newval);
237
static void infernext(NClist* current, NClist* next);
238
static int negateone(const char* mode, NClist* modes);
239
static void cleanstringlist(NClist* strs, int caseinsensitive);
240
static int isdaoscontainer(const char* path);
241
242
/*
243
If the path looks like a URL, then parse it, reformat it.
244
*/
245
static int
246
processuri(const char* path, NCURI** urip, NClist* fraglenv)
247
339
{
248
339
    int stat = NC_NOERR;
249
339
    int found = 0;
250
339
    NClist* tmp = NULL;
251
339
    struct NCPROTOCOLLIST* protolist;
252
339
    NCURI* uri = NULL;
253
339
    size_t pathlen = strlen(path);
254
339
    char* str = NULL;
255
339
    const NClist* ufrags;
256
339
    size_t i;
257
258
339
    if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;}
259
260
    /* Defaults */
261
339
    if(urip) *urip = NULL;
262
263
339
    ncuriparse(path,&uri);
264
339
    if(uri == NULL) goto done; /* not url */
265
266
    /* Look up the protocol */
267
0
    for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) {
268
0
        if(strcmp(uri->protocol,protolist->protocol) == 0) {
269
0
      found = 1;
270
0
      break;
271
0
  }
272
0
    }
273
0
    if(!found)
274
0
  {stat = NC_EINVAL; goto done;} /* unrecognized URL form */
275
276
    /* process the corresponding fragments for that protocol */
277
0
    if(protolist->fragments != NULL) {
278
0
  tmp = nclistnew();
279
0
  if((stat = parseonchar(protolist->fragments,'&',tmp))) goto done;
280
0
  for(i=0;i<nclistlength(tmp);i++) {
281
0
      char* key=NULL;
282
0
          char* value=NULL;
283
0
      if((stat = parsepair(nclistget(tmp,i),&key,&value))) goto done;
284
0
      if(value == NULL) value = strdup("");
285
0
      nclistpush(fraglenv,key);
286
0
          nclistpush(fraglenv,value);
287
0
  }
288
0
  nclistfreeall(tmp); tmp = NULL;
289
0
    }
290
291
    /* Substitute the protocol in any case */
292
0
    if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute);
293
294
    /* capture the fragments of the url */
295
0
    ufrags = (const NClist*)ncurifragmentparams(uri);
296
0
    for(i=0;i<nclistlength(ufrags);i+=2) {
297
0
  const char* key = nclistget(ufrags,i);
298
0
  const char* value = nclistget(ufrags,i+1);
299
0
        nclistpush(fraglenv,nulldup(key));
300
0
  value = (value==NULL?"":value);
301
0
  nclistpush(fraglenv,strdup(value));
302
0
    }
303
0
    if(urip) {
304
0
  *urip = uri;
305
0
  uri = NULL;
306
0
    }
307
308
339
done:
309
339
    nclistfreeall(tmp);
310
339
    nullfree(str);
311
339
    if(uri != NULL) ncurifree(uri);
312
339
    return check(stat);
313
0
}
314
315
/* Split a key=value pair */
316
static int
317
parsepair(const char* pair, char** keyp, char** valuep)
318
0
{
319
0
    const char* p;
320
0
    char* key = NULL;
321
0
    char* value = NULL;
322
323
0
    if(pair == NULL)
324
0
        return NC_EINVAL; /* empty pair */
325
0
    if(pair[0] == '\0' || pair[0] == '=')
326
0
        return NC_EINVAL; /* no key */
327
0
    p = strchr(pair,'=');
328
0
    if(p == NULL) {
329
0
  value = NULL;
330
0
  key = strdup(pair);
331
0
    } else {
332
0
  ptrdiff_t len = (p-pair);
333
0
  if((key = malloc((size_t)len+1))==NULL) return NC_ENOMEM;
334
0
  memcpy(key,pair,(size_t)len);
335
0
  key[len] = '\0';
336
0
  if(p[1] == '\0')
337
0
      value = NULL;
338
0
  else
339
0
      value = strdup(p+1);
340
0
    }
341
0
    if(keyp) {*keyp = key; key = NULL;};
342
0
    if(valuep) {*valuep = value; value = NULL;};
343
0
    nullfree(key);
344
0
    nullfree(value);
345
0
    return NC_NOERR;
346
0
}
347
348
#if 0
/* Disabled: split modestr at each comma and push the pieces onto list */
static int
parseurlmode(const char* modestr, NClist* list)
{
    int stat = NC_NOERR;
    const char* cursor = NULL;

    if(modestr == NULL || modestr[0] == '\0')
        return check(stat);

    /* Split modestr at the commas or EOL */
    for(cursor = modestr;;) {
        const char* comma = strchr(cursor,',');
        const char* stop = (comma != NULL ? comma : cursor + strlen(cursor));
        ptrdiff_t slen = (stop - cursor);
        char* s = malloc(slen+1);
        if(s == NULL) {stat = NC_ENOMEM; break;}
        memcpy(s,cursor,slen);
        s[slen] = '\0';
        nclistpush(list,s);
        if(*stop == '\0') break;
        cursor = stop+1;
    }
    return check(stat);
}
#endif
378
379
/* Split a string at a given char */
380
static int
381
parseonchar(const char* s, int ch, NClist* segments)
382
0
{
383
0
    int stat = NC_NOERR;
384
0
    const char* p = NULL;
385
0
    const char* endp = NULL;
386
387
0
    if(s == NULL || *s == '\0') goto done;
388
389
0
    p = s;
390
0
    for(;;) {
391
0
  char* q;
392
0
  ptrdiff_t slen;
393
0
  endp = strchr(p,ch);
394
0
  if(endp == NULL) endp = p + strlen(p);
395
0
  slen = (endp - p);
396
0
  if((q = malloc((size_t)slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
397
0
  memcpy(q,p,(size_t)slen);
398
0
  q[slen] = '\0';
399
0
  nclistpush(segments,q);
400
0
  if(*endp == '\0') break;
401
0
  p = endp+1;
402
0
    }
403
404
0
done:
405
0
    return check(stat);
406
0
}
407
408
/* Convert a key,value envv pairlist into a delimited string*/
409
static char*
410
envvlist2string(NClist* envv, const char* delim)
411
0
{
412
0
    size_t i;
413
0
    NCbytes* buf = NULL;
414
0
    char* result = NULL;
415
416
0
    if(envv == NULL || nclistlength(envv) == 0) return NULL;
417
0
    buf = ncbytesnew();
418
0
    for(i=0;i<nclistlength(envv);i+=2) {
419
0
  const char* key = nclistget(envv,i);
420
0
  const char* val = nclistget(envv,i+1);
421
0
  if(key == NULL || strlen(key) == 0) continue;
422
0
  assert(val != NULL);
423
0
  if(i > 0) ncbytescat(buf,"&");
424
0
  ncbytescat(buf,key);
425
0
  if(val != NULL && val[0] != '\0') {
426
0
      ncbytescat(buf,"=");
427
0
      ncbytescat(buf,val);
428
0
  }
429
0
    }
430
0
    result = ncbytesextract(buf);
431
0
    ncbytesfree(buf);
432
0
    return result;
433
0
}
434
435
/* Given a mode= argument, fill in the impl */
436
static int
437
processmodearg(const char* arg, NCmodel* model)
438
0
{
439
0
    int stat = NC_NOERR;
440
0
    struct FORMATMODES* format = formatmodes;
441
0
    for(;format->tag;format++) {
442
0
  if(strcmp(format->tag,arg)==0) {
443
0
            model->impl = format->impl;
444
0
      if(format->format != 0) model->format = format->format;
445
0
  }
446
0
    }
447
0
    return check(stat);
448
0
}
449
450
/* Given an envv fragment list, do macro replacement */
451
static int
452
processmacros(NClist* fraglenv, NClist* expanded)
453
0
{
454
0
    size_t i;
455
0
    int stat = NC_NOERR;
456
0
    const struct MACRODEF* macros = NULL;
457
458
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
459
0
  int found = 0;
460
0
  char* key = nclistget(fraglenv,i);
461
0
  char* value = nclistget(fraglenv,i+1);
462
0
  if(strlen(value) == 0) { /* must be a singleton  */
463
0
            for(macros=macrodefs;macros->name;macros++) {
464
0
                if(strcmp(macros->name,key)==0) {
465
0
        char* const * p;
466
0
        nclistpush(expanded,strdup(macros->defkey));
467
0
        for(p=macros->defvalues;*p;p++) 
468
0
      nclistpush(expanded,strdup(*p));
469
0
        found = 1;        
470
0
        break;
471
0
          }
472
0
      }
473
0
  }
474
0
  if(!found) {/* pass thru */
475
0
      nclistpush(expanded,strdup(key));
476
0
          nclistpush(expanded,strdup(value));
477
0
  }
478
0
    }
479
480
0
    return check(stat);
481
0
}
482
483
/* Process mode flag inferences */
484
static int
485
processinferences(NClist* fraglenv)
486
0
{
487
0
    int stat = NC_NOERR;
488
0
    const char* modeval = NULL;
489
0
    NClist* newmodes = nclistnew();
490
0
    NClist* currentmodes = NULL;
491
0
    NClist* nextmodes = nclistnew();
492
0
    size_t i;
493
0
    char* newmodeval = NULL;
494
495
    /* Get "mode" entry */
496
0
    if((modeval = getmodekey(fraglenv))==NULL) goto done;
497
498
    /* Get the mode as list */
499
0
    currentmodes = parsemode(modeval);
500
501
#ifdef DEBUG
502
    printlist(currentmodes,"processinferences: initial mode list");
503
#endif
504
505
    /* Do what amounts to breadth first inferencing down the inference DAG. */
506
507
0
    for(;;) {
508
0
        NClist* tmp = NULL;
509
        /* Compute the next set of inferred modes */
510
#ifdef DEBUG
511
printlist(currentmodes,"processinferences: current mode list");
512
#endif
513
0
        infernext(currentmodes,nextmodes);
514
#ifdef DEBUG
515
printlist(nextmodes,"processinferences: next mode list");
516
#endif
517
        /* move current modes into list of newmodes */
518
0
        for(i=0;i<nclistlength(currentmodes);i++) {
519
0
      nclistpush(newmodes,nclistget(currentmodes,i));
520
0
  }
521
0
        nclistsetlength(currentmodes,0); /* clear current mode list */
522
0
        if(nclistlength(nextmodes) == 0) break; /* nothing more to do */
523
#ifdef DEBUG
524
printlist(newmodes,"processinferences: new mode list");
525
#endif
526
  /* Swap current and next */
527
0
        tmp = currentmodes;
528
0
  currentmodes = nextmodes;
529
0
  nextmodes = tmp;
530
0
        tmp = NULL;
531
0
    }
532
    /* cleanup any unused elements in currentmodes */
533
0
    nclistclearall(currentmodes);
534
535
    /* Ensure no duplicates */
536
0
    cleanstringlist(newmodes,1);
537
538
#ifdef DEBUG
539
    printlist(newmodes,"processinferences: final inferred mode list");
540
#endif
541
542
   /* Remove negative inferences */
543
0
   for(i=0;i<nclistlength(newmodes);i++) {
544
0
  const char* mode = nclistget(newmodes,i);
545
0
  negateone(mode,newmodes);
546
0
    }
547
548
    /* Store new mode value */
549
0
    if((newmodeval = list2string(newmodes))== NULL)
550
0
  {stat = NC_ENOMEM; goto done;}        
551
0
    if((stat=replacemode(fraglenv,newmodeval))) goto done;
552
0
    modeval = NULL;
553
554
0
done:
555
0
    nullfree(newmodeval);
556
0
    nclistfreeall(newmodes);
557
0
    nclistfreeall(currentmodes);
558
0
    nclistfreeall(nextmodes);
559
0
    return check(stat);
560
0
}
561
562
563
static int
564
negateone(const char* mode, NClist* newmodes)
565
0
{
566
0
    const struct MODEINFER* tests = modenegations;
567
0
    int changed = 0;
568
0
    for(;tests->key;tests++) {
569
0
  if(strcasecmp(tests->key,mode)==0) {
570
      /* Find and remove all instances of the inference value */
571
0
      for(size_t i = nclistlength(newmodes); i-- > 0;) {
572
0
    char* candidate = nclistget(newmodes,i);
573
0
    if(strcasecmp(candidate,tests->inference)==0) {
574
0
        nclistremove(newmodes,i);
575
0
        nullfree(candidate);
576
0
              changed = 1;
577
0
    }
578
0
      }
579
0
        }
580
0
    }
581
0
    return changed;
582
0
}
583
584
static void
585
infernext(NClist* current, NClist* next)
586
0
{
587
0
    size_t i;
588
0
    for(i=0;i<nclistlength(current);i++) {
589
0
        const struct MODEINFER* tests = NULL;
590
0
  const char* cur = nclistget(current,i);
591
0
        for(tests=modeinferences;tests->key;tests++) {
592
0
      if(strcasecmp(tests->key,cur)==0) {
593
          /* Append the inferred mode unless dup */
594
0
    if(!nclistmatch(next,tests->inference,1))
595
0
              nclistpush(next,strdup(tests->inference));
596
0
      }
597
0
        }
598
0
    }
599
0
}
600
601
/*
602
Given a list of strings, remove nulls and duplicates
603
*/
604
static int
605
mergelist(NClist** valuesp)
606
0
{
607
0
    size_t i,j;
608
0
    int stat = NC_NOERR;
609
0
    NClist* values = *valuesp;
610
0
    NClist* allvalues = nclistnew();
611
0
    NClist* newvalues = nclistnew();
612
0
    char* value = NULL;
613
614
0
    for(i=0;i<nclistlength(values);i++) {
615
0
  char* val1 = nclistget(values,i);
616
  /* split on commas and put pieces into allvalues */
617
0
  if((stat=parseonchar(val1,',',allvalues))) goto done;
618
0
    }
619
    /* Remove duplicates and "" */
620
0
    while(nclistlength(allvalues) > 0) {
621
0
  value = nclistremove(allvalues,0);
622
0
  if(strlen(value) == 0) {
623
0
      nullfree(value); value = NULL;
624
0
  } else {
625
0
      for(j=0;j<nclistlength(newvalues);j++) {
626
0
          char* candidate = nclistget(newvalues,j);
627
0
          if(strcasecmp(candidate,value)==0)
628
0
              {nullfree(value); value = NULL; break;}
629
0
       }
630
0
  }
631
0
  if(value != NULL) {nclistpush(newvalues,value); value = NULL;}
632
0
    }
633
    /* Make sure to have at least 1 value */
634
0
    if(nclistlength(newvalues)==0) nclistpush(newvalues,strdup(""));
635
0
    *valuesp = values; values = NULL;
636
637
0
done:
638
0
    nclistfree(allvalues);
639
0
    nclistfreeall(values);
640
0
    nclistfreeall(newvalues);
641
0
    return check(stat);
642
0
}
643
644
static int
645
lcontains(NClist* l, const char* key0)
646
0
{
647
0
    size_t i;
648
0
    for(i=0;i<nclistlength(l);i++) {
649
0
        const char* key1 = nclistget(l,i);
650
0
  if(strcasecmp(key0,key1)==0) return 1;
651
0
    }
652
0
    return 0;
653
0
}
654
655
/* Warning values should not use nclistfreeall */
656
static void
657
collectvaluesbykey(NClist* fraglenv, const char* key, NClist* values)
658
0
{
659
0
    size_t i;
660
    /* collect all the values with the same key (including this one) */
661
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
662
0
        const char* key2 = nclistget(fraglenv,i);
663
0
        if(strcasecmp(key,key2)==0) {
664
0
      const char* value2 = nclistget(fraglenv,i+1);
665
0
      nclistpush(values,value2); value2 = NULL;
666
0
  }
667
0
    }
668
0
}
669
670
/* Warning allkeys should not use nclistfreeall */
671
static void
672
collectallkeys(NClist* fraglenv, NClist* allkeys)
673
0
{
674
0
    size_t i;
675
    /* collect all the distinct keys */
676
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
677
0
  char* key = nclistget(fraglenv,i);
678
0
  if(!lcontains(allkeys,key)) {
679
0
      nclistpush(allkeys,key);
680
0
  }
681
0
    }
682
0
}
683
684
/* Given a fragment envv list, coalesce duplicate keys and remove duplicate values*/
685
static int
686
cleanfragments(NClist* fraglenv, NClist* newlist)
687
0
{
688
0
    size_t i;
689
0
    int stat = NC_NOERR;
690
0
    NClist* tmp = NULL;
691
0
    NClist* allkeys = NULL;
692
0
    NCbytes* buf = NULL;
693
0
    char* key = NULL;
694
0
    char* value = NULL;
695
696
0
    buf = ncbytesnew();
697
0
    allkeys = nclistnew();
698
0
    tmp = nclistnew();
699
700
    /* collect all unique keys */
701
0
    collectallkeys(fraglenv,allkeys);
702
    /* Collect all values for same key across all fragment pairs */
703
0
    for(i=0;i<nclistlength(allkeys);i++) {
704
0
  key = nclistget(allkeys,i);
705
0
  collectvaluesbykey(fraglenv,key,tmp);
706
  /* merge the key values, remove duplicate */
707
0
  if((stat=mergelist(&tmp))) goto done;
708
        /* Construct key,value pair and insert into newlist */
709
0
  key = strdup(key);
710
0
  nclistpush(newlist,key);
711
0
  value = list2string(tmp);
712
0
  nclistpush(newlist,value);
713
0
  nclistclear(tmp);
714
0
    }
715
0
done:
716
0
    nclistfree(allkeys);
717
0
    nclistfree(tmp);
718
0
    ncbytesfree(buf);
719
0
    return check(stat);
720
0
}
721
722
/* process non-mode fragment keys in case they hold significance; currently not */
723
static int
724
processfragmentkeys(const char* key, const char* value, NCmodel* model)
725
0
{
726
0
    return NC_NOERR;
727
0
}
728
729
/*
730
Infer from the mode + useparallel
731
only call if iscreate or file is not easily readable.
732
*/
733
static int
734
NC_omodeinfer(int useparallel, int cmode, NCmodel* model)
735
339
{
736
339
    int stat = NC_NOERR;
737
738
    /* If no format flags are set, then use default */
739
339
    if(!fIsSet(cmode,NC_FORMAT_ALL))
740
339
  set_default_mode(&cmode);
741
742
    /* Process the cmode; may override some already set flags. The
743
     * user-defined formats must be checked first. They may choose to
744
     * use some of the other flags, like NC_NETCDF4, so we must first
745
     * check NC_UDF0 and NC_UDF1 before checking for any other
746
     * flag. */
747
339
    if(fIsSet(cmode, NC_UDF0)  || fIsSet(cmode, NC_UDF1))
748
0
    {
749
0
        if(fIsSet(cmode, NC_UDF0))
750
0
        {
751
0
      model->impl = NC_FORMATX_UDF0;
752
0
  } else {
753
0
      model->impl = NC_FORMATX_UDF1;
754
0
  }
755
0
        if(fIsSet(cmode,NC_64BIT_OFFSET)) 
756
0
        {
757
0
            model->format = NC_FORMAT_64BIT_OFFSET;
758
0
        }
759
0
        else if(fIsSet(cmode,NC_64BIT_DATA))
760
0
        {
761
0
            model->format = NC_FORMAT_64BIT_DATA;
762
0
        }
763
0
        else if(fIsSet(cmode,NC_NETCDF4))
764
0
        {
765
0
            if(fIsSet(cmode,NC_CLASSIC_MODEL))
766
0
                model->format = NC_FORMAT_NETCDF4_CLASSIC;
767
0
            else
768
0
                model->format = NC_FORMAT_NETCDF4;
769
0
        }
770
0
        if(! model->format)
771
0
            model->format = NC_FORMAT_CLASSIC;
772
0
  goto done;
773
0
    }
774
775
339
    if(fIsSet(cmode,NC_64BIT_OFFSET)) {
776
0
  model->impl = NC_FORMATX_NC3;
777
0
  model->format = NC_FORMAT_64BIT_OFFSET;
778
0
        goto done;
779
0
    }
780
781
339
    if(fIsSet(cmode,NC_64BIT_DATA)) {
782
0
  model->impl = NC_FORMATX_NC3;
783
0
  model->format = NC_FORMAT_64BIT_DATA;
784
0
        goto done;
785
0
    }
786
787
339
    if(fIsSet(cmode,NC_NETCDF4)) {
788
0
  model->impl = NC_FORMATX_NC4;
789
0
        if(fIsSet(cmode,NC_CLASSIC_MODEL))
790
0
      model->format = NC_FORMAT_NETCDF4_CLASSIC;
791
0
  else
792
0
      model->format = NC_FORMAT_NETCDF4;
793
0
        goto done;
794
0
    }
795
796
    /* Default to classic model */
797
339
    model->format = NC_FORMAT_CLASSIC;
798
339
    model->impl = NC_FORMATX_NC3;
799
800
339
done:
801
    /* Apply parallel flag */
802
339
    if(useparallel) {
803
0
        if(model->impl == NC_FORMATX_NC3)
804
0
      model->impl = NC_FORMATX_PNETCDF;
805
0
    }
806
339
    return check(stat);
807
339
}
808
809
/*
810
If the mode flags do not necessarily specify the
811
format, then default it by adding in appropriate flags.
812
*/
813
814
static void
815
set_default_mode(int* modep)
816
339
{
817
339
    int mode = *modep;
818
339
    int dfaltformat;
819
820
339
    dfaltformat = nc_get_default_format();
821
339
    switch (dfaltformat) {
822
0
    case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break;
823
0
    case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break;
824
0
    case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break;
825
0
    case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break;
826
339
    case NC_FORMAT_CLASSIC: /* fall thru */
827
339
    default: break; /* default to classic */
828
339
    }
829
339
    *modep = mode; /* final result */
830
339
}
831
832
/**************************************************/
833
/*
834
   Infer model for this dataset using some
835
   combination of cmode, path, and reading the dataset.
836
   See the documentation in docs/internal.dox.
837
838
@param path
839
@param omode
840
@param iscreate
841
@param useparallel
842
@param params
843
@param model
844
@param newpathp
845
*/
846
847
int
848
NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp)
849
339
{
850
339
    size_t i;
851
339
    int stat = NC_NOERR;
852
339
    NCURI* uri = NULL;
853
339
    int omode = *omodep;
854
339
    NClist* fraglenv = nclistnew();
855
339
    NClist* modeargs = nclistnew();
856
339
    char* sfrag = NULL;
857
339
    const char* modeval = NULL;
858
339
    char* abspath = NULL;
859
339
    NClist* tmp = NULL;
860
861
    /* Phase 1:
862
       1. convert special protocols to http|https
863
       2. begin collecting fragments
864
    */
865
339
    if((stat = processuri(path, &uri, fraglenv))) goto done;
866
867
339
    if(uri != NULL) {
868
#ifdef DEBUG
869
  printlist(fraglenv,"processuri");
870
#endif
871
872
        /* Phase 2: Expand macros and add to fraglenv */
873
0
  nclistfreeall(tmp);
874
0
  tmp = nclistnew();
875
0
        if((stat = processmacros(fraglenv,tmp))) goto done;
876
0
  nclistfreeall(fraglenv);
877
0
  fraglenv = tmp; tmp = NULL;
878
#ifdef DEBUG
879
  printlist(fraglenv,"processmacros");
880
#endif
881
  /* Cleanup the fragment list */
882
0
  nclistfreeall(tmp);
883
0
  tmp = nclistnew();
884
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
885
0
  nclistfreeall(fraglenv);
886
0
  fraglenv = tmp; tmp = NULL;
887
888
        /* Phase 2a: Expand mode inferences and add to fraglenv */
889
0
        if((stat = processinferences(fraglenv))) goto done;
890
#ifdef DEBUG
891
  printlist(fraglenv,"processinferences");
892
#endif
893
894
        /* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
895
0
  nclistfreeall(tmp);
896
0
  tmp = nclistnew();
897
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
898
0
  nclistfreeall(fraglenv);
899
0
  fraglenv = tmp; tmp = NULL;
900
#ifdef DEBUG
901
  printlist(fraglenv,"cleanfragments");
902
#endif
903
904
        /* Phase 4: Rebuild the url fragment and rebuilt the url */
905
0
        sfrag = envvlist2string(fraglenv,"&");
906
0
        nclistfreeall(fraglenv); fraglenv = NULL;
907
#ifdef DEBUG
908
  fprintf(stderr,"frag final: %s\n",sfrag);
909
#endif
910
0
        ncurisetfragments(uri,sfrag);
911
0
        nullfree(sfrag); sfrag = NULL;
912
913
#ifdef NETCDF_ENABLE_S3
914
  /* If s3, then rebuild the url */
915
  if(NC_iss3(uri,NULL)) {
916
      NCURI* newuri = NULL;
917
      if((stat = NC_s3urlrebuild(uri,NULL,&newuri))) goto done;
918
      ncurifree(uri);
919
      uri = newuri;
920
  } else
921
#endif
922
0
  if(strcmp(uri->protocol,"file")==0) {
923
            /* convert path to absolute */
924
0
      char* canon = NULL;
925
0
      abspath = NCpathabsolute(uri->path);
926
0
      if((stat = NCpathcanonical(abspath,&canon))) goto done;
927
0
      nullfree(abspath);
928
0
      abspath = canon; canon = NULL;
929
0
      if((stat = ncurisetpath(uri,abspath))) goto done;
930
0
  }
931
  
932
  /* rebuild the path */
933
0
        if(newpathp) {
934
0
            *newpathp = ncuribuild(uri,NULL,NULL,NCURIALL);
935
#ifdef DEBUG
936
      fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr);
937
#endif    
938
0
  }
939
940
        /* Phase 5: Process the mode key to see if we can tell the formatx */
941
0
        modeval = ncurifragmentlookup(uri,"mode");
942
0
        if(modeval != NULL) {
943
0
      if((stat = parseonchar(modeval,',',modeargs))) goto done;
944
0
            for(i=0;i<nclistlength(modeargs);i++) {
945
0
          const char* arg = nclistget(modeargs,i);
946
0
          if((stat=processmodearg(arg,model))) goto done;
947
0
            }
948
0
  }
949
950
        /* Phase 6: Process the non-mode keys to see if we can tell the formatx */
951
0
  if(!modelcomplete(model)) {
952
0
      size_t i;
953
0
      NClist* p = (NClist*)ncurifragmentparams(uri); /* envv format */
954
0
      for(i=0;i<nclistlength(p);i+=2) {
955
0
    const char* key = nclistget(p,0);
956
0
    const char* value = nclistget(p,1);
957
0
    if((stat=processfragmentkeys(key,value,model))) goto done;
958
0
      }
959
0
  }
960
961
        /* Phase 7: Special cases: if this is a URL and model.impl is still not defined */
962
        /* Phase7a: Default is DAP2 */
963
0
        if(!modelcomplete(model)) {
964
0
      model->impl = NC_FORMATX_DAP2;
965
0
      model->format = NC_FORMAT_NC3;
966
0
        }
967
968
339
    } else {/* Not URL */
969
339
  if(newpathp) *newpathp = NULL;
970
339
    }
971
972
    /* Phase 8: mode inference from mode flags */
973
    /* The modeargs did not give us a model (probably not a URL).
974
       So look at the combination of mode flags and the useparallel flag */
975
339
    if(!modelcomplete(model)) {
976
339
        if((stat = NC_omodeinfer(useparallel,omode,model))) goto done;
977
339
    }
978
979
    /* Phase 9: Special case for file stored in DAOS container */
980
339
    if(isdaoscontainer(path) == NC_NOERR) {
981
        /* This is a DAOS container, so immediately assume it is HDF5. */
982
0
        model->impl = NC_FORMATX_NC_HDF5;
983
0
        model->format = NC_FORMAT_NETCDF4;
984
339
    } else {
985
        /* Phase 10: Infer from file content, if possible;
986
           this has highest precedence, so it may override
987
           previous decisions. Note that we do this last
988
           because we need previously determined model info
989
           to guess if this file is readable.
990
        */
991
339
        if(!iscreate && isreadable(uri,model)) {
992
       /* Ok, we need to try to read the file */
993
339
            if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done;
994
339
        }
995
339
    }
996
997
    /* Need a decision */
998
291
    if(!modelcomplete(model))
999
0
  {stat = NC_ENOTNC; goto done;}
1000
1001
    /* Force flag consistency */
1002
291
    switch (model->impl) {
1003
0
    case NC_FORMATX_NC4:
1004
0
    case NC_FORMATX_NC_HDF4:
1005
0
    case NC_FORMATX_DAP4:
1006
0
    case NC_FORMATX_NCZARR:
1007
0
  omode |= NC_NETCDF4;
1008
0
  if(model->format == NC_FORMAT_NETCDF4_CLASSIC)
1009
0
      omode |= NC_CLASSIC_MODEL;
1010
0
  break;
1011
291
    case NC_FORMATX_NC3:
1012
291
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1013
291
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1014
191
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1015
291
  break;
1016
0
    case NC_FORMATX_PNETCDF:
1017
0
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1018
0
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1019
0
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1020
0
  break;
1021
0
    case NC_FORMATX_DAP2:
1022
0
  omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL);
1023
0
  break;
1024
0
    case NC_FORMATX_UDF0:
1025
0
    case NC_FORMATX_UDF1:
1026
0
        if(model->format == NC_FORMAT_64BIT_OFFSET) 
1027
0
            omode |= NC_64BIT_OFFSET;
1028
0
        else if(model->format == NC_FORMAT_64BIT_DATA)
1029
0
            omode |= NC_64BIT_DATA;
1030
0
        else if(model->format == NC_FORMAT_NETCDF4)  
1031
0
            omode |= NC_NETCDF4;
1032
0
        else if(model->format == NC_FORMAT_NETCDF4_CLASSIC)  
1033
0
            omode |= NC_NETCDF4|NC_CLASSIC_MODEL;
1034
0
        break;
1035
0
    default:
1036
0
  {stat = NC_ENOTNC; goto done;}
1037
291
    }
1038
1039
339
done:
1040
339
    nullfree(sfrag);
1041
339
    nullfree(abspath);
1042
339
    ncurifree(uri);
1043
339
    nclistfreeall(modeargs);
1044
339
    nclistfreeall(fraglenv);
1045
339
    nclistfreeall(tmp);
1046
339
    *omodep = omode; /* in/out */
1047
339
    return check(stat);
1048
291
}
1049
1050
static int
1051
isreadable(NCURI* uri, NCmodel* model)
1052
339
{
1053
339
    int canread = 0;
1054
339
    struct Readable* r;
1055
    /* Step 1: Look up the implementation */
1056
339
    for(r=readable;r->impl;r++) {
1057
339
  if(model->impl == r->impl) {canread = r->readable; break;}
1058
339
    }
1059
    /* Step 2: check for bytes mode */
1060
339
    if(!canread && NC_testmode(uri,"bytes") && (model->impl == NC_FORMATX_NC4 || model->impl == NC_FORMATX_NC_HDF5))
1061
0
        canread = 1;
1062
339
    return canread;
1063
339
}
1064
1065
#if 0
1066
/*
 * Return a freshly allocated copy of s, or a freshly allocated empty
 * string when s is NULL. The caller owns (and must free) the result.
 *
 * The NULL case duplicates the "" literal directly, so no intermediate
 * allocation is created and then lost.
 */
static char*
emptyify(char* s)
{
    return strdup(s == NULL ? "" : s);
}
1072
1073
/*
 * Map the empty string to NULL; any other pointer (including NULL)
 * is returned unchanged.
 */
static const char*
nullify(const char* s)
{
    return (s == NULL || s[0] == '\0') ? NULL : s;
}
1080
#endif
1081
1082
/**************************************************/
1083
/* Envv list utilities */
1084
1085
static const char*
1086
getmodekey(const NClist* envv)
1087
0
{
1088
0
    size_t i;
1089
    /* Get "mode" entry */
1090
0
    for(i=0;i<nclistlength(envv);i+=2) {
1091
0
  char* key = NULL;
1092
0
  key = nclistget(envv,i);
1093
0
  if(strcasecmp(key,"mode")==0)
1094
0
      return nclistget(envv,i+1);
1095
0
    }
1096
0
    return NULL;
1097
0
}
1098
1099
static int
1100
replacemode(NClist* envv, const char* newval)
1101
0
{
1102
0
    size_t i;
1103
    /* Get "mode" entry */
1104
0
    for(i=0;i<nclistlength(envv);i+=2) {
1105
0
  char* key = NULL;
1106
0
  char* val = NULL;
1107
0
  key = nclistget(envv,i);
1108
0
  if(strcasecmp(key,"mode")==0) {
1109
0
      val = nclistget(envv,i+1);      
1110
0
      nclistset(envv,i+1,strdup(newval));
1111
0
      nullfree(val);
1112
0
      return NC_NOERR;
1113
0
  }
1114
0
    }
1115
0
    return NC_EINVAL;
1116
0
}
1117
1118
/*
 * Split a comma-separated mode string into a newly created list.
 * A NULL modeval yields an empty list. Any status returned by
 * parseonchar is deliberately discarded.
 *
 * The caller owns the returned list.
 */
static NClist*
parsemode(const char* modeval)
{
    NClist* pieces = nclistnew();

    if(modeval == NULL)
        return pieces;
    (void)parseonchar(modeval,',',pieces); /* split on commas */
    return pieces;
}
1126
1127
/* Convert a list into a comma'd string */
1128
static char*
1129
list2string(NClist* list)
1130
0
{
1131
0
    size_t i;
1132
0
    NCbytes* buf = NULL;
1133
0
    char* result = NULL;
1134
1135
0
    if(list == NULL || nclistlength(list)==0) return strdup("");
1136
0
    buf = ncbytesnew();
1137
0
    for(i=0;i<nclistlength(list);i++) {
1138
0
  const char* m = nclistget(list,i);
1139
0
  if(m == NULL || strlen(m) == 0) continue;
1140
0
  if(i > 0) ncbytescat(buf,",");
1141
0
  ncbytescat(buf,m);
1142
0
    }
1143
0
    result = ncbytesextract(buf);
1144
0
    ncbytesfree(buf);
1145
0
    if(result == NULL) result = strdup("");
1146
0
    return result;
1147
0
}
1148
1149
#if 0
1150
/* Given a comma separated string, remove duplicates; mostly used to cleanup mode list */
1151
static char* 
1152
cleancommalist(const char* commalist, int caseinsensitive)
1153
{
1154
    NClist* tmp = nclistnew();
1155
    char* newlist = NULL;
1156
    if(commalist == NULL || strlen(commalist)==0) return nulldup(commalist);
1157
    (void)parseonchar(commalist,',',tmp);/* split on commas */
1158
    cleanstringlist(tmp,caseinsensitive);
1159
    newlist = list2string(tmp);
1160
    nclistfreeall(tmp);
1161
    return newlist;
1162
}
1163
#endif
1164
1165
/* Given a list of strings, remove nulls and duplicated */
1166
static void
1167
cleanstringlist(NClist* strs, int caseinsensitive)
1168
0
{
1169
0
    if(nclistlength(strs) == 0) return;
1170
    /* Remove nulls */
1171
0
    for(size_t i = nclistlength(strs); i-->0;) {
1172
0
        if(nclistget(strs,i)==NULL) nclistremove(strs,i);
1173
0
    }
1174
0
    if(nclistlength(strs) <= 1) return;
1175
    /* Remove duplicates*/
1176
0
    for(size_t i=0;i<nclistlength(strs);i++) {
1177
0
        const char* value = nclistget(strs,i);
1178
        /* look ahead for duplicates */
1179
0
        for(size_t j=nclistlength(strs)-1;j>i;j--) {
1180
0
            int match;
1181
0
            const char* candidate = nclistget(strs,j);
1182
0
            if(caseinsensitive)
1183
0
                match = (strcasecmp(value,candidate) == 0);
1184
0
            else
1185
0
                match = (strcmp(value,candidate) == 0);
1186
0
            if(match) {char* dup = nclistremove(strs,j); nullfree(dup);}
1187
0
        }
1188
0
    }
1189
0
}
1190
1191
1192
/**************************************************/
1193
/**
1194
 * @internal Given an existing file, figure out its format and return
1195
 * that format value (NC_FORMATX_XXX) in model arg. Assume any path
1196
 * conversion was already performed at a higher level.
1197
 *
1198
 * @param path File name.
1199
 * @param flags
1200
 * @param use_parallel
1201
 * @param parameters
1202
 * @param model Pointer that gets the model to use for the dispatch table.
1203
 * @param version Pointer that gets version of the file.
1204
 *
1205
 * @return ::NC_NOERR No error.
1206
 * @author Dennis Heimbigner
1207
*/
1208
static int
1209
check_file_type(const char *path, int omode, int use_parallel,
1210
       void *parameters, NCmodel* model, NCURI* uri)
1211
339
{
1212
339
    char magic[NC_MAX_MAGIC_NUMBER_LEN];
1213
339
    int status = NC_NOERR;
1214
339
    struct MagicFile magicinfo;
1215
#ifdef _WIN32
1216
    NC* nc = NULL;
1217
#endif
1218
1219
339
    memset((void*)&magicinfo,0,sizeof(magicinfo));
1220
1221
#ifdef _WIN32 /* including MINGW */
1222
    /* Windows does not handle multiple handles to the same file very well.
1223
       So if file is already open/created, then find it and just get the
1224
       model from that. */
1225
    if((nc = find_in_NCList_by_name(path)) != NULL) {
1226
  int format = 0;
1227
  /* Get the model from this NC */
1228
  if((status = nc_inq_format_extended(nc->ext_ncid,&format,NULL))) goto done;
1229
  model->impl = format;
1230
  if((status = nc_inq_format(nc->ext_ncid,&format))) goto done;
1231
  model->format = format;
1232
  goto done;
1233
    }
1234
#endif
1235
1236
339
    magicinfo.path = path; /* do not free */
1237
339
    magicinfo.uri = uri; /* do not free */
1238
339
    magicinfo.omode = omode;
1239
339
    magicinfo.model = model; /* do not free */
1240
339
    magicinfo.parameters = parameters; /* do not free */
1241
#ifdef USE_STDIO
1242
    magicinfo.use_parallel = 0;
1243
#else
1244
339
    magicinfo.use_parallel = use_parallel;
1245
339
#endif
1246
1247
339
    if((status = openmagic(&magicinfo))) goto done;
1248
1249
    /* Verify we have a large enough file */
1250
339
    if(MAGIC_NUMBER_LEN >= (unsigned long long)magicinfo.filelen)
1251
0
  {status = NC_ENOTNC; goto done;}
1252
339
    if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) {
1253
0
  status = NC_ENOTNC;
1254
0
  goto done;
1255
0
    }
1256
1257
    /* Look at the magic number */
1258
339
    if(NC_interpret_magic_number(magic,model) == NC_NOERR
1259
291
  && model->format != 0) {
1260
291
        if (use_parallel && (model->format == NC_FORMAT_NC3 || model->impl == NC_FORMATX_NC3))
1261
            /* this is called from nc_open_par() and file is classic */
1262
0
            model->impl = NC_FORMATX_PNETCDF;
1263
291
        goto done; /* found something */
1264
291
    }
1265
1266
    /* Remaining case when implementation is an HDF5 file;
1267
       search forward at starting at 512
1268
       and doubling to see if we have HDF5 magic number */
1269
48
    {
1270
48
  size_t pos = 512L;
1271
343
        for(;;) {
1272
343
      if((pos+MAGIC_NUMBER_LEN) > (unsigned long long)magicinfo.filelen)
1273
48
    {status = NC_ENOTNC; goto done;}
1274
295
            if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR)
1275
0
          {status = NC_ENOTNC; goto done; }
1276
295
            NC_interpret_magic_number(magic,model);
1277
295
            if(model->impl == NC_FORMATX_NC4) break;
1278
      /* double and try again */
1279
295
      pos = 2*pos;
1280
295
        }
1281
48
    }
1282
339
done:
1283
339
    closemagic(&magicinfo);
1284
339
    return check(status);
1285
48
}
1286
1287
/**
1288
\internal
1289
\ingroup datasets
1290
Provide open, read and close for use when searching for magic numbers
1291
*/
1292
static int
1293
openmagic(struct MagicFile* file)
1294
339
{
1295
339
    int status = NC_NOERR;
1296
339
    if(fIsSet(file->omode,NC_INMEMORY)) {
1297
  /* Get its length */
1298
339
  NC_memio* meminfo = (NC_memio*)file->parameters;
1299
339
        assert(meminfo != NULL);
1300
339
  file->filelen = (long long)meminfo->size;
1301
339
  goto done;
1302
339
    }
1303
0
    if(file->uri != NULL) {
1304
#ifdef NETCDF_ENABLE_BYTERANGE
1305
  /* Construct a URL minus any fragment */
1306
        file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC);
1307
  /* Open the curl handle */
1308
        if((status=nc_http_open(file->path, &file->state))) goto done;
1309
  if((status=nc_http_size(file->state,&file->filelen))) goto done;
1310
#else /*!BYTERANGE*/
1311
0
  {status = NC_ENOTBUILT;}
1312
0
#endif /*BYTERANGE*/
1313
0
  goto done;
1314
0
    } 
1315
#ifdef USE_PARALLEL
1316
    if (file->use_parallel) {
1317
  int retval;
1318
  MPI_Offset size;
1319
        assert(file->parameters != NULL);
1320
  if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm,
1321
                                   (char*)file->path,MPI_MODE_RDONLY,
1322
                                   ((NC_MPI_INFO*)file->parameters)->info,
1323
                                   &file->fh)) != MPI_SUCCESS) {
1324
#ifdef MPI_ERR_NO_SUCH_FILE
1325
      int errorclass;
1326
      MPI_Error_class(retval, &errorclass);
1327
      if (errorclass == MPI_ERR_NO_SUCH_FILE)
1328
#ifdef NC_ENOENT
1329
          status = NC_ENOENT;
1330
#else /*!NC_ENOENT*/
1331
    status = errno;
1332
#endif /*NC_ENOENT*/
1333
      else
1334
#endif /*MPI_ERR_NO_SUCH_FILE*/
1335
          status = NC_EPARINIT;
1336
      file->fh = MPI_FILE_NULL;
1337
      goto done;
1338
  }
1339
  /* Get its length */
1340
  if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS)
1341
      {status = NC_EPARINIT; goto done;}
1342
  file->filelen = (long long)size;
1343
  goto done;
1344
    }
1345
#endif /* USE_PARALLEL */
1346
0
    {
1347
0
        if (file->path == NULL || strlen(file->path) == 0)
1348
0
            {status = NC_EINVAL; goto done;}
1349
0
        file->fp = NCfopen(file->path, "r");
1350
0
        if(file->fp == NULL)
1351
0
      {status = errno; goto done;}
1352
  /* Get its length */
1353
0
  {
1354
0
      int fd = fileno(file->fp);
1355
#ifdef _WIN32
1356
      __int64 len64 = _filelengthi64(fd);
1357
      if(len64 < 0)
1358
    {status = errno; goto done;}
1359
      file->filelen = (long long)len64;
1360
#else
1361
0
      off_t size;
1362
0
      size = lseek(fd, 0, SEEK_END);
1363
0
      if(size == -1)
1364
0
    {status = errno; goto done;}
1365
0
    file->filelen = (long long)size;
1366
0
#endif
1367
0
  }
1368
0
        int retval2 = fseek(file->fp, 0L, SEEK_SET);        
1369
0
      if(retval2 != 0)
1370
0
    {status = errno; goto done;}
1371
0
    }
1372
339
done:
1373
339
    return check(status);
1374
0
}
1375
1376
static int
1377
readmagic(struct MagicFile* file, size_t pos, char* magic)
1378
634
{
1379
634
    int status = NC_NOERR;
1380
634
    NCbytes* buf = ncbytesnew();
1381
1382
634
    memset(magic,0,MAGIC_NUMBER_LEN);
1383
634
    if(fIsSet(file->omode,NC_INMEMORY)) {
1384
634
  char* mempos;
1385
634
  NC_memio* meminfo = (NC_memio*)file->parameters;
1386
634
  if((pos + MAGIC_NUMBER_LEN) > meminfo->size)
1387
0
      {status = NC_EINMEMORY; goto done;}
1388
634
  mempos = ((char*)meminfo->memory) + pos;
1389
634
  memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN);
1390
#ifdef DEBUG
1391
  printmagic("XXX: readmagic",magic,file);
1392
#endif
1393
634
    } else if(file->uri != NULL) {
1394
#ifdef NETCDF_ENABLE_BYTERANGE
1395
        size64_t start = (size64_t)pos;
1396
        size64_t count = MAGIC_NUMBER_LEN;
1397
        status = nc_http_read(file->state, start, count, buf);
1398
        if (status == NC_NOERR) {
1399
            if (ncbyteslength(buf) != count)
1400
                status = NC_EINVAL;
1401
            else
1402
                memcpy(magic, ncbytescontents(buf), count);
1403
        }
1404
#endif
1405
0
    } else {
1406
#ifdef USE_PARALLEL
1407
        if (file->use_parallel) {
1408
      MPI_Status mstatus;
1409
      int retval;
1410
      if((retval = MPI_File_read_at_all(file->fh, pos, magic,
1411
          MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS)
1412
          {status = NC_EPARINIT; goto done;}
1413
        }
1414
        else
1415
#endif /* USE_PARALLEL */
1416
0
        { /* Ordinary read */
1417
0
            long i;
1418
0
            i = fseek(file->fp, (long)pos, SEEK_SET);
1419
0
            if (i < 0) { status = errno; goto done; }
1420
0
            ncbytessetlength(buf, 0);
1421
0
            if ((status = NC_readfileF(file->fp, buf, MAGIC_NUMBER_LEN))) goto done;
1422
0
            memcpy(magic, ncbytescontents(buf), MAGIC_NUMBER_LEN);
1423
0
        }
1424
0
    }
1425
1426
634
done:
1427
634
    ncbytesfree(buf);
1428
634
    if(file && file->fp) clearerr(file->fp);
1429
634
    return check(status);
1430
634
}
1431
1432
/**
1433
 * Close the file opened to check for magic number.
1434
 *
1435
 * @param file pointer to the MagicFile struct for this open file.
1436
 * @returns NC_NOERR for success
1437
 * @returns NC_EPARINIT if there was a problem closing file with MPI
1438
 * (parallel builds only).
1439
 * @author Dennis Heimbigner
1440
 */
1441
static int
1442
closemagic(struct MagicFile* file)
1443
339
{
1444
339
    int status = NC_NOERR;
1445
1446
339
    if(fIsSet(file->omode,NC_INMEMORY)) {
1447
  /* noop */
1448
339
    } else if(file->uri != NULL) {
1449
#ifdef NETCDF_ENABLE_BYTERANGE
1450
      status = nc_http_close(file->state);
1451
#endif
1452
0
      nullfree(file->curlurl);
1453
0
    } else {
1454
#ifdef USE_PARALLEL
1455
        if (file->use_parallel) {
1456
      int retval;
1457
      if(file->fh != MPI_FILE_NULL
1458
         && (retval = MPI_File_close(&file->fh)) != MPI_SUCCESS)
1459
        {status = NC_EPARINIT; return status;}
1460
        } else
1461
#endif
1462
0
        {
1463
0
      if(file->fp) fclose(file->fp);
1464
0
        }
1465
0
    }
1466
339
    return status;
1467
339
}
1468
1469
/*!
1470
  Interpret the magic number found in the header of a netCDF file.
1471
  This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags.
1472
1473
  @param[in] magic Pointer to a character array with the magic number block.
1474
  @param[out] model Pointer to an integer to hold the corresponding netCDF type.
1475
  @param[out] version Pointer to an integer to hold the corresponding netCDF version.
1476
  @returns NC_NOERR if a legitimate file type found
1477
  @returns NC_ENOTNC otherwise
1478
1479
\internal
1480
\ingroup datasets
1481
1482
*/
1483
static int
1484
NC_interpret_magic_number(char* magic, NCmodel* model)
1485
634
{
1486
634
    int status = NC_NOERR;
1487
634
    int tmpimpl = 0;
1488
    /* Look at the magic number */
1489
634
    if(model->impl == NC_FORMATX_UDF0 || model->impl == NC_FORMATX_UDF1)
1490
0
        tmpimpl = model->impl;
1491
1492
    /* Use the complete magic number string for HDF5 */
1493
634
    if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) {
1494
0
  model->impl = NC_FORMATX_NC4;
1495
0
  model->format = NC_FORMAT_NETCDF4;
1496
0
  goto done;
1497
0
    }
1498
634
    if(magic[0] == '\016' && magic[1] == '\003'
1499
35
              && magic[2] == '\023' && magic[3] == '\001') {
1500
7
  model->impl = NC_FORMATX_NC_HDF4;
1501
7
  model->format = NC_FORMAT_NETCDF4;
1502
7
  goto done;
1503
7
    }
1504
627
    if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') {
1505
357
        if(magic[3] == '\001') {
1506
108
      model->impl = NC_FORMATX_NC3;
1507
108
      model->format = NC_FORMAT_CLASSIC;
1508
108
      goto done;
1509
108
  }
1510
249
        if(magic[3] == '\002') {
1511
122
      model->impl = NC_FORMATX_NC3;
1512
122
      model->format = NC_FORMAT_64BIT_OFFSET;
1513
122
      goto done;
1514
122
        }
1515
127
        if(magic[3] == '\005') {
1516
105
    model->impl = NC_FORMATX_NC3;
1517
105
    model->format = NC_FORMAT_64BIT_DATA;
1518
105
    goto done;
1519
105
  }
1520
127
     }
1521
     /* No match  */
1522
292
     if (!tmpimpl) 
1523
292
         status = NC_ENOTNC;         
1524
1525
292
     goto done;
1526
1527
634
done:
1528
     /* if model->impl was UDF0 or UDF1 on entry, make it so on exit */
1529
634
     if(tmpimpl)
1530
0
         model->impl = tmpimpl;
1531
     /* if this is a UDF magic_number update the model->impl */
1532
634
     if (strlen(UDF0_magic_number) && !strncmp(UDF0_magic_number, magic,
1533
0
                                               strlen(UDF0_magic_number)))
1534
0
     {
1535
0
         model->impl = NC_FORMATX_UDF0;
1536
0
         status = NC_NOERR;
1537
0
     }
1538
634
     if (strlen(UDF1_magic_number) && !strncmp(UDF1_magic_number, magic,
1539
0
                                               strlen(UDF1_magic_number)))
1540
0
     {
1541
0
         model->impl = NC_FORMATX_UDF1;
1542
0
         status = NC_NOERR;
1543
0
     }    
1544
1545
634
     return check(status);
1546
627
}
1547
1548
/* Return NC_NOERR if path is a DAOS container; return NC_EXXX otherwise */
1549
static int
1550
isdaoscontainer(const char* path)
1551
339
{
1552
339
    int stat = NC_ENOTNC; /* default is that this is not a DAOS container */
1553
339
#ifndef _WIN32
1554
#ifdef USE_HDF5
1555
#if H5_VERSION_GE(1,12,0)
1556
    htri_t accessible;
1557
    hid_t fapl_id;
1558
    int rc;
1559
    /* Check for a DAOS container */
1560
    if((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) {stat = NC_EHDFERR; goto done;}
1561
    H5Pset_fapl_sec2(fapl_id);
1562
    accessible = H5Fis_accessible(path, fapl_id);
1563
    H5Pclose(fapl_id); /* Ignore any error */
1564
    rc = 0;
1565
    if(accessible > 0) {
1566
#ifdef HAVE_SYS_XATTR_H
1567
  ssize_t xlen;
1568
#ifdef __APPLE__
1569
  xlen = listxattr(path, NULL, 0, 0);
1570
#else
1571
  xlen = listxattr(path, NULL, 0);
1572
#endif
1573
        if(xlen > 0) {
1574
        char* xlist = NULL;
1575
      char* xvalue = NULL;
1576
      char* p;
1577
      char* endp;
1578
      if((xlist = (char*)calloc(1,(size_t)xlen))==NULL)
1579
    {stat = NC_ENOMEM; goto done;}
1580
#ifdef __APPLE__
1581
      (void)listxattr(path, xlist, (size_t)xlen, 0); /* Get xattr names */
1582
#else
1583
      (void)listxattr(path, xlist, (size_t)xlen); /* Get xattr names */
1584
#endif
1585
      p = xlist; endp = p + xlen; /* delimit names */
1586
      /* walk the list of xattr names */
1587
      for(;p < endp;p += (strlen(p)+1)) {
1588
    /* The popen version looks for the string ".daos";
1589
                   It would be nice if we know whether that occurred
1590
       int the xattr's name or it value.
1591
       Oh well, we will do the general search */
1592
    /* Look for '.daos' in the key */
1593
    if(strstr(p,".daos") != NULL) {rc = 1; break;} /* success */
1594
    /* Else get the p'th xattr's value size */
1595
#ifdef __APPLE__
1596
    xlen = getxattr(path, p, NULL, 0, 0, 0);
1597
#else
1598
    xlen = getxattr(path, p, NULL, 0);
1599
#endif
1600
    if((xvalue = (char*)calloc(1,(size_t)xlen))==NULL)
1601
        {stat = NC_ENOMEM; goto done;}
1602
    /* Read the value */
1603
#ifdef __APPLE__
1604
    (void)getxattr(path, p, xvalue, (size_t)xlen, 0, 0);
1605
#else
1606
    (void)getxattr(path, p, xvalue, (size_t)xlen);
1607
#endif
1608
    /* Look for '.daos' in the value */
1609
    if(strstr(xvalue,".daos") != NULL) {rc = 1; break;} /* success */
1610
      }
1611
        }
1612
#else /*!HAVE_SYS_XATTR_H*/
1613
1614
#ifdef HAVE_GETFATTR
1615
  {
1616
      FILE *fp;
1617
      char cmd[4096];
1618
      memset(cmd,0,sizeof(cmd));
1619
        snprintf(cmd,sizeof(cmd),"getfattr %s | grep -c '.daos'",path);
1620
        if((fp = popen(cmd, "r")) != NULL) {
1621
               fscanf(fp, "%d", &rc);
1622
               pclose(fp);
1623
      }
1624
    }
1625
#else /*!HAVE_GETFATTR*/
1626
    /* We just can't test for DAOS container.*/
1627
    rc = 0;
1628
#endif /*HAVE_GETFATTR*/
1629
#endif /*HAVE_SYS_XATTR_H*/
1630
    }
1631
    /* Test for DAOS container */
1632
    stat = (rc == 1 ? NC_NOERR : NC_ENOTNC);
1633
done:
1634
#endif
1635
#endif
1636
339
#endif
1637
    errno = 0; /* reset */
1638
339
    return stat;
1639
339
}
1640
1641
#ifdef DEBUG
1642
static void
1643
printmagic(const char* tag, char* magic, struct MagicFile* f)
1644
{
1645
    int i;
1646
    fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel);
1647
    for(i=0;i<MAGIC_NUMBER_LEN;i++) {
1648
        unsigned int c = (unsigned int)magic[i];
1649
  c = c & 0x000000FF;
1650
  if(c == '\n')
1651
      fprintf(stderr," 0x%0x/'\\n'",c);
1652
  else if(c == '\r')
1653
      fprintf(stderr," 0x%0x/'\\r'",c);
1654
  else if(c < ' ')
1655
      fprintf(stderr," 0x%0x/'?'",c);
1656
  else
1657
      fprintf(stderr," 0x%0x/'%c'",c,c);
1658
    }
1659
    fprintf(stderr,"\n");
1660
    fflush(stderr);
1661
}
1662
1663
static void
1664
printlist(NClist* list, const char* tag)
1665
{
1666
    int i;
1667
    fprintf(stderr,"%s:",tag);
1668
    for(i=0;i<nclistlength(list);i++) {
1669
        fprintf(stderr," %s",(char*)nclistget(list,i));
1670
  fprintf(stderr,"[%p]",(char*)nclistget(list,i));
1671
    }
1672
    fprintf(stderr,"\n");
1673
    dbgflush();
1674
}
1675
1676
1677
#endif