Coverage Report

Created: 2026-03-10 06:46

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/netcdf-c/libdispatch/dinfermodel.c
Line
Count
Source
1
/**
2
 * @file
3
 *
4
 * Infer as much as possible from the omode + path.
5
 * Rewrite the path to a canonical form.
6
 *
7
 * Copyright 2018 University Corporation for Atmospheric
8
 * Research/Unidata. See COPYRIGHT file for more info.
9
*/
10
#include "config.h"
11
#include <stddef.h>
12
#include <stdlib.h>
13
#include <string.h>
14
#ifdef HAVE_UNISTD_H
15
#include <unistd.h>
16
#endif
17
#ifdef HAVE_SYS_TYPES_H
18
#include <sys/types.h>
19
#endif
20
#ifndef _WIN32
21
#ifdef USE_HDF5
22
#include <hdf5.h>
23
#endif /* USE_HDF5 */
24
#endif /* _WIN32 */
25
#ifdef HAVE_SYS_XATTR_H
26
#include <sys/xattr.h>
27
#endif
28
29
#include "ncdispatch.h"
30
#include "ncpathmgr.h"
31
#include "netcdf_mem.h"
32
#include "fbits.h"
33
#include "ncbytes.h"
34
#include "nclist.h"
35
#include "nclog.h"
36
#include "nchttp.h"
37
#include "ncutil.h"
38
#ifdef NETCDF_ENABLE_S3
39
#include "ncs3sdk.h"
40
#endif
41
42
#ifndef nulldup
43
 #define nulldup(x) ((x)?strdup(x):(x))
44
#endif
45
46
#undef DEBUG
47
48
/* If Defined, then use only stdio for all magic number io;
49
   otherwise use stdio or mpio as required.
50
 */
51
#undef USE_STDIO
52
53
/**
54
Store info for open/read/close of
55
a file when searching for magic numbers
56
*/
57
struct MagicFile {
58
    const char* path;
59
    struct NCURI* uri;
60
    int omode;
61
    NCmodel* model;
62
    long long filelen;
63
    int use_parallel;
64
    int iss3;
65
    void* parameters; /* !NULL if inmemory && !diskless */
66
    FILE* fp;
67
#ifdef USE_PARALLEL
68
    MPI_File fh;
69
#endif
70
    char* curlurl; /* url to use with CURLOPT_SET_URL */
71
    NC_HTTP_STATE* state;
72
#ifdef NETCDF_ENABLE_S3
73
    NCS3INFO s3;
74
    void* s3client;
75
    char* errmsg;
76
#endif
77
};
78
79
/** @internal Magic number for HDF5 files. To be consistent with
80
 * H5Fis_hdf5, use the complete HDF5 magic number */
81
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";
82
83
335
#define modelcomplete(model) ((model)->impl != 0)
84
85
#ifdef DEBUG
/* Flush stdout and stderr. */
static void dbgflush(void)
{
    fflush(stdout);
    fflush(stderr);
}

/* Does nothing; presumably a convenient spot for a debugger breakpoint. */
static void
fail(int err)
{
    return;
}

/* Pass err through unchanged, calling fail() first when it is an error. */
static int
check(int err)
{
    if(err != NC_NOERR) {
        fail(err);
    }
    return err;
}
#else
/* Without DEBUG, check() is the identity. */
#define check(err) (err)
#endif
108
109
/*
110
Define a table of "mode=" string values
111
from which the implementation can be inferred.
112
Note that only cases that can currently
113
take URLs are included.
114
*/
115
static struct FORMATMODES {
116
    const char* tag;
117
    const int impl; /* NC_FORMATX_XXX value */
118
    const int format; /* NC_FORMAT_XXX value */
119
} formatmodes[] = {
120
{"dap2",NC_FORMATX_DAP2,NC_FORMAT_CLASSIC},
121
{"dap4",NC_FORMATX_DAP4,NC_FORMAT_NETCDF4},
122
{"netcdf-3",NC_FORMATX_NC3,0}, /* Might be e.g. cdf5 */
123
{"classic",NC_FORMATX_NC3,0}, /* ditto */
124
{"netcdf-4",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
125
{"enhanced",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
126
{"udf0",NC_FORMATX_UDF0,0},
127
{"udf1",NC_FORMATX_UDF1,0},
128
{"udf2",NC_FORMATX_UDF2,0},
129
{"udf3",NC_FORMATX_UDF3,0},
130
{"udf4",NC_FORMATX_UDF4,0},
131
{"udf5",NC_FORMATX_UDF5,0},
132
{"udf6",NC_FORMATX_UDF6,0},
133
{"udf7",NC_FORMATX_UDF7,0},
134
{"udf8",NC_FORMATX_UDF8,0},
135
{"udf9",NC_FORMATX_UDF9,0},
136
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
137
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
138
{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
139
{NULL,0},
140
};
141
142
/*
 * Macro table: a top-level fragment name (one given without a value) is
 * replaced by defkey=defvalues.  Lookup in processmacros stops at the first
 * entry whose name matches, so each name must appear only once; the
 * previously duplicated "zarr" entry was unreachable and has been dropped.
 * defvalues is NULL-terminated; the table ends with a NULL name.
 */
static const struct MACRODEF {
    char* name;
    char* defkey;
    char* defvalues[4];
} macrodefs[] = {
    {"zarr",     "mode", {"nczarr", "zarr", NULL}},
    {"dap2",     "mode", {"dap2", NULL}},
    {"dap4",     "mode", {"dap4", NULL}},
    {"s3",       "mode", {"s3", "nczarr", NULL}},
    {"bytes",    "mode", {"bytes", NULL}},
    {"xarray",   "mode", {"zarr", NULL}},
    {"noxarray", "mode", {"nczarr", "noxarray", NULL}},
    {"gs3",      "mode", {"gs3", "nczarr", NULL}}, /* Google S3 API */
    {NULL, NULL, {NULL}}
};
159
160
/*
 * Mode inferences: if the mode contains `key`, then add `inference`.
 *
 * Warning: be careful how this list is constructed, to avoid infinite
 * inference.  To (mostly) avoid that, any attempt to infer a value that
 * is already present is ignored.  This effectively means the inference
 * graph must be a DAG and may not have cycles.
 * You have been warned.
 *
 * The table ends with a {NULL,NULL} entry.
 */
static const struct MODEINFER {
    char* key;
    char* inference;
} modeinferences[] = {
    {"zarr",     "nczarr"},
    {"xarray",   "zarr"},
    {"noxarray", "nczarr"},
    {"noxarray", "zarr"},
    {NULL,       NULL}
};
179
180
/* Mode negations: if mode contains key, then remove all occurrences of the inference and repeat */
181
static const struct MODEINFER modenegations[] = {
182
{"bytes","nczarr"}, /* bytes negates (nc)zarr */
183
{"bytes","zarr"},
184
{"noxarray","xarray"},
185
{NULL,NULL}
186
};
187
188
/* Map FORMATX to readability to get magic number */
189
static struct Readable {
190
    int impl;
191
    int readable;
192
} readable[] = {
193
{NC_FORMATX_NC3,1},
194
{NC_FORMATX_NC_HDF5,1},
195
{NC_FORMATX_NC_HDF4,1},
196
{NC_FORMATX_PNETCDF,1},
197
{NC_FORMATX_DAP2,0},
198
{NC_FORMATX_DAP4,0},
199
{NC_FORMATX_UDF0,1},
200
{NC_FORMATX_UDF1,1},
201
{NC_FORMATX_UDF2,1},
202
{NC_FORMATX_UDF3,1},
203
{NC_FORMATX_UDF4,1},
204
{NC_FORMATX_UDF5,1},
205
{NC_FORMATX_UDF6,1},
206
{NC_FORMATX_UDF7,1},
207
{NC_FORMATX_UDF8,1},
208
{NC_FORMATX_UDF9,1},
209
{NC_FORMATX_NCZARR,0}, /* eventually make readable */
210
{0,0},
211
};
212
213
/*
 * The known URL protocols and their interpretation.
 *   protocol   - protocol name as it appears in the URL
 *   substitute - if non-NULL, protocol to put in the rewritten URL
 *   fragments  - if non-NULL, fragment arguments ('&'-separated
 *                key=value text) implied by the protocol
 */
static struct NCPROTOCOLLIST {
    const char* protocol;
    const char* substitute;
    const char* fragments; /* arbitrary fragment arguments */
} ncprotolist[] = {
    {"http",  NULL,   NULL},
    {"https", NULL,   NULL},
    {"file",  NULL,   NULL},
    {"dods",  "http", "mode=dap2"},
    {"dap4",  "http", "mode=dap4"},
    {"s3",    "s3",   "mode=s3"},
    {"gs3",   "gs3",  "mode=gs3"},
    {NULL,    NULL,   NULL} /* Terminate search */
};
228
229
/* Forward */
230
static int NC_omodeinfer(int useparallel, int omode, NCmodel*);
231
static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri);
232
static int processuri(const char* path, NCURI** urip, NClist* fraglist);
233
static int processmacros(NClist* fraglistp, NClist* expanded);
234
static char* envvlist2string(NClist* pairs, const char*);
235
static void set_default_mode(int* cmodep);
236
static int parseonchar(const char* s, int ch, NClist* segments);
237
static int mergelist(NClist** valuesp);
238
239
static int openmagic(struct MagicFile* file);
240
static int readmagic(struct MagicFile* file, size_t pos, char* magic);
241
static int closemagic(struct MagicFile* file);
242
static int NC_interpret_magic_number(char* magic, NCmodel* model);
243
#ifdef DEBUG
244
static void printmagic(const char* tag, char* magic,struct MagicFile*);
245
static void printlist(NClist* list, const char* tag);
246
#endif
247
static int isreadable(NCURI*,NCmodel*);
248
static char* list2string(NClist*);
249
static int parsepair(const char* pair, char** keyp, char** valuep);
250
static NClist* parsemode(const char* modeval);
251
static const char* getmodekey(const NClist* envv);
252
static int replacemode(NClist* envv, const char* newval);
253
static void infernext(NClist* current, NClist* next);
254
static int negateone(const char* mode, NClist* modes);
255
static void cleanstringlist(NClist* strs, int caseinsensitive);
256
static int isdaoscontainer(const char* path);
257
258
/*
259
If the path looks like a URL, then parse it, reformat it.
260
*/
261
static int
262
processuri(const char* path, NCURI** urip, NClist* fraglenv)
263
180
{
264
180
    int stat = NC_NOERR;
265
180
    int found = 0;
266
180
    NClist* tmp = NULL;
267
180
    struct NCPROTOCOLLIST* protolist;
268
180
    NCURI* uri = NULL;
269
180
    size_t pathlen = strlen(path);
270
180
    char* str = NULL;
271
180
    const NClist* ufrags;
272
180
    size_t i;
273
274
180
    if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;}
275
276
    /* Defaults */
277
180
    if(urip) *urip = NULL;
278
279
180
    ncuriparse(path,&uri);
280
180
    if(uri == NULL) goto done; /* not url */
281
282
    /* Look up the protocol */
283
0
    for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) {
284
0
        if(strcmp(uri->protocol,protolist->protocol) == 0) {
285
0
      found = 1;
286
0
      break;
287
0
  }
288
0
    }
289
0
    if(!found)
290
0
  {stat = NC_EINVAL; goto done;} /* unrecognized URL form */
291
292
    /* process the corresponding fragments for that protocol */
293
0
    if(protolist->fragments != NULL) {
294
0
  tmp = nclistnew();
295
0
  if((stat = parseonchar(protolist->fragments,'&',tmp))) goto done;
296
0
  for(i=0;i<nclistlength(tmp);i++) {
297
0
      char* key=NULL;
298
0
          char* value=NULL;
299
0
      if((stat = parsepair(nclistget(tmp,i),&key,&value))) goto done;
300
0
      if(value == NULL) value = strdup("");
301
0
      nclistpush(fraglenv,key);
302
0
          nclistpush(fraglenv,value);
303
0
  }
304
0
  nclistfreeall(tmp); tmp = NULL;
305
0
    }
306
307
    /* Substitute the protocol in any case */
308
0
    if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute);
309
310
    /* capture the fragments of the url */
311
0
    ufrags = (const NClist*)ncurifragmentparams(uri);
312
0
    for(i=0;i<nclistlength(ufrags);i+=2) {
313
0
  const char* key = nclistget(ufrags,i);
314
0
  const char* value = nclistget(ufrags,i+1);
315
0
        nclistpush(fraglenv,nulldup(key));
316
0
  value = (value==NULL?"":value);
317
0
  nclistpush(fraglenv,strdup(value));
318
0
    }
319
0
    if(urip) {
320
0
  *urip = uri;
321
0
  uri = NULL;
322
0
    }
323
324
180
done:
325
180
    nclistfreeall(tmp);
326
180
    nullfree(str);
327
180
    if(uri != NULL) ncurifree(uri);
328
180
    return check(stat);
329
0
}
330
331
/* Split a key=value pair */
332
static int
333
parsepair(const char* pair, char** keyp, char** valuep)
334
0
{
335
0
    const char* p;
336
0
    char* key = NULL;
337
0
    char* value = NULL;
338
339
0
    if(pair == NULL)
340
0
        return NC_EINVAL; /* empty pair */
341
0
    if(pair[0] == '\0' || pair[0] == '=')
342
0
        return NC_EINVAL; /* no key */
343
0
    p = strchr(pair,'=');
344
0
    if(p == NULL) {
345
0
  value = NULL;
346
0
  key = strdup(pair);
347
0
    } else {
348
0
  ptrdiff_t len = (p-pair);
349
0
  if((key = malloc((size_t)len+1))==NULL) return NC_ENOMEM;
350
0
  memcpy(key,pair,(size_t)len);
351
0
  key[len] = '\0';
352
0
  if(p[1] == '\0')
353
0
      value = NULL;
354
0
  else
355
0
      value = strdup(p+1);
356
0
    }
357
0
    if(keyp) {*keyp = key; key = NULL;};
358
0
    if(valuep) {*valuep = value; value = NULL;};
359
0
    nullfree(key);
360
0
    nullfree(value);
361
0
    return NC_NOERR;
362
0
}
363
364
#if 0
365
static int
366
parseurlmode(const char* modestr, NClist* list)
367
{
368
    int stat = NC_NOERR;
369
    const char* p = NULL;
370
    const char* endp = NULL;
371
372
    if(modestr == NULL || *modestr == '\0') goto done;
373
374
    /* Split modestr at the commas or EOL */
375
    p = modestr;
376
    for(;;) {
377
  char* s;
378
  ptrdiff_t slen;
379
  endp = strchr(p,',');
380
  if(endp == NULL) endp = p + strlen(p);
381
  slen = (endp - p);
382
  if((s = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
383
  memcpy(s,p,slen);
384
  s[slen] = '\0';
385
  nclistpush(list,s);
386
  if(*endp == '\0') break;
387
  p = endp+1;
388
    }
389
390
done:
391
    return check(stat);
392
}
393
#endif
394
395
/* Split a string at a given char */
396
static int
397
parseonchar(const char* s, int ch, NClist* segments)
398
0
{
399
0
    int stat = NC_NOERR;
400
0
    const char* p = NULL;
401
0
    const char* endp = NULL;
402
403
0
    if(s == NULL || *s == '\0') goto done;
404
405
0
    p = s;
406
0
    for(;;) {
407
0
  char* q;
408
0
  ptrdiff_t slen;
409
0
  endp = strchr(p,ch);
410
0
  if(endp == NULL) endp = p + strlen(p);
411
0
  slen = (endp - p);
412
0
  if((q = malloc((size_t)slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
413
0
  memcpy(q,p,(size_t)slen);
414
0
  q[slen] = '\0';
415
0
  nclistpush(segments,q);
416
0
  if(*endp == '\0') break;
417
0
  p = endp+1;
418
0
    }
419
420
0
done:
421
0
    return check(stat);
422
0
}
423
424
/* Convert a key,value envv pairlist into a delimited string*/
425
static char*
426
envvlist2string(NClist* envv, const char* delim)
427
0
{
428
0
    size_t i;
429
0
    NCbytes* buf = NULL;
430
0
    char* result = NULL;
431
432
0
    if(envv == NULL || nclistlength(envv) == 0) return NULL;
433
0
    buf = ncbytesnew();
434
0
    for(i=0;i<nclistlength(envv);i+=2) {
435
0
  const char* key = nclistget(envv,i);
436
0
  const char* val = nclistget(envv,i+1);
437
0
  if(key == NULL || strlen(key) == 0) continue;
438
0
  assert(val != NULL);
439
0
  if(i > 0) ncbytescat(buf,"&");
440
0
  ncbytescat(buf,key);
441
0
  if(val != NULL && val[0] != '\0') {
442
0
      ncbytescat(buf,"=");
443
0
      ncbytescat(buf,val);
444
0
  }
445
0
    }
446
0
    result = ncbytesextract(buf);
447
0
    ncbytesfree(buf);
448
0
    return result;
449
0
}
450
451
/* Given a mode= argument, fill in the impl */
452
static int
453
processmodearg(const char* arg, NCmodel* model)
454
0
{
455
0
    int stat = NC_NOERR;
456
0
    struct FORMATMODES* format = formatmodes;
457
0
    for(;format->tag;format++) {
458
0
  if(strcmp(format->tag,arg)==0) {
459
0
            model->impl = format->impl;
460
0
      if(format->format != 0) model->format = format->format;
461
0
  }
462
0
    }
463
0
    return check(stat);
464
0
}
465
466
/* Given an envv fragment list, do macro replacement */
467
static int
468
processmacros(NClist* fraglenv, NClist* expanded)
469
0
{
470
0
    size_t i;
471
0
    int stat = NC_NOERR;
472
0
    const struct MACRODEF* macros = NULL;
473
474
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
475
0
  int found = 0;
476
0
  char* key = nclistget(fraglenv,i);
477
0
  char* value = nclistget(fraglenv,i+1);
478
0
  if(strlen(value) == 0) { /* must be a singleton  */
479
0
            for(macros=macrodefs;macros->name;macros++) {
480
0
                if(strcmp(macros->name,key)==0) {
481
0
        char* const * p;
482
0
        nclistpush(expanded,strdup(macros->defkey));
483
0
        for(p=macros->defvalues;*p;p++) 
484
0
      nclistpush(expanded,strdup(*p));
485
0
        found = 1;        
486
0
        break;
487
0
          }
488
0
      }
489
0
  }
490
0
  if(!found) {/* pass thru */
491
0
      nclistpush(expanded,strdup(key));
492
0
          nclistpush(expanded,strdup(value));
493
0
  }
494
0
    }
495
496
0
    return check(stat);
497
0
}
498
499
/* Process mode flag inferences */
500
static int
501
processinferences(NClist* fraglenv)
502
0
{
503
0
    int stat = NC_NOERR;
504
0
    const char* modeval = NULL;
505
0
    NClist* newmodes = nclistnew();
506
0
    NClist* currentmodes = NULL;
507
0
    NClist* nextmodes = nclistnew();
508
0
    size_t i;
509
0
    char* newmodeval = NULL;
510
511
    /* Get "mode" entry */
512
0
    if((modeval = getmodekey(fraglenv))==NULL) goto done;
513
514
    /* Get the mode as list */
515
0
    currentmodes = parsemode(modeval);
516
517
#ifdef DEBUG
518
    printlist(currentmodes,"processinferences: initial mode list");
519
#endif
520
521
    /* Do what amounts to breadth first inferencing down the inference DAG. */
522
523
0
    for(;;) {
524
0
        NClist* tmp = NULL;
525
        /* Compute the next set of inferred modes */
526
#ifdef DEBUG
527
printlist(currentmodes,"processinferences: current mode list");
528
#endif
529
0
        infernext(currentmodes,nextmodes);
530
#ifdef DEBUG
531
printlist(nextmodes,"processinferences: next mode list");
532
#endif
533
        /* move current modes into list of newmodes */
534
0
        for(i=0;i<nclistlength(currentmodes);i++) {
535
0
      nclistpush(newmodes,nclistget(currentmodes,i));
536
0
  }
537
0
        nclistsetlength(currentmodes,0); /* clear current mode list */
538
0
        if(nclistlength(nextmodes) == 0) break; /* nothing more to do */
539
#ifdef DEBUG
540
printlist(newmodes,"processinferences: new mode list");
541
#endif
542
  /* Swap current and next */
543
0
        tmp = currentmodes;
544
0
  currentmodes = nextmodes;
545
0
  nextmodes = tmp;
546
0
        tmp = NULL;
547
0
    }
548
    /* cleanup any unused elements in currentmodes */
549
0
    nclistclearall(currentmodes);
550
551
    /* Ensure no duplicates */
552
0
    cleanstringlist(newmodes,1);
553
554
#ifdef DEBUG
555
    printlist(newmodes,"processinferences: final inferred mode list");
556
#endif
557
558
   /* Remove negative inferences */
559
0
   for(i=0;i<nclistlength(newmodes);i++) {
560
0
  const char* mode = nclistget(newmodes,i);
561
0
  negateone(mode,newmodes);
562
0
    }
563
564
    /* Store new mode value */
565
0
    if((newmodeval = list2string(newmodes))== NULL)
566
0
  {stat = NC_ENOMEM; goto done;}        
567
0
    if((stat=replacemode(fraglenv,newmodeval))) goto done;
568
0
    modeval = NULL;
569
570
0
done:
571
0
    nullfree(newmodeval);
572
0
    nclistfreeall(newmodes);
573
0
    nclistfreeall(currentmodes);
574
0
    nclistfreeall(nextmodes);
575
0
    return check(stat);
576
0
}
577
578
579
static int
580
negateone(const char* mode, NClist* newmodes)
581
0
{
582
0
    const struct MODEINFER* tests = modenegations;
583
0
    int changed = 0;
584
0
    for(;tests->key;tests++) {
585
0
  if(strcasecmp(tests->key,mode)==0) {
586
      /* Find and remove all instances of the inference value */
587
0
      for(size_t i = nclistlength(newmodes); i-- > 0;) {
588
0
    char* candidate = nclistget(newmodes,i);
589
0
    if(strcasecmp(candidate,tests->inference)==0) {
590
0
        nclistremove(newmodes,i);
591
0
        nullfree(candidate);
592
0
              changed = 1;
593
0
    }
594
0
      }
595
0
        }
596
0
    }
597
0
    return changed;
598
0
}
599
600
static void
601
infernext(NClist* current, NClist* next)
602
0
{
603
0
    size_t i;
604
0
    for(i=0;i<nclistlength(current);i++) {
605
0
        const struct MODEINFER* tests = NULL;
606
0
  const char* cur = nclistget(current,i);
607
0
        for(tests=modeinferences;tests->key;tests++) {
608
0
      if(strcasecmp(tests->key,cur)==0) {
609
          /* Append the inferred mode unless dup */
610
0
    if(!nclistmatch(next,tests->inference,1))
611
0
              nclistpush(next,strdup(tests->inference));
612
0
      }
613
0
        }
614
0
    }
615
0
}
616
617
/*
618
Given a list of strings, remove nulls and duplicates
619
*/
620
static int
621
mergelist(NClist** valuesp)
622
0
{
623
0
    size_t i,j;
624
0
    int stat = NC_NOERR;
625
0
    NClist* values = *valuesp;
626
0
    NClist* allvalues = nclistnew();
627
0
    NClist* newvalues = nclistnew();
628
0
    char* value = NULL;
629
630
0
    for(i=0;i<nclistlength(values);i++) {
631
0
  char* val1 = nclistget(values,i);
632
  /* split on commas and put pieces into allvalues */
633
0
  if((stat=parseonchar(val1,',',allvalues))) goto done;
634
0
    }
635
    /* Remove duplicates and "" */
636
0
    while(nclistlength(allvalues) > 0) {
637
0
  value = nclistremove(allvalues,0);
638
0
  if(strlen(value) == 0) {
639
0
      nullfree(value); value = NULL;
640
0
  } else {
641
0
      for(j=0;j<nclistlength(newvalues);j++) {
642
0
          char* candidate = nclistget(newvalues,j);
643
0
          if(strcasecmp(candidate,value)==0)
644
0
              {nullfree(value); value = NULL; break;}
645
0
       }
646
0
  }
647
0
  if(value != NULL) {nclistpush(newvalues,value); value = NULL;}
648
0
    }
649
    /* Make sure to have at least 1 value */
650
0
    if(nclistlength(newvalues)==0) nclistpush(newvalues,strdup(""));
651
0
    *valuesp = values; values = NULL;
652
653
0
done:
654
0
    nclistfree(allvalues);
655
0
    nclistfreeall(values);
656
0
    nclistfreeall(newvalues);
657
0
    return check(stat);
658
0
}
659
660
static int
661
lcontains(NClist* l, const char* key0)
662
0
{
663
0
    size_t i;
664
0
    for(i=0;i<nclistlength(l);i++) {
665
0
        const char* key1 = nclistget(l,i);
666
0
  if(strcasecmp(key0,key1)==0) return 1;
667
0
    }
668
0
    return 0;
669
0
}
670
671
/* Warning values should not use nclistfreeall */
672
static void
673
collectvaluesbykey(NClist* fraglenv, const char* key, NClist* values)
674
0
{
675
0
    size_t i;
676
    /* collect all the values with the same key (including this one) */
677
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
678
0
        const char* key2 = nclistget(fraglenv,i);
679
0
        if(strcasecmp(key,key2)==0) {
680
0
      const char* value2 = nclistget(fraglenv,i+1);
681
0
      nclistpush(values,value2); value2 = NULL;
682
0
  }
683
0
    }
684
0
}
685
686
/* Warning allkeys should not use nclistfreeall */
687
static void
688
collectallkeys(NClist* fraglenv, NClist* allkeys)
689
0
{
690
0
    size_t i;
691
    /* collect all the distinct keys */
692
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
693
0
  char* key = nclistget(fraglenv,i);
694
0
  if(!lcontains(allkeys,key)) {
695
0
      nclistpush(allkeys,key);
696
0
  }
697
0
    }
698
0
}
699
700
/* Given a fragment envv list, coalesce duplicate keys and remove duplicate values*/
701
static int
702
cleanfragments(NClist* fraglenv, NClist* newlist)
703
0
{
704
0
    size_t i;
705
0
    int stat = NC_NOERR;
706
0
    NClist* tmp = NULL;
707
0
    NClist* allkeys = NULL;
708
0
    NCbytes* buf = NULL;
709
0
    char* key = NULL;
710
0
    char* value = NULL;
711
712
0
    buf = ncbytesnew();
713
0
    allkeys = nclistnew();
714
0
    tmp = nclistnew();
715
716
    /* collect all unique keys */
717
0
    collectallkeys(fraglenv,allkeys);
718
    /* Collect all values for same key across all fragment pairs */
719
0
    for(i=0;i<nclistlength(allkeys);i++) {
720
0
  key = nclistget(allkeys,i);
721
0
  collectvaluesbykey(fraglenv,key,tmp);
722
  /* merge the key values, remove duplicate */
723
0
  if((stat=mergelist(&tmp))) goto done;
724
        /* Construct key,value pair and insert into newlist */
725
0
  key = strdup(key);
726
0
  nclistpush(newlist,key);
727
0
  value = list2string(tmp);
728
0
  nclistpush(newlist,value);
729
0
  nclistclear(tmp);
730
0
    }
731
0
done:
732
0
    nclistfree(allkeys);
733
0
    nclistfree(tmp);
734
0
    ncbytesfree(buf);
735
0
    return check(stat);
736
0
}
737
738
/* process non-mode fragment keys in case they hold significance; currently not */
739
static int
740
processfragmentkeys(const char* key, const char* value, NCmodel* model)
741
0
{
742
0
    return NC_NOERR;
743
0
}
744
745
/*
746
Infer from the mode + useparallel
747
only call if iscreate or file is not easily readable.
748
*/
749
static int
750
NC_omodeinfer(int useparallel, int cmode, NCmodel* model)
751
180
{
752
180
    int stat = NC_NOERR;
753
754
    /* If no format flags are set, then use default */
755
180
    if(!fIsSet(cmode,NC_FORMAT_ALL))
756
180
  set_default_mode(&cmode);
757
758
    /* Process the cmode; may override some already set flags. The
759
     * user-defined formats must be checked first. They may choose to
760
     * use some of the other flags, like NC_NETCDF4, so we must first
761
     * check NC_UDF0-NC_UDF9 before checking for any other flag. */
762
180
    int udf_found = 0;
763
    /* Lookup table for all UDF mode flags. This replaces the previous bit-shift
764
     * calculation which was fragile due to non-sequential bit positions
765
     * (bits 16, 19-25 to avoid conflicts with NC_NOATTCREORD and NC_NODIMSCALE_ATTACH). */
766
180
    static const int udf_flags[NC_MAX_UDF_FORMATS] = {
767
180
        NC_UDF0, NC_UDF1, NC_UDF2, NC_UDF3, NC_UDF4,
768
180
        NC_UDF5, NC_UDF6, NC_UDF7, NC_UDF8, NC_UDF9
769
180
    };
770
    /* Check if any UDF format flag is set in the mode */
771
1.98k
    for(int i = 0; i < NC_MAX_UDF_FORMATS; i++) {
772
1.80k
        if(fIsSet(cmode, udf_flags[i])) {
773
            /* Convert array index to format constant (handles gap in numbering) */
774
0
            int formatx = (i <= 1) ? (NC_FORMATX_UDF0 + i) : (NC_FORMATX_UDF2 + i - 2);
775
0
            model->impl = formatx;
776
0
            udf_found = 1;
777
0
            break;
778
0
        }
779
1.80k
    }
780
    
781
180
    if(udf_found)
782
0
    {
783
0
        if(fIsSet(cmode,NC_64BIT_OFFSET)) 
784
0
        {
785
0
            model->format = NC_FORMAT_64BIT_OFFSET;
786
0
        }
787
0
        else if(fIsSet(cmode,NC_64BIT_DATA))
788
0
        {
789
0
            model->format = NC_FORMAT_64BIT_DATA;
790
0
        }
791
0
        else if(fIsSet(cmode,NC_NETCDF4))
792
0
        {
793
0
            if(fIsSet(cmode,NC_CLASSIC_MODEL))
794
0
                model->format = NC_FORMAT_NETCDF4_CLASSIC;
795
0
            else
796
0
                model->format = NC_FORMAT_NETCDF4;
797
0
        }
798
0
        if(! model->format)
799
0
            model->format = NC_FORMAT_CLASSIC;
800
0
  goto done;
801
0
    }
802
803
180
    if(fIsSet(cmode,NC_64BIT_OFFSET)) {
804
0
  model->impl = NC_FORMATX_NC3;
805
0
  model->format = NC_FORMAT_64BIT_OFFSET;
806
0
        goto done;
807
0
    }
808
809
180
    if(fIsSet(cmode,NC_64BIT_DATA)) {
810
0
  model->impl = NC_FORMATX_NC3;
811
0
  model->format = NC_FORMAT_64BIT_DATA;
812
0
        goto done;
813
0
    }
814
815
180
    if(fIsSet(cmode,NC_NETCDF4)) {
816
0
  model->impl = NC_FORMATX_NC4;
817
0
        if(fIsSet(cmode,NC_CLASSIC_MODEL))
818
0
      model->format = NC_FORMAT_NETCDF4_CLASSIC;
819
0
  else
820
0
      model->format = NC_FORMAT_NETCDF4;
821
0
        goto done;
822
0
    }
823
824
    /* Default to classic model */
825
180
    model->format = NC_FORMAT_CLASSIC;
826
180
    model->impl = NC_FORMATX_NC3;
827
828
180
done:
829
    /* Apply parallel flag */
830
180
    if(useparallel) {
831
0
        if(model->impl == NC_FORMATX_NC3)
832
0
      model->impl = NC_FORMATX_PNETCDF;
833
0
    }
834
180
    return check(stat);
835
180
}
836
837
/*
838
If the mode flags do not necessarily specify the
839
format, then default it by adding in appropriate flags.
840
*/
841
842
static void
843
set_default_mode(int* modep)
844
180
{
845
180
    int mode = *modep;
846
180
    int dfaltformat;
847
848
180
    dfaltformat = nc_get_default_format();
849
180
    switch (dfaltformat) {
850
0
    case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break;
851
0
    case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break;
852
0
    case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break;
853
0
    case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break;
854
180
    case NC_FORMAT_CLASSIC: /* fall thru */
855
180
    default: break; /* default to classic */
856
180
    }
857
180
    *modep = mode; /* final result */
858
180
}
859
860
/**************************************************/
861
/*
862
   Infer model for this dataset using some
863
   combination of cmode, path, and reading the dataset.
864
   See the documentation in docs/internal.dox.
865
866
@param path
867
@param omode
868
@param iscreate
869
@param useparallel
870
@param params
871
@param model
872
@param newpathp
873
*/
874
875
int
876
NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp)
877
180
{
878
180
    size_t i;
879
180
    int stat = NC_NOERR;
880
180
    NCURI* uri = NULL;
881
180
    int omode = *omodep;
882
180
    NClist* fraglenv = nclistnew();
883
180
    NClist* modeargs = nclistnew();
884
180
    char* sfrag = NULL;
885
180
    const char* modeval = NULL;
886
180
    char* abspath = NULL;
887
180
    NClist* tmp = NULL;
888
889
    /* Phase 1:
890
       1. convert special protocols to http|https
891
       2. begin collecting fragments
892
    */
893
180
    if((stat = processuri(path, &uri, fraglenv))) goto done;
894
895
180
    if(uri != NULL) {
896
#ifdef DEBUG
897
  printlist(fraglenv,"processuri");
898
#endif
899
900
        /* Phase 2: Expand macros and add to fraglenv */
901
0
  nclistfreeall(tmp);
902
0
  tmp = nclistnew();
903
0
        if((stat = processmacros(fraglenv,tmp))) goto done;
904
0
  nclistfreeall(fraglenv);
905
0
  fraglenv = tmp; tmp = NULL;
906
#ifdef DEBUG
907
  printlist(fraglenv,"processmacros");
908
#endif
909
  /* Cleanup the fragment list */
910
0
  nclistfreeall(tmp);
911
0
  tmp = nclistnew();
912
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
913
0
  nclistfreeall(fraglenv);
914
0
  fraglenv = tmp; tmp = NULL;
915
916
        /* Phase 2a: Expand mode inferences and add to fraglenv */
917
0
        if((stat = processinferences(fraglenv))) goto done;
918
#ifdef DEBUG
919
  printlist(fraglenv,"processinferences");
920
#endif
921
922
        /* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
923
0
  nclistfreeall(tmp);
924
0
  tmp = nclistnew();
925
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
926
0
  nclistfreeall(fraglenv);
927
0
  fraglenv = tmp; tmp = NULL;
928
#ifdef DEBUG
929
  printlist(fraglenv,"cleanfragments");
930
#endif
931
932
        /* Phase 4: Rebuild the url fragment and rebuilt the url */
933
0
        sfrag = envvlist2string(fraglenv,"&");
934
0
        nclistfreeall(fraglenv); fraglenv = NULL;
935
#ifdef DEBUG
936
  fprintf(stderr,"frag final: %s\n",sfrag);
937
#endif
938
0
        ncurisetfragments(uri,sfrag);
939
0
        nullfree(sfrag); sfrag = NULL;
940
941
#ifdef NETCDF_ENABLE_S3
942
  /* If s3, then rebuild the url */
943
  if(NC_iss3(uri,NULL)) {
944
      NCURI* newuri = NULL;
945
      if((stat = NC_s3urlrebuild(uri,NULL,&newuri))) goto done;
946
      ncurifree(uri);
947
      uri = newuri;
948
  } else
949
#endif
950
0
  if(strcmp(uri->protocol,"file")==0) {
951
            /* convert path to absolute */
952
0
      char* canon = NULL;
953
0
      abspath = NCpathabsolute(uri->path);
954
0
      if((stat = NCpathcanonical(abspath,&canon))) goto done;
955
0
      nullfree(abspath);
956
0
      abspath = canon; canon = NULL;
957
0
      if((stat = ncurisetpath(uri,abspath))) goto done;
958
0
  }
959
  
960
  /* rebuild the path */
961
0
        if(newpathp) {
962
0
            *newpathp = ncuribuild(uri,NULL,NULL,NCURIALL);
963
#ifdef DEBUG
964
      fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr);
965
#endif    
966
0
  }
967
968
        /* Phase 5: Process the mode key to see if we can tell the formatx */
969
0
        modeval = ncurifragmentlookup(uri,"mode");
970
0
        if(modeval != NULL) {
971
0
      if((stat = parseonchar(modeval,',',modeargs))) goto done;
972
0
            for(i=0;i<nclistlength(modeargs);i++) {
973
0
          const char* arg = nclistget(modeargs,i);
974
0
          if((stat=processmodearg(arg,model))) goto done;
975
0
            }
976
0
  }
977
978
        /* Phase 6: Process the non-mode keys to see if we can tell the formatx */
979
0
  if(!modelcomplete(model)) {
980
0
      size_t i;
981
0
      NClist* p = (NClist*)ncurifragmentparams(uri); /* envv format */
982
0
      for(i=0;i<nclistlength(p);i+=2) {
983
0
    const char* key = nclistget(p,0);
984
0
    const char* value = nclistget(p,1);
985
0
    if((stat=processfragmentkeys(key,value,model))) goto done;
986
0
      }
987
0
  }
988
989
        /* Phase 7: Special cases: if this is a URL and model.impl is still not defined */
990
        /* Phase7a: Default is DAP2 */
991
0
        if(!modelcomplete(model)) {
992
0
      model->impl = NC_FORMATX_DAP2;
993
0
      model->format = NC_FORMAT_NC3;
994
0
        }
995
996
180
    } else {/* Not URL */
997
180
  if(newpathp) *newpathp = NULL;
998
180
    }
999
1000
    /* Phase 8: mode inference from mode flags */
1001
    /* The modeargs did not give us a model (probably not a URL).
1002
       So look at the combination of mode flags and the useparallel flag */
1003
180
    if(!modelcomplete(model)) {
1004
180
        if((stat = NC_omodeinfer(useparallel,omode,model))) goto done;
1005
180
    }
1006
1007
    /* Phase 9: Special case for file stored in DAOS container */
1008
180
    if(isdaoscontainer(path) == NC_NOERR) {
1009
        /* This is a DAOS container, so immediately assume it is HDF5. */
1010
0
        model->impl = NC_FORMATX_NC_HDF5;
1011
0
        model->format = NC_FORMAT_NETCDF4;
1012
180
    } else {
1013
        /* Phase 10: Infer from file content, if possible;
1014
           this has highest precedence, so it may override
1015
           previous decisions. Note that we do this last
1016
           because we need previously determined model info
1017
           to guess if this file is readable.
1018
        */
1019
180
        if(!iscreate && isreadable(uri,model)) {
1020
       /* Ok, we need to try to read the file */
1021
180
            if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done;
1022
180
        }
1023
180
    }
1024
1025
    /* Need a decision */
1026
155
    if(!modelcomplete(model))
1027
0
  {stat = NC_ENOTNC; goto done;}
1028
1029
    /* Force flag consistency */
1030
155
    switch (model->impl) {
1031
1
    case NC_FORMATX_NC4:
1032
1
    case NC_FORMATX_NC_HDF4:
1033
1
    case NC_FORMATX_DAP4:
1034
1
    case NC_FORMATX_NCZARR:
1035
1
  omode |= NC_NETCDF4;
1036
1
  if(model->format == NC_FORMAT_NETCDF4_CLASSIC)
1037
0
      omode |= NC_CLASSIC_MODEL;
1038
1
  break;
1039
154
    case NC_FORMATX_NC3:
1040
154
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1041
154
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1042
97
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1043
154
  break;
1044
0
    case NC_FORMATX_PNETCDF:
1045
0
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1046
0
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1047
0
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1048
0
  break;
1049
0
    case NC_FORMATX_DAP2:
1050
0
  omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL);
1051
0
  break;
1052
0
    case NC_FORMATX_UDF0:
1053
0
    case NC_FORMATX_UDF1:
1054
0
    case NC_FORMATX_UDF2:
1055
0
    case NC_FORMATX_UDF3:
1056
0
    case NC_FORMATX_UDF4:
1057
0
    case NC_FORMATX_UDF5:
1058
0
    case NC_FORMATX_UDF6:
1059
0
    case NC_FORMATX_UDF7:
1060
0
    case NC_FORMATX_UDF8:
1061
0
    case NC_FORMATX_UDF9:
1062
0
        if(model->format == NC_FORMAT_64BIT_OFFSET) 
1063
0
            omode |= NC_64BIT_OFFSET;
1064
0
        else if(model->format == NC_FORMAT_64BIT_DATA)
1065
0
            omode |= NC_64BIT_DATA;
1066
0
        else if(model->format == NC_FORMAT_NETCDF4)  
1067
0
            omode |= NC_NETCDF4;
1068
0
        else if(model->format == NC_FORMAT_NETCDF4_CLASSIC)  
1069
0
            omode |= NC_NETCDF4|NC_CLASSIC_MODEL;
1070
0
        break;
1071
0
    default:
1072
0
  {stat = NC_ENOTNC; goto done;}
1073
155
    }
1074
1075
180
done:
1076
180
    nullfree(sfrag);
1077
180
    nullfree(abspath);
1078
180
    ncurifree(uri);
1079
180
    nclistfreeall(modeargs);
1080
180
    nclistfreeall(fraglenv);
1081
180
    nclistfreeall(tmp);
1082
180
    *omodep = omode; /* in/out */
1083
180
    return check(stat);
1084
155
}
1085
1086
static int
1087
isreadable(NCURI* uri, NCmodel* model)
1088
180
{
1089
180
    int canread = 0;
1090
180
    struct Readable* r;
1091
    /* Step 1: Look up the implementation */
1092
180
    for(r=readable;r->impl;r++) {
1093
180
  if(model->impl == r->impl) {canread = r->readable; break;}
1094
180
    }
1095
    /* Step 2: check for bytes mode */
1096
180
    if(!canread && NC_testmode(uri,"bytes") && (model->impl == NC_FORMATX_NC4 || model->impl == NC_FORMATX_NC_HDF5))
1097
0
        canread = 1;
1098
180
    return canread;
1099
180
}
1100
1101
#if 0
/* Return a heap-allocated copy of s, treating NULL as the empty string.
   The caller owns (and must free) the result.
   A single strdup is used so that no intermediate copy is leaked
   when s is NULL. */
static char*
emptyify(char* s)
{
    return strdup(s == NULL ? "" : s);
}

/* Map the empty string to NULL; any other value (including NULL)
   is returned unchanged. No allocation takes place. */
static const char*
nullify(const char* s)
{
    if(s != NULL && s[0] == '\0')
        return NULL;
    return s;
}
#endif
1117
1118
/**************************************************/
1119
/* Envv list utilities */
1120
1121
static const char*
1122
getmodekey(const NClist* envv)
1123
0
{
1124
0
    size_t i;
1125
    /* Get "mode" entry */
1126
0
    for(i=0;i<nclistlength(envv);i+=2) {
1127
0
  char* key = NULL;
1128
0
  key = nclistget(envv,i);
1129
0
  if(strcasecmp(key,"mode")==0)
1130
0
      return nclistget(envv,i+1);
1131
0
    }
1132
0
    return NULL;
1133
0
}
1134
1135
static int
1136
replacemode(NClist* envv, const char* newval)
1137
0
{
1138
0
    size_t i;
1139
    /* Get "mode" entry */
1140
0
    for(i=0;i<nclistlength(envv);i+=2) {
1141
0
  char* key = NULL;
1142
0
  char* val = NULL;
1143
0
  key = nclistget(envv,i);
1144
0
  if(strcasecmp(key,"mode")==0) {
1145
0
      val = nclistget(envv,i+1);      
1146
0
      nclistset(envv,i+1,strdup(newval));
1147
0
      nullfree(val);
1148
0
      return NC_NOERR;
1149
0
  }
1150
0
    }
1151
0
    return NC_EINVAL;
1152
0
}
1153
1154
/* Split a comma separated mode value into a newly created list.
   A NULL modeval produces an empty list. Any status returned by
   parseonchar() is deliberately ignored. The caller owns the list. */
static NClist*
parsemode(const char* modeval)
{
    NClist* result = nclistnew();

    if(modeval == NULL)
        return result;
    (void)parseonchar(modeval,',',result); /* split on commas */
    return result;
}
1162
1163
/* Convert a list into a comma'd string */
1164
static char*
1165
list2string(NClist* list)
1166
0
{
1167
0
    size_t i;
1168
0
    NCbytes* buf = NULL;
1169
0
    char* result = NULL;
1170
1171
0
    if(list == NULL || nclistlength(list)==0) return strdup("");
1172
0
    buf = ncbytesnew();
1173
0
    for(i=0;i<nclistlength(list);i++) {
1174
0
  const char* m = nclistget(list,i);
1175
0
  if(m == NULL || strlen(m) == 0) continue;
1176
0
  if(i > 0) ncbytescat(buf,",");
1177
0
  ncbytescat(buf,m);
1178
0
    }
1179
0
    result = ncbytesextract(buf);
1180
0
    ncbytesfree(buf);
1181
0
    if(result == NULL) result = strdup("");
1182
0
    return result;
1183
0
}
1184
1185
#if 0
/* Given a comma separated string, remove duplicates; mostly used to cleanup mode list.
   Returns a newly allocated string that the caller must free
   (or NULL when commalist is NULL).
   The scratch list is created only after the trivial-input check so
   that the early return does not leak it. */
static char*
cleancommalist(const char* commalist, int caseinsensitive)
{
    NClist* tmp = NULL;
    char* newlist = NULL;

    if(commalist == NULL || strlen(commalist)==0) return nulldup(commalist);
    tmp = nclistnew();
    (void)parseonchar(commalist,',',tmp);/* split on commas */
    cleanstringlist(tmp,caseinsensitive);
    newlist = list2string(tmp);
    nclistfreeall(tmp);
    return newlist;
}
#endif
1200
1201
/* Given a list of strings, remove nulls and duplicated */
1202
static void
1203
cleanstringlist(NClist* strs, int caseinsensitive)
1204
0
{
1205
0
    if(nclistlength(strs) == 0) return;
1206
    /* Remove nulls */
1207
0
    for(size_t i = nclistlength(strs); i-->0;) {
1208
0
        if(nclistget(strs,i)==NULL) nclistremove(strs,i);
1209
0
    }
1210
0
    if(nclistlength(strs) <= 1) return;
1211
    /* Remove duplicates*/
1212
0
    for(size_t i=0;i<nclistlength(strs);i++) {
1213
0
        const char* value = nclistget(strs,i);
1214
        /* look ahead for duplicates */
1215
0
        for(size_t j=nclistlength(strs)-1;j>i;j--) {
1216
0
            int match;
1217
0
            const char* candidate = nclistget(strs,j);
1218
0
            if(caseinsensitive)
1219
0
                match = (strcasecmp(value,candidate) == 0);
1220
0
            else
1221
0
                match = (strcmp(value,candidate) == 0);
1222
0
            if(match) {char* dup = nclistremove(strs,j); nullfree(dup);}
1223
0
        }
1224
0
    }
1225
0
}
1226
1227
1228
/**************************************************/
1229
/**
1230
 * @internal Given an existing file, figure out its format and return
1231
 * that format value (NC_FORMATX_XXX) in model arg. Assume any path
1232
 * conversion was already performed at a higher level.
1233
 *
1234
 * @param path File name.
1235
 * @param flags
1236
 * @param use_parallel
1237
 * @param parameters
1238
 * @param model Pointer that gets the model to use for the dispatch table.
1239
 * @param version Pointer that gets version of the file.
1240
 *
1241
 * @return ::NC_NOERR No error.
1242
 * @author Dennis Heimbigner
1243
*/
1244
static int
1245
check_file_type(const char *path, int omode, int use_parallel,
1246
       void *parameters, NCmodel* model, NCURI* uri)
1247
180
{
1248
180
    char magic[NC_MAX_MAGIC_NUMBER_LEN];
1249
180
    int status = NC_NOERR;
1250
180
    struct MagicFile magicinfo;
1251
#ifdef _WIN32
1252
    NC* nc = NULL;
1253
#endif
1254
1255
180
    memset((void*)&magicinfo,0,sizeof(magicinfo));
1256
1257
#ifdef _WIN32 /* including MINGW */
1258
    /* Windows does not handle multiple handles to the same file very well.
1259
       So if file is already open/created, then find it and just get the
1260
       model from that. */
1261
    if((nc = find_in_NCList_by_name(path)) != NULL) {
1262
  int format = 0;
1263
  /* Get the model from this NC */
1264
  if((status = nc_inq_format_extended(nc->ext_ncid,&format,NULL))) goto done;
1265
  model->impl = format;
1266
  if((status = nc_inq_format(nc->ext_ncid,&format))) goto done;
1267
  model->format = format;
1268
  goto done;
1269
    }
1270
#endif
1271
1272
180
    magicinfo.path = path; /* do not free */
1273
180
    magicinfo.uri = uri; /* do not free */
1274
180
    magicinfo.omode = omode;
1275
180
    magicinfo.model = model; /* do not free */
1276
180
    magicinfo.parameters = parameters; /* do not free */
1277
#ifdef USE_STDIO
1278
    magicinfo.use_parallel = 0;
1279
#else
1280
180
    magicinfo.use_parallel = use_parallel;
1281
180
#endif
1282
1283
180
    if((status = openmagic(&magicinfo))) goto done;
1284
1285
    /* Verify we have a large enough file */
1286
180
    if(MAGIC_NUMBER_LEN >= (unsigned long long)magicinfo.filelen)
1287
0
  {status = NC_ENOTNC; goto done;}
1288
180
    if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) {
1289
0
  status = NC_ENOTNC;
1290
0
  goto done;
1291
0
    }
1292
1293
    /* Look at the magic number */
1294
180
    if(NC_interpret_magic_number(magic,model) == NC_NOERR
1295
154
  && model->format != 0) {
1296
154
        if (use_parallel && (model->format == NC_FORMAT_NC3 || model->impl == NC_FORMATX_NC3))
1297
            /* this is called from nc_open_par() and file is classic */
1298
0
            model->impl = NC_FORMATX_PNETCDF;
1299
154
        goto done; /* found something */
1300
154
    }
1301
1302
    /* Remaining case when implementation is an HDF5 file;
1303
       search forward at starting at 512
1304
       and doubling to see if we have HDF5 magic number */
1305
26
    {
1306
26
  size_t pos = 512L;
1307
242
        for(;;) {
1308
242
      if((pos+MAGIC_NUMBER_LEN) > (unsigned long long)magicinfo.filelen)
1309
25
    {status = NC_ENOTNC; goto done;}
1310
217
            if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR)
1311
0
          {status = NC_ENOTNC; goto done; }
1312
217
            NC_interpret_magic_number(magic,model);
1313
217
            if(model->impl == NC_FORMATX_NC4) break;
1314
      /* double and try again */
1315
216
      pos = 2*pos;
1316
216
        }
1317
26
    }
1318
180
done:
1319
180
    closemagic(&magicinfo);
1320
180
    return check(status);
1321
26
}
1322
1323
/**
1324
\internal
1325
\ingroup datasets
1326
Provide open, read and close for use when searching for magic numbers
1327
*/
1328
static int
1329
openmagic(struct MagicFile* file)
1330
180
{
1331
180
    int status = NC_NOERR;
1332
180
    if(fIsSet(file->omode,NC_INMEMORY)) {
1333
  /* Get its length */
1334
180
  NC_memio* meminfo = (NC_memio*)file->parameters;
1335
180
        assert(meminfo != NULL);
1336
180
  file->filelen = (long long)meminfo->size;
1337
180
  goto done;
1338
180
    }
1339
0
    if(file->uri != NULL) {
1340
#ifdef NETCDF_ENABLE_BYTERANGE
1341
  /* Construct a URL minus any fragment */
1342
        file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC);
1343
  /* Open the curl handle */
1344
        if((status=nc_http_open(file->path, &file->state))) goto done;
1345
  if((status=nc_http_size(file->state,&file->filelen))) goto done;
1346
#else /*!BYTERANGE*/
1347
0
  {status = NC_ENOTBUILT;}
1348
0
#endif /*BYTERANGE*/
1349
0
  goto done;
1350
0
    } 
1351
#ifdef USE_PARALLEL
1352
    if (file->use_parallel) {
1353
  int retval;
1354
  MPI_Offset size;
1355
        assert(file->parameters != NULL);
1356
  if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm,
1357
                                   (char*)file->path,MPI_MODE_RDONLY,
1358
                                   ((NC_MPI_INFO*)file->parameters)->info,
1359
                                   &file->fh)) != MPI_SUCCESS) {
1360
#ifdef MPI_ERR_NO_SUCH_FILE
1361
      int errorclass;
1362
      MPI_Error_class(retval, &errorclass);
1363
      if (errorclass == MPI_ERR_NO_SUCH_FILE)
1364
#ifdef NC_ENOENT
1365
          status = NC_ENOENT;
1366
#else /*!NC_ENOENT*/
1367
    status = errno;
1368
#endif /*NC_ENOENT*/
1369
      else
1370
#endif /*MPI_ERR_NO_SUCH_FILE*/
1371
          status = NC_EPARINIT;
1372
      file->fh = MPI_FILE_NULL;
1373
      goto done;
1374
  }
1375
  /* Get its length */
1376
  if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS)
1377
      {status = NC_EPARINIT; goto done;}
1378
  file->filelen = (long long)size;
1379
  goto done;
1380
    }
1381
#endif /* USE_PARALLEL */
1382
0
    {
1383
0
        if (file->path == NULL || strlen(file->path) == 0)
1384
0
            {status = NC_EINVAL; goto done;}
1385
0
        file->fp = NCfopen(file->path, "r");
1386
0
        if(file->fp == NULL)
1387
0
      {status = errno; goto done;}
1388
  /* Get its length */
1389
0
  {
1390
0
      int fd = fileno(file->fp);
1391
#ifdef _WIN32
1392
      __int64 len64 = _filelengthi64(fd);
1393
      if(len64 < 0)
1394
    {status = errno; goto done;}
1395
      file->filelen = (long long)len64;
1396
#else
1397
0
      off_t size;
1398
0
      size = lseek(fd, 0, SEEK_END);
1399
0
      if(size == -1)
1400
0
    {status = errno; goto done;}
1401
0
    file->filelen = (long long)size;
1402
0
#endif
1403
0
  }
1404
0
        int retval2 = fseek(file->fp, 0L, SEEK_SET);        
1405
0
      if(retval2 != 0)
1406
0
    {status = errno; goto done;}
1407
0
    }
1408
180
done:
1409
180
    return check(status);
1410
0
}
1411
1412
static int
1413
readmagic(struct MagicFile* file, size_t pos, char* magic)
1414
397
{
1415
397
    int status = NC_NOERR;
1416
397
    NCbytes* buf = ncbytesnew();
1417
1418
397
    memset(magic,0,MAGIC_NUMBER_LEN);
1419
397
    if(fIsSet(file->omode,NC_INMEMORY)) {
1420
397
  char* mempos;
1421
397
  NC_memio* meminfo = (NC_memio*)file->parameters;
1422
397
  if((pos + MAGIC_NUMBER_LEN) > meminfo->size)
1423
0
      {status = NC_EINMEMORY; goto done;}
1424
397
  mempos = ((char*)meminfo->memory) + pos;
1425
397
  memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN);
1426
#ifdef DEBUG
1427
  printmagic("XXX: readmagic",magic,file);
1428
#endif
1429
397
    } else if(file->uri != NULL) {
1430
#ifdef NETCDF_ENABLE_BYTERANGE
1431
        size64_t start = (size64_t)pos;
1432
        size64_t count = MAGIC_NUMBER_LEN;
1433
        status = nc_http_read(file->state, start, count, buf);
1434
        if (status == NC_NOERR) {
1435
            if (ncbyteslength(buf) != count)
1436
                status = NC_EINVAL;
1437
            else
1438
                memcpy(magic, ncbytescontents(buf), count);
1439
        }
1440
#endif
1441
0
    } else {
1442
#ifdef USE_PARALLEL
1443
        if (file->use_parallel) {
1444
      MPI_Status mstatus;
1445
      int retval;
1446
      if((retval = MPI_File_read_at_all(file->fh, pos, magic,
1447
          MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS)
1448
          {status = NC_EPARINIT; goto done;}
1449
        }
1450
        else
1451
#endif /* USE_PARALLEL */
1452
0
        { /* Ordinary read */
1453
0
            long i;
1454
0
            i = fseek(file->fp, (long)pos, SEEK_SET);
1455
0
            if (i < 0) { status = errno; goto done; }
1456
0
            ncbytessetlength(buf, 0);
1457
0
            if ((status = NC_readfileF(file->fp, buf, MAGIC_NUMBER_LEN))) goto done;
1458
0
            memcpy(magic, ncbytescontents(buf), MAGIC_NUMBER_LEN);
1459
0
        }
1460
0
    }
1461
1462
397
done:
1463
397
    ncbytesfree(buf);
1464
397
    if(file && file->fp) clearerr(file->fp);
1465
397
    return check(status);
1466
397
}
1467
1468
/**
1469
 * Close the file opened to check for magic number.
1470
 *
1471
 * @param file pointer to the MagicFile struct for this open file.
1472
 * @returns NC_NOERR for success
1473
 * @returns NC_EPARINIT if there was a problem closing file with MPI
1474
 * (parallel builds only).
1475
 * @author Dennis Heimbigner
1476
 */
1477
static int
1478
closemagic(struct MagicFile* file)
1479
180
{
1480
180
    int status = NC_NOERR;
1481
1482
180
    if(fIsSet(file->omode,NC_INMEMORY)) {
1483
  /* noop */
1484
180
    } else if(file->uri != NULL) {
1485
#ifdef NETCDF_ENABLE_BYTERANGE
1486
      status = nc_http_close(file->state);
1487
#endif
1488
0
      nullfree(file->curlurl);
1489
0
    } else {
1490
#ifdef USE_PARALLEL
1491
        if (file->use_parallel) {
1492
      int retval;
1493
      if(file->fh != MPI_FILE_NULL
1494
         && (retval = MPI_File_close(&file->fh)) != MPI_SUCCESS)
1495
        {status = NC_EPARINIT; return status;}
1496
        } else
1497
#endif
1498
0
        {
1499
0
      if(file->fp) fclose(file->fp);
1500
0
        }
1501
0
    }
1502
180
    return status;
1503
180
}
1504
1505
/*!
1506
  Interpret the magic number found in the header of a netCDF file.
1507
  This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags.
1508
1509
  @param[in] magic Pointer to a character array with the magic number block.
1510
  @param[out] model Pointer to an integer to hold the corresponding netCDF type.
1511
  @param[out] version Pointer to an integer to hold the corresponding netCDF version.
1512
  @returns NC_NOERR if a legitimate file type found
1513
  @returns NC_ENOTNC otherwise
1514
1515
\internal
1516
\ingroup datasets
1517
1518
*/
1519
static int
1520
NC_interpret_magic_number(char* magic, NCmodel* model)
1521
397
{
1522
397
    int status = NC_NOERR;
1523
397
    int tmpimpl = 0;
1524
    /* Look at the magic number - save any UDF format on entry */
1525
397
    if(model->impl >= NC_FORMATX_UDF0 && model->impl <= NC_FORMATX_UDF1)
1526
0
        tmpimpl = model->impl;
1527
397
    else if(model->impl >= NC_FORMATX_UDF2 && model->impl <= NC_FORMATX_UDF9)
1528
0
        tmpimpl = model->impl;
1529
1530
    /* Use the complete magic number string for HDF5 */
1531
397
    if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) {
1532
1
  model->impl = NC_FORMATX_NC4;
1533
1
  model->format = NC_FORMAT_NETCDF4;
1534
1
  goto done;
1535
1
    }
1536
396
    if(magic[0] == '\016' && magic[1] == '\003'
1537
16
              && magic[2] == '\023' && magic[3] == '\001') {
1538
0
  model->impl = NC_FORMATX_NC_HDF4;
1539
0
  model->format = NC_FORMAT_NETCDF4;
1540
0
  goto done;
1541
0
    }
1542
396
    if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') {
1543
192
        if(magic[3] == '\001') {
1544
62
      model->impl = NC_FORMATX_NC3;
1545
62
      model->format = NC_FORMAT_CLASSIC;
1546
62
      goto done;
1547
62
  }
1548
130
        if(magic[3] == '\002') {
1549
65
      model->impl = NC_FORMATX_NC3;
1550
65
      model->format = NC_FORMAT_64BIT_OFFSET;
1551
65
      goto done;
1552
65
        }
1553
65
        if(magic[3] == '\005') {
1554
49
    model->impl = NC_FORMATX_NC3;
1555
49
    model->format = NC_FORMAT_64BIT_DATA;
1556
49
    goto done;
1557
49
  }
1558
65
     }
1559
     /* No match  */
1560
220
     if (!tmpimpl) 
1561
220
         status = NC_ENOTNC;         
1562
1563
220
     goto done;
1564
1565
397
done:
1566
     /* if model->impl was any UDF format (0-9) on entry, make it so on exit */
1567
397
     if(tmpimpl)
1568
0
         model->impl = tmpimpl;
1569
     /* if this is a UDF magic_number update the model->impl */
1570
4.36k
     for(int i = 0; i < NC_MAX_UDF_FORMATS; i++) {
1571
3.97k
         if (strlen(UDF_magic_numbers[i]) && !strncmp(UDF_magic_numbers[i], magic,
1572
0
                                                       strlen(UDF_magic_numbers[i])))
1573
0
         {
1574
0
             int formatx = (i <= 1) ? (NC_FORMATX_UDF0 + i) : (NC_FORMATX_UDF2 + i - 2);
1575
0
             model->impl = formatx;
1576
0
             status = NC_NOERR;
1577
0
             break;
1578
0
         }
1579
3.97k
     }    
1580
1581
397
     return check(status);
1582
396
}
1583
1584
/* Define macros to wrap getxattr and listxattr calls; the Apple
   variants of these functions take extra trailing arguments.
   The expansions carry no trailing semicolon, so both macros can be
   used as ordinary expressions. */
#ifdef __APPLE__
#define GETXATTR(path,p,xvalue,xlen) getxattr(path, p, xvalue, (size_t)xlen, 0, 0)
#define LISTXATTR(path,xlist,xlen) listxattr(path, xlist, (size_t)xlen, 0)
#else
#define GETXATTR(path,p,xvalue,xlen) getxattr(path, p, xvalue, (size_t)xlen)
#define LISTXATTR(path,xlist,xlen) listxattr(path, xlist, (size_t)xlen)
#endif
1592
1593
/* Return NC_NOERR if path is a DAOS container; return NC_EXXX otherwise */
1594
static int
1595
isdaoscontainer(const char* path)
1596
180
{
1597
180
    int stat = NC_ENOTNC; /* default is that this is not a DAOS container */
1598
180
#ifndef _WIN32
1599
#ifdef USE_HDF5
1600
#if H5_VERSION_GE(1,12,0)
1601
    htri_t accessible;
1602
    hid_t fapl_id;
1603
    int rc;
1604
    /* Check for a DAOS container */
1605
    if((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) {stat = NC_EHDFERR; goto done;}
1606
    H5Pset_fapl_sec2(fapl_id);
1607
    accessible = H5Fis_accessible(path, fapl_id);
1608
    H5Pclose(fapl_id); /* Ignore any error */
1609
    rc = 0;
1610
    if(accessible > 0) {
1611
#ifdef HAVE_SYS_XATTR_H
1612
  ssize_t xlen;
1613
  xlen = LISTXATTR(path,NULL,0);
1614
        if(xlen > 0) {
1615
        char* xlist = NULL;
1616
      char* xvalue = NULL;
1617
      char* p;
1618
      char* endp;
1619
      if((xlist = (char*)calloc(1,(size_t)xlen))==NULL)
1620
    {stat = NC_ENOMEM; goto done;}
1621
      (void)LISTXATTR(path,xlist,xlen);
1622
      p = xlist; endp = p + xlen; /* delimit names */
1623
      /* walk the list of xattr names */
1624
      for(;p < endp;p += (strlen(p)+1)) {
1625
    /* The popen version looks for the string ".daos";
1626
                   It would be nice if we know whether that occurred
1627
       int the xattr's name or it value.
1628
       Oh well, we will do the general search */
1629
    /* Look for '.daos' in the key */
1630
    if(strstr(p,".daos") != NULL) {rc = 1; break;} /* success */
1631
    /* Else get the p'th xattr's value size */
1632
    xlen = GETXATTR(path,p,NULL,0);
1633
    if((xvalue = (char*)calloc(1,(size_t)xlen))==NULL)
1634
        {stat = NC_ENOMEM; goto done;}
1635
    /* Read the value */
1636
    (void)GETXATTR(path,p,xvalue,xlen);
1637
    /* Look for '.daos' in the value */
1638
    if(strstr(xvalue,".daos") != NULL) {rc = 1; break;} /* success */
1639
      }
1640
        }
1641
#else /*!HAVE_SYS_XATTR_H*/
1642
1643
#ifdef HAVE_GETFATTR
1644
  {
1645
      FILE *fp;
1646
      char cmd[4096];
1647
      memset(cmd,0,sizeof(cmd));
1648
      snprintf(cmd,sizeof(cmd),"getfattr \"%s\" | grep -c '.daos'",path);
1649
      fp = popen(cmd, "r");
1650
      if(fp != NULL) {
1651
        fscanf(fp, "%d", &rc);
1652
        pclose(fp);
1653
      } else {
1654
        rc = 0; /* Cannot test; assume not DAOS */
1655
      }
1656
  }
1657
    }
1658
#else /*!HAVE_GETFATTR*/
1659
    /* We just can't test for DAOS container.*/
1660
    rc = 0;
1661
#endif /*HAVE_GETFATTR*/
1662
#endif /*HAVE_SYS_XATTR_H*/
1663
    }
1664
    /* Test for DAOS container */
1665
    stat = (rc == 1 ? NC_NOERR : NC_ENOTNC);
1666
done:
1667
#endif
1668
#endif
1669
180
#endif
1670
    errno = 0; /* reset */
1671
180
    return stat;
1672
180
}
1673
1674
#ifdef DEBUG
1675
static void
1676
printmagic(const char* tag, char* magic, struct MagicFile* f)
1677
{
1678
    int i;
1679
    fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel);
1680
    for(i=0;i<MAGIC_NUMBER_LEN;i++) {
1681
        unsigned int c = (unsigned int)magic[i];
1682
  c = c & 0x000000FF;
1683
  if(c == '\n')
1684
      fprintf(stderr," 0x%0x/'\\n'",c);
1685
  else if(c == '\r')
1686
      fprintf(stderr," 0x%0x/'\\r'",c);
1687
  else if(c < ' ')
1688
      fprintf(stderr," 0x%0x/'?'",c);
1689
  else
1690
      fprintf(stderr," 0x%0x/'%c'",c,c);
1691
    }
1692
    fprintf(stderr,"\n");
1693
    fflush(stderr);
1694
}
1695
1696
static void
1697
printlist(NClist* list, const char* tag)
1698
{
1699
    int i;
1700
    fprintf(stderr,"%s:",tag);
1701
    for(i=0;i<nclistlength(list);i++) {
1702
        fprintf(stderr," %s",(char*)nclistget(list,i));
1703
  fprintf(stderr,"[%p]",(char*)nclistget(list,i));
1704
    }
1705
    fprintf(stderr,"\n");
1706
    dbgflush();
1707
}
1708
1709
1710
#endif