Coverage Report

Created: 2026-02-26 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/netcdf-c/libdispatch/dinfermodel.c
Line
Count
Source
1
/**
2
 * @file
3
 *
4
 * Infer as much as possible from the omode + path.
5
 * Rewrite the path to a canonical form.
6
 *
7
 * Copyright 2018 University Corporation for Atmospheric
8
 * Research/Unidata. See COPYRIGHT file for more info.
9
*/
10
#include "config.h"
11
#include <stddef.h>
12
#include <stdlib.h>
13
#include <string.h>
14
#ifdef HAVE_UNISTD_H
15
#include <unistd.h>
16
#endif
17
#ifdef HAVE_SYS_TYPES_H
18
#include <sys/types.h>
19
#endif
20
#ifndef _WIN32
21
#ifdef USE_HDF5
22
#include <hdf5.h>
23
#endif /* USE_HDF5 */
24
#endif /* _WIN32 */
25
#ifdef HAVE_SYS_XATTR_H
26
#include <sys/xattr.h>
27
#endif
28
29
#include "ncdispatch.h"
30
#include "ncpathmgr.h"
31
#include "netcdf_mem.h"
32
#include "fbits.h"
33
#include "ncbytes.h"
34
#include "nclist.h"
35
#include "nclog.h"
36
#include "nchttp.h"
37
#include "ncutil.h"
38
#ifdef NETCDF_ENABLE_S3
39
#include "ncs3sdk.h"
40
#endif
41
42
#ifndef nulldup
43
 #define nulldup(x) ((x)?strdup(x):(x))
44
#endif
45
46
#undef DEBUG
47
48
/* If Defined, then use only stdio for all magic number io;
49
   otherwise use stdio or mpio as required.
50
 */
51
#undef USE_STDIO
52
53
/**
54
Sort info for open/read/close of
55
file when searching for magic numbers
56
*/
57
struct MagicFile {
58
    const char* path;
59
    struct NCURI* uri;
60
    int omode;
61
    NCmodel* model;
62
    long long filelen;
63
    int use_parallel;
64
    int iss3;
65
    void* parameters; /* !NULL if inmemory && !diskless */
66
    FILE* fp;
67
#ifdef USE_PARALLEL
68
    MPI_File fh;
69
#endif
70
    char* curlurl; /* url to use with CURLOPT_SET_URL */
71
    NC_HTTP_STATE* state;
72
#ifdef NETCDF_ENABLE_S3
73
    NCS3INFO s3;
74
    void* s3client;
75
    char* errmsg;
76
#endif
77
};
78
79
/** @internal Magic number for HDF5 files. To be consistent with
80
 * H5Fis_hdf5, use the complete HDF5 magic number */
81
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";
82
83
209
#define modelcomplete(model) ((model)->impl != 0)
84
85
#ifdef DEBUG
/* Flush both standard output streams */
static void dbgflush(void)
{
    fflush(stdout);
    fflush(stderr);
}

/* Does nothing; check() calls it for every error code it sees
   (presumably so a debugger breakpoint can be set here) */
static void
fail(int err)
{
    (void)err;
}

/* Return err unchanged, reporting it to fail() first when it is an error */
static int
check(int err)
{
    if(err == NC_NOERR)
        return err;
    fail(err);
    return err;
}
#else
/* Without DEBUG, check() is just its argument */
#define check(err) (err)
#endif
108
109
/*
110
Define a table of "mode=" string values
111
from which the implementation can be inferred.
112
Note that only cases that can currently
113
take URLs are included.
114
*/
115
static struct FORMATMODES {
116
    const char* tag;
117
    const int impl; /* NC_FORMATX_XXX value */
118
    const int format; /* NC_FORMAT_XXX value */
119
} formatmodes[] = {
120
{"dap2",NC_FORMATX_DAP2,NC_FORMAT_CLASSIC},
121
{"dap4",NC_FORMATX_DAP4,NC_FORMAT_NETCDF4},
122
{"netcdf-3",NC_FORMATX_NC3,0}, /* Might be e.g. cdf5 */
123
{"classic",NC_FORMATX_NC3,0}, /* ditto */
124
{"netcdf-4",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
125
{"enhanced",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
126
{"udf0",NC_FORMATX_UDF0,0},
127
{"udf1",NC_FORMATX_UDF1,0},
128
{"udf2",NC_FORMATX_UDF2,0},
129
{"udf3",NC_FORMATX_UDF3,0},
130
{"udf4",NC_FORMATX_UDF4,0},
131
{"udf5",NC_FORMATX_UDF5,0},
132
{"udf6",NC_FORMATX_UDF6,0},
133
{"udf7",NC_FORMATX_UDF7,0},
134
{"udf8",NC_FORMATX_UDF8,0},
135
{"udf9",NC_FORMATX_UDF9,0},
136
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
137
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
138
{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
139
{NULL,0},
140
};
141
142
/*
 * Macro table: a fragment consisting of just `name` (no value) is
 * replaced by defkey=defvalues.
 * processmacros() stops at the first entry whose name matches, so
 * each name must appear only once; the table previously carried a
 * second, unreachable "zarr" entry, which has been dropped.
 * defvalues is a NULL-terminated vector (at most 3 values + NULL).
 */
static const struct MACRODEF {
    char* name;
    char* defkey;
    char* defvalues[4];
} macrodefs[] = {
{"zarr","mode",{"nczarr","zarr",NULL}},
{"dap2","mode",{"dap2",NULL}},
{"dap4","mode",{"dap4",NULL}},
{"s3","mode",{"s3","nczarr",NULL}},
{"bytes","mode",{"bytes",NULL}},
{"xarray","mode",{"zarr", NULL}},
{"noxarray","mode",{"nczarr", "noxarray", NULL}},
{"gs3","mode",{"gs3","nczarr",NULL}}, /* Google S3 API */
{NULL,NULL,{NULL}}
};
159
160
/*
 * Mode inferences: when the mode list contains `key`, `inference`
 * is added to it as well (see infernext).
 *
 * Warning: build this list with care so that inferencing cannot
 * run forever. To (mostly) prevent that, an attempt to infer a
 * value that is already present is ignored, which effectively
 * means the inference graph must be a DAG, i.e. free of cycles.
 * You have been warned.
 */
static const struct MODEINFER {
    char* key;
    char* inference;
} modeinferences[] = {
    {"zarr",     "nczarr"},
    {"xarray",   "zarr"},
    {"noxarray", "nczarr"},
    {"noxarray", "zarr"},
    {NULL,       NULL}
};
179
180
/* Mode negations: if mode contains key, then remove all occurrences of the inference and repeat */
181
static const struct MODEINFER modenegations[] = {
182
{"bytes","nczarr"}, /* bytes negates (nc)zarr */
183
{"bytes","zarr"},
184
{"noxarray","xarray"},
185
{NULL,NULL}
186
};
187
188
/* Map FORMATX to readability to get magic number */
189
static struct Readable {
190
    int impl;
191
    int readable;
192
} readable[] = {
193
{NC_FORMATX_NC3,1},
194
{NC_FORMATX_NC_HDF5,1},
195
{NC_FORMATX_NC_HDF4,1},
196
{NC_FORMATX_PNETCDF,1},
197
{NC_FORMATX_DAP2,0},
198
{NC_FORMATX_DAP4,0},
199
{NC_FORMATX_UDF0,1},
200
{NC_FORMATX_UDF1,1},
201
{NC_FORMATX_UDF2,1},
202
{NC_FORMATX_UDF3,1},
203
{NC_FORMATX_UDF4,1},
204
{NC_FORMATX_UDF5,1},
205
{NC_FORMATX_UDF6,1},
206
{NC_FORMATX_UDF7,1},
207
{NC_FORMATX_UDF8,1},
208
{NC_FORMATX_UDF9,1},
209
{NC_FORMATX_NCZARR,0}, /* eventually make readable */
210
{0,0},
211
};
212
213
/*
 * The URL protocols that are recognized, and how each is interpreted.
 * processuri() rejects any protocol not listed here; for a listed one it
 * replaces the protocol with `substitute` (when non-NULL) and adds the
 * `fragments` (when non-NULL) to the fragment list.
 */
static struct NCPROTOCOLLIST {
    const char* protocol;
    const char* substitute;
    const char* fragments; /* arbitrary fragment arguments */
} ncprotolist[] = {
    {"http",  NULL,   NULL},
    {"https", NULL,   NULL},
    {"file",  NULL,   NULL},
    {"dods",  "http", "mode=dap2"},
    {"dap4",  "http", "mode=dap4"},
    {"s3",    "s3",   "mode=s3"},
    {"gs3",   "gs3",  "mode=gs3"},
    {NULL,    NULL,   NULL} /* Terminate search */
};
228
229
/* Forward */
230
static int NC_omodeinfer(int useparallel, int omode, NCmodel*);
231
static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri);
232
static int processuri(const char* path, NCURI** urip, NClist* fraglist);
233
static int processmacros(NClist* fraglistp, NClist* expanded);
234
static char* envvlist2string(NClist* pairs, const char*);
235
static void set_default_mode(int* cmodep);
236
static int parseonchar(const char* s, int ch, NClist* segments);
237
static int mergelist(NClist** valuesp);
238
239
static int openmagic(struct MagicFile* file);
240
static int readmagic(struct MagicFile* file, size_t pos, char* magic);
241
static int closemagic(struct MagicFile* file);
242
static int NC_interpret_magic_number(char* magic, NCmodel* model);
243
#ifdef DEBUG
244
static void printmagic(const char* tag, char* magic,struct MagicFile*);
245
static void printlist(NClist* list, const char* tag);
246
#endif
247
static int isreadable(NCURI*,NCmodel*);
248
static char* list2string(NClist*);
249
static int parsepair(const char* pair, char** keyp, char** valuep);
250
static NClist* parsemode(const char* modeval);
251
static const char* getmodekey(const NClist* envv);
252
static int replacemode(NClist* envv, const char* newval);
253
static void infernext(NClist* current, NClist* next);
254
static int negateone(const char* mode, NClist* modes);
255
static void cleanstringlist(NClist* strs, int caseinsensitive);
256
static int isdaoscontainer(const char* path);
257
258
/*
259
If the path looks like a URL, then parse it, reformat it.
260
*/
261
static int
262
processuri(const char* path, NCURI** urip, NClist* fraglenv)
263
112
{
264
112
    int stat = NC_NOERR;
265
112
    int found = 0;
266
112
    NClist* tmp = NULL;
267
112
    struct NCPROTOCOLLIST* protolist;
268
112
    NCURI* uri = NULL;
269
112
    size_t pathlen = strlen(path);
270
112
    char* str = NULL;
271
112
    const NClist* ufrags;
272
112
    size_t i;
273
274
112
    if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;}
275
276
    /* Defaults */
277
112
    if(urip) *urip = NULL;
278
279
112
    ncuriparse(path,&uri);
280
112
    if(uri == NULL) goto done; /* not url */
281
282
    /* Look up the protocol */
283
0
    for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) {
284
0
        if(strcmp(uri->protocol,protolist->protocol) == 0) {
285
0
      found = 1;
286
0
      break;
287
0
  }
288
0
    }
289
0
    if(!found)
290
0
  {stat = NC_EINVAL; goto done;} /* unrecognized URL form */
291
292
    /* process the corresponding fragments for that protocol */
293
0
    if(protolist->fragments != NULL) {
294
0
  tmp = nclistnew();
295
0
  if((stat = parseonchar(protolist->fragments,'&',tmp))) goto done;
296
0
  for(i=0;i<nclistlength(tmp);i++) {
297
0
      char* key=NULL;
298
0
          char* value=NULL;
299
0
      if((stat = parsepair(nclistget(tmp,i),&key,&value))) goto done;
300
0
      if(value == NULL) value = strdup("");
301
0
      nclistpush(fraglenv,key);
302
0
          nclistpush(fraglenv,value);
303
0
  }
304
0
  nclistfreeall(tmp); tmp = NULL;
305
0
    }
306
307
    /* Substitute the protocol in any case */
308
0
    if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute);
309
310
    /* capture the fragments of the url */
311
0
    ufrags = (const NClist*)ncurifragmentparams(uri);
312
0
    for(i=0;i<nclistlength(ufrags);i+=2) {
313
0
  const char* key = nclistget(ufrags,i);
314
0
  const char* value = nclistget(ufrags,i+1);
315
0
        nclistpush(fraglenv,nulldup(key));
316
0
  value = (value==NULL?"":value);
317
0
  nclistpush(fraglenv,strdup(value));
318
0
    }
319
0
    if(urip) {
320
0
  *urip = uri;
321
0
  uri = NULL;
322
0
    }
323
324
112
done:
325
112
    nclistfreeall(tmp);
326
112
    nullfree(str);
327
112
    if(uri != NULL) ncurifree(uri);
328
112
    return check(stat);
329
0
}
330
331
/* Split a key=value pair */
332
static int
333
parsepair(const char* pair, char** keyp, char** valuep)
334
0
{
335
0
    const char* p;
336
0
    char* key = NULL;
337
0
    char* value = NULL;
338
339
0
    if(pair == NULL)
340
0
        return NC_EINVAL; /* empty pair */
341
0
    if(pair[0] == '\0' || pair[0] == '=')
342
0
        return NC_EINVAL; /* no key */
343
0
    p = strchr(pair,'=');
344
0
    if(p == NULL) {
345
0
  value = NULL;
346
0
  key = strdup(pair);
347
0
    } else {
348
0
  ptrdiff_t len = (p-pair);
349
0
  if((key = malloc((size_t)len+1))==NULL) return NC_ENOMEM;
350
0
  memcpy(key,pair,(size_t)len);
351
0
  key[len] = '\0';
352
0
  if(p[1] == '\0')
353
0
      value = NULL;
354
0
  else
355
0
      value = strdup(p+1);
356
0
    }
357
0
    if(keyp) {*keyp = key; key = NULL;};
358
0
    if(valuep) {*valuep = value; value = NULL;};
359
0
    nullfree(key);
360
0
    nullfree(value);
361
0
    return NC_NOERR;
362
0
}
363
364
#if 0
365
static int
366
parseurlmode(const char* modestr, NClist* list)
367
{
368
    int stat = NC_NOERR;
369
    const char* p = NULL;
370
    const char* endp = NULL;
371
372
    if(modestr == NULL || *modestr == '\0') goto done;
373
374
    /* Split modestr at the commas or EOL */
375
    p = modestr;
376
    for(;;) {
377
  char* s;
378
  ptrdiff_t slen;
379
  endp = strchr(p,',');
380
  if(endp == NULL) endp = p + strlen(p);
381
  slen = (endp - p);
382
  if((s = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
383
  memcpy(s,p,slen);
384
  s[slen] = '\0';
385
  nclistpush(list,s);
386
  if(*endp == '\0') break;
387
  p = endp+1;
388
    }
389
390
done:
391
    return check(stat);
392
}
393
#endif
394
395
/* Split a string at a given char */
396
static int
397
parseonchar(const char* s, int ch, NClist* segments)
398
0
{
399
0
    int stat = NC_NOERR;
400
0
    const char* p = NULL;
401
0
    const char* endp = NULL;
402
403
0
    if(s == NULL || *s == '\0') goto done;
404
405
0
    p = s;
406
0
    for(;;) {
407
0
  char* q;
408
0
  ptrdiff_t slen;
409
0
  endp = strchr(p,ch);
410
0
  if(endp == NULL) endp = p + strlen(p);
411
0
  slen = (endp - p);
412
0
  if((q = malloc((size_t)slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
413
0
  memcpy(q,p,(size_t)slen);
414
0
  q[slen] = '\0';
415
0
  nclistpush(segments,q);
416
0
  if(*endp == '\0') break;
417
0
  p = endp+1;
418
0
    }
419
420
0
done:
421
0
    return check(stat);
422
0
}
423
424
/* Convert a key,value envv pairlist into a delimited string*/
425
static char*
426
envvlist2string(NClist* envv, const char* delim)
427
0
{
428
0
    size_t i;
429
0
    NCbytes* buf = NULL;
430
0
    char* result = NULL;
431
432
0
    if(envv == NULL || nclistlength(envv) == 0) return NULL;
433
0
    buf = ncbytesnew();
434
0
    for(i=0;i<nclistlength(envv);i+=2) {
435
0
  const char* key = nclistget(envv,i);
436
0
  const char* val = nclistget(envv,i+1);
437
0
  if(key == NULL || strlen(key) == 0) continue;
438
0
  assert(val != NULL);
439
0
  if(i > 0) ncbytescat(buf,"&");
440
0
  ncbytescat(buf,key);
441
0
  if(val != NULL && val[0] != '\0') {
442
0
      ncbytescat(buf,"=");
443
0
      ncbytescat(buf,val);
444
0
  }
445
0
    }
446
0
    result = ncbytesextract(buf);
447
0
    ncbytesfree(buf);
448
0
    return result;
449
0
}
450
451
/* Given a mode= argument, fill in the impl */
452
static int
453
processmodearg(const char* arg, NCmodel* model)
454
0
{
455
0
    int stat = NC_NOERR;
456
0
    struct FORMATMODES* format = formatmodes;
457
0
    for(;format->tag;format++) {
458
0
  if(strcmp(format->tag,arg)==0) {
459
0
            model->impl = format->impl;
460
0
      if(format->format != 0) model->format = format->format;
461
0
  }
462
0
    }
463
0
    return check(stat);
464
0
}
465
466
/* Given an envv fragment list, do macro replacement */
467
static int
468
processmacros(NClist* fraglenv, NClist* expanded)
469
0
{
470
0
    size_t i;
471
0
    int stat = NC_NOERR;
472
0
    const struct MACRODEF* macros = NULL;
473
474
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
475
0
  int found = 0;
476
0
  char* key = nclistget(fraglenv,i);
477
0
  char* value = nclistget(fraglenv,i+1);
478
0
  if(strlen(value) == 0) { /* must be a singleton  */
479
0
            for(macros=macrodefs;macros->name;macros++) {
480
0
                if(strcmp(macros->name,key)==0) {
481
0
        char* const * p;
482
0
        nclistpush(expanded,strdup(macros->defkey));
483
0
        for(p=macros->defvalues;*p;p++) 
484
0
      nclistpush(expanded,strdup(*p));
485
0
        found = 1;        
486
0
        break;
487
0
          }
488
0
      }
489
0
  }
490
0
  if(!found) {/* pass thru */
491
0
      nclistpush(expanded,strdup(key));
492
0
          nclistpush(expanded,strdup(value));
493
0
  }
494
0
    }
495
496
0
    return check(stat);
497
0
}
498
499
/* Process mode flag inferences */
500
static int
501
processinferences(NClist* fraglenv)
502
0
{
503
0
    int stat = NC_NOERR;
504
0
    const char* modeval = NULL;
505
0
    NClist* newmodes = nclistnew();
506
0
    NClist* currentmodes = NULL;
507
0
    NClist* nextmodes = nclistnew();
508
0
    size_t i;
509
0
    char* newmodeval = NULL;
510
511
    /* Get "mode" entry */
512
0
    if((modeval = getmodekey(fraglenv))==NULL) goto done;
513
514
    /* Get the mode as list */
515
0
    currentmodes = parsemode(modeval);
516
517
#ifdef DEBUG
518
    printlist(currentmodes,"processinferences: initial mode list");
519
#endif
520
521
    /* Do what amounts to breadth first inferencing down the inference DAG. */
522
523
0
    for(;;) {
524
0
        NClist* tmp = NULL;
525
        /* Compute the next set of inferred modes */
526
#ifdef DEBUG
527
printlist(currentmodes,"processinferences: current mode list");
528
#endif
529
0
        infernext(currentmodes,nextmodes);
530
#ifdef DEBUG
531
printlist(nextmodes,"processinferences: next mode list");
532
#endif
533
        /* move current modes into list of newmodes */
534
0
        for(i=0;i<nclistlength(currentmodes);i++) {
535
0
      nclistpush(newmodes,nclistget(currentmodes,i));
536
0
  }
537
0
        nclistsetlength(currentmodes,0); /* clear current mode list */
538
0
        if(nclistlength(nextmodes) == 0) break; /* nothing more to do */
539
#ifdef DEBUG
540
printlist(newmodes,"processinferences: new mode list");
541
#endif
542
  /* Swap current and next */
543
0
        tmp = currentmodes;
544
0
  currentmodes = nextmodes;
545
0
  nextmodes = tmp;
546
0
        tmp = NULL;
547
0
    }
548
    /* cleanup any unused elements in currentmodes */
549
0
    nclistclearall(currentmodes);
550
551
    /* Ensure no duplicates */
552
0
    cleanstringlist(newmodes,1);
553
554
#ifdef DEBUG
555
    printlist(newmodes,"processinferences: final inferred mode list");
556
#endif
557
558
   /* Remove negative inferences */
559
0
   for(i=0;i<nclistlength(newmodes);i++) {
560
0
  const char* mode = nclistget(newmodes,i);
561
0
  negateone(mode,newmodes);
562
0
    }
563
564
    /* Store new mode value */
565
0
    if((newmodeval = list2string(newmodes))== NULL)
566
0
  {stat = NC_ENOMEM; goto done;}        
567
0
    if((stat=replacemode(fraglenv,newmodeval))) goto done;
568
0
    modeval = NULL;
569
570
0
done:
571
0
    nullfree(newmodeval);
572
0
    nclistfreeall(newmodes);
573
0
    nclistfreeall(currentmodes);
574
0
    nclistfreeall(nextmodes);
575
0
    return check(stat);
576
0
}
577
578
579
static int
580
negateone(const char* mode, NClist* newmodes)
581
0
{
582
0
    const struct MODEINFER* tests = modenegations;
583
0
    int changed = 0;
584
0
    for(;tests->key;tests++) {
585
0
  if(strcasecmp(tests->key,mode)==0) {
586
      /* Find and remove all instances of the inference value */
587
0
      for(size_t i = nclistlength(newmodes); i-- > 0;) {
588
0
    char* candidate = nclistget(newmodes,i);
589
0
    if(strcasecmp(candidate,tests->inference)==0) {
590
0
        nclistremove(newmodes,i);
591
0
        nullfree(candidate);
592
0
              changed = 1;
593
0
    }
594
0
      }
595
0
        }
596
0
    }
597
0
    return changed;
598
0
}
599
600
static void
601
infernext(NClist* current, NClist* next)
602
0
{
603
0
    size_t i;
604
0
    for(i=0;i<nclistlength(current);i++) {
605
0
        const struct MODEINFER* tests = NULL;
606
0
  const char* cur = nclistget(current,i);
607
0
        for(tests=modeinferences;tests->key;tests++) {
608
0
      if(strcasecmp(tests->key,cur)==0) {
609
          /* Append the inferred mode unless dup */
610
0
    if(!nclistmatch(next,tests->inference,1))
611
0
              nclistpush(next,strdup(tests->inference));
612
0
      }
613
0
        }
614
0
    }
615
0
}
616
617
/*
618
Given a list of strings, remove nulls and duplicates
619
*/
620
static int
621
mergelist(NClist** valuesp)
622
0
{
623
0
    size_t i,j;
624
0
    int stat = NC_NOERR;
625
0
    NClist* values = *valuesp;
626
0
    NClist* allvalues = nclistnew();
627
0
    NClist* newvalues = nclistnew();
628
0
    char* value = NULL;
629
630
0
    for(i=0;i<nclistlength(values);i++) {
631
0
  char* val1 = nclistget(values,i);
632
  /* split on commas and put pieces into allvalues */
633
0
  if((stat=parseonchar(val1,',',allvalues))) goto done;
634
0
    }
635
    /* Remove duplicates and "" */
636
0
    while(nclistlength(allvalues) > 0) {
637
0
  value = nclistremove(allvalues,0);
638
0
  if(strlen(value) == 0) {
639
0
      nullfree(value); value = NULL;
640
0
  } else {
641
0
      for(j=0;j<nclistlength(newvalues);j++) {
642
0
          char* candidate = nclistget(newvalues,j);
643
0
          if(strcasecmp(candidate,value)==0)
644
0
              {nullfree(value); value = NULL; break;}
645
0
       }
646
0
  }
647
0
  if(value != NULL) {nclistpush(newvalues,value); value = NULL;}
648
0
    }
649
    /* Make sure to have at least 1 value */
650
0
    if(nclistlength(newvalues)==0) nclistpush(newvalues,strdup(""));
651
0
    *valuesp = values; values = NULL;
652
653
0
done:
654
0
    nclistfree(allvalues);
655
0
    nclistfreeall(values);
656
0
    nclistfreeall(newvalues);
657
0
    return check(stat);
658
0
}
659
660
static int
661
lcontains(NClist* l, const char* key0)
662
0
{
663
0
    size_t i;
664
0
    for(i=0;i<nclistlength(l);i++) {
665
0
        const char* key1 = nclistget(l,i);
666
0
  if(strcasecmp(key0,key1)==0) return 1;
667
0
    }
668
0
    return 0;
669
0
}
670
671
/* Warning values should not use nclistfreeall */
672
static void
673
collectvaluesbykey(NClist* fraglenv, const char* key, NClist* values)
674
0
{
675
0
    size_t i;
676
    /* collect all the values with the same key (including this one) */
677
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
678
0
        const char* key2 = nclistget(fraglenv,i);
679
0
        if(strcasecmp(key,key2)==0) {
680
0
      const char* value2 = nclistget(fraglenv,i+1);
681
0
      nclistpush(values,value2); value2 = NULL;
682
0
  }
683
0
    }
684
0
}
685
686
/* Warning allkeys should not use nclistfreeall */
687
static void
688
collectallkeys(NClist* fraglenv, NClist* allkeys)
689
0
{
690
0
    size_t i;
691
    /* collect all the distinct keys */
692
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
693
0
  char* key = nclistget(fraglenv,i);
694
0
  if(!lcontains(allkeys,key)) {
695
0
      nclistpush(allkeys,key);
696
0
  }
697
0
    }
698
0
}
699
700
/* Given a fragment envv list, coalesce duplicate keys and remove duplicate values*/
701
static int
702
cleanfragments(NClist* fraglenv, NClist* newlist)
703
0
{
704
0
    size_t i;
705
0
    int stat = NC_NOERR;
706
0
    NClist* tmp = NULL;
707
0
    NClist* allkeys = NULL;
708
0
    NCbytes* buf = NULL;
709
0
    char* key = NULL;
710
0
    char* value = NULL;
711
712
0
    buf = ncbytesnew();
713
0
    allkeys = nclistnew();
714
0
    tmp = nclistnew();
715
716
    /* collect all unique keys */
717
0
    collectallkeys(fraglenv,allkeys);
718
    /* Collect all values for same key across all fragment pairs */
719
0
    for(i=0;i<nclistlength(allkeys);i++) {
720
0
  key = nclistget(allkeys,i);
721
0
  collectvaluesbykey(fraglenv,key,tmp);
722
  /* merge the key values, remove duplicate */
723
0
  if((stat=mergelist(&tmp))) goto done;
724
        /* Construct key,value pair and insert into newlist */
725
0
  key = strdup(key);
726
0
  nclistpush(newlist,key);
727
0
  value = list2string(tmp);
728
0
  nclistpush(newlist,value);
729
0
  nclistclear(tmp);
730
0
    }
731
0
done:
732
0
    nclistfree(allkeys);
733
0
    nclistfree(tmp);
734
0
    ncbytesfree(buf);
735
0
    return check(stat);
736
0
}
737
738
/* process non-mode fragment keys in case they hold significance; currently not */
739
static int
740
processfragmentkeys(const char* key, const char* value, NCmodel* model)
741
0
{
742
0
    return NC_NOERR;
743
0
}
744
745
/*
746
Infer from the mode + useparallel
747
only call if iscreate or file is not easily readable.
748
*/
749
static int
750
NC_omodeinfer(int useparallel, int cmode, NCmodel* model)
751
112
{
752
112
    int stat = NC_NOERR;
753
754
    /* If no format flags are set, then use default */
755
112
    if(!fIsSet(cmode,NC_FORMAT_ALL))
756
112
  set_default_mode(&cmode);
757
758
    /* Process the cmode; may override some already set flags. The
759
     * user-defined formats must be checked first. They may choose to
760
     * use some of the other flags, like NC_NETCDF4, so we must first
761
     * check NC_UDF0-NC_UDF9 before checking for any other flag. */
762
112
    int udf_found = 0;
763
    /* Lookup table for all UDF mode flags. This replaces the previous bit-shift
764
     * calculation which was fragile due to non-sequential bit positions
765
     * (bits 16, 19-25 to avoid conflicts with NC_NOATTCREORD and NC_NODIMSCALE_ATTACH). */
766
112
    static const int udf_flags[NC_MAX_UDF_FORMATS] = {
767
112
        NC_UDF0, NC_UDF1, NC_UDF2, NC_UDF3, NC_UDF4,
768
112
        NC_UDF5, NC_UDF6, NC_UDF7, NC_UDF8, NC_UDF9
769
112
    };
770
    /* Check if any UDF format flag is set in the mode */
771
1.23k
    for(int i = 0; i < NC_MAX_UDF_FORMATS; i++) {
772
1.12k
        if(fIsSet(cmode, udf_flags[i])) {
773
            /* Convert array index to format constant (handles gap in numbering) */
774
0
            int formatx = (i <= 1) ? (NC_FORMATX_UDF0 + i) : (NC_FORMATX_UDF2 + i - 2);
775
0
            model->impl = formatx;
776
0
            udf_found = 1;
777
0
            break;
778
0
        }
779
1.12k
    }
780
    
781
112
    if(udf_found)
782
0
    {
783
0
        if(fIsSet(cmode,NC_64BIT_OFFSET)) 
784
0
        {
785
0
            model->format = NC_FORMAT_64BIT_OFFSET;
786
0
        }
787
0
        else if(fIsSet(cmode,NC_64BIT_DATA))
788
0
        {
789
0
            model->format = NC_FORMAT_64BIT_DATA;
790
0
        }
791
0
        else if(fIsSet(cmode,NC_NETCDF4))
792
0
        {
793
0
            if(fIsSet(cmode,NC_CLASSIC_MODEL))
794
0
                model->format = NC_FORMAT_NETCDF4_CLASSIC;
795
0
            else
796
0
                model->format = NC_FORMAT_NETCDF4;
797
0
        }
798
0
        if(! model->format)
799
0
            model->format = NC_FORMAT_CLASSIC;
800
0
  goto done;
801
0
    }
802
803
112
    if(fIsSet(cmode,NC_64BIT_OFFSET)) {
804
0
  model->impl = NC_FORMATX_NC3;
805
0
  model->format = NC_FORMAT_64BIT_OFFSET;
806
0
        goto done;
807
0
    }
808
809
112
    if(fIsSet(cmode,NC_64BIT_DATA)) {
810
0
  model->impl = NC_FORMATX_NC3;
811
0
  model->format = NC_FORMAT_64BIT_DATA;
812
0
        goto done;
813
0
    }
814
815
112
    if(fIsSet(cmode,NC_NETCDF4)) {
816
0
  model->impl = NC_FORMATX_NC4;
817
0
        if(fIsSet(cmode,NC_CLASSIC_MODEL))
818
0
      model->format = NC_FORMAT_NETCDF4_CLASSIC;
819
0
  else
820
0
      model->format = NC_FORMAT_NETCDF4;
821
0
        goto done;
822
0
    }
823
824
    /* Default to classic model */
825
112
    model->format = NC_FORMAT_CLASSIC;
826
112
    model->impl = NC_FORMATX_NC3;
827
828
112
done:
829
    /* Apply parallel flag */
830
112
    if(useparallel) {
831
0
        if(model->impl == NC_FORMATX_NC3)
832
0
      model->impl = NC_FORMATX_PNETCDF;
833
0
    }
834
112
    return check(stat);
835
112
}
836
837
/*
838
If the mode flags do not necessarily specify the
839
format, then default it by adding in appropriate flags.
840
*/
841
842
static void
843
set_default_mode(int* modep)
844
112
{
845
112
    int mode = *modep;
846
112
    int dfaltformat;
847
848
112
    dfaltformat = nc_get_default_format();
849
112
    switch (dfaltformat) {
850
0
    case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break;
851
0
    case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break;
852
0
    case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break;
853
0
    case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break;
854
112
    case NC_FORMAT_CLASSIC: /* fall thru */
855
112
    default: break; /* default to classic */
856
112
    }
857
112
    *modep = mode; /* final result */
858
112
}
859
860
/**************************************************/
861
/*
862
   Infer model for this dataset using some
863
   combination of cmode, path, and reading the dataset.
864
   See the documentation in docs/internal.dox.
865
866
@param path
867
@param omode
868
@param iscreate
869
@param useparallel
870
@param params
871
@param model
872
@param newpathp
873
*/
874
875
int
876
NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp)
877
112
{
878
112
    size_t i;
879
112
    int stat = NC_NOERR;
880
112
    NCURI* uri = NULL;
881
112
    int omode = *omodep;
882
112
    NClist* fraglenv = nclistnew();
883
112
    NClist* modeargs = nclistnew();
884
112
    char* sfrag = NULL;
885
112
    const char* modeval = NULL;
886
112
    char* abspath = NULL;
887
112
    NClist* tmp = NULL;
888
889
    /* Phase 1:
890
       1. convert special protocols to http|https
891
       2. begin collecting fragments
892
    */
893
112
    if((stat = processuri(path, &uri, fraglenv))) goto done;
894
895
112
    if(uri != NULL) {
896
#ifdef DEBUG
897
  printlist(fraglenv,"processuri");
898
#endif
899
900
        /* Phase 2: Expand macros and add to fraglenv */
901
0
  nclistfreeall(tmp);
902
0
  tmp = nclistnew();
903
0
        if((stat = processmacros(fraglenv,tmp))) goto done;
904
0
  nclistfreeall(fraglenv);
905
0
  fraglenv = tmp; tmp = NULL;
906
#ifdef DEBUG
907
  printlist(fraglenv,"processmacros");
908
#endif
909
  /* Cleanup the fragment list */
910
0
  nclistfreeall(tmp);
911
0
  tmp = nclistnew();
912
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
913
0
  nclistfreeall(fraglenv);
914
0
  fraglenv = tmp; tmp = NULL;
915
916
        /* Phase 2a: Expand mode inferences and add to fraglenv */
917
0
        if((stat = processinferences(fraglenv))) goto done;
918
#ifdef DEBUG
919
  printlist(fraglenv,"processinferences");
920
#endif
921
922
        /* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
923
0
  nclistfreeall(tmp);
924
0
  tmp = nclistnew();
925
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
926
0
  nclistfreeall(fraglenv);
927
0
  fraglenv = tmp; tmp = NULL;
928
#ifdef DEBUG
929
  printlist(fraglenv,"cleanfragments");
930
#endif
931
932
        /* Phase 4: Rebuild the url fragment and rebuilt the url */
933
0
        sfrag = envvlist2string(fraglenv,"&");
934
0
        nclistfreeall(fraglenv); fraglenv = NULL;
935
#ifdef DEBUG
936
  fprintf(stderr,"frag final: %s\n",sfrag);
937
#endif
938
0
        ncurisetfragments(uri,sfrag);
939
0
        nullfree(sfrag); sfrag = NULL;
940
941
#ifdef NETCDF_ENABLE_S3
942
  /* If s3, then rebuild the url */
943
  if(NC_iss3(uri,NULL)) {
944
      NCURI* newuri = NULL;
945
      if((stat = NC_s3urlrebuild(uri,NULL,&newuri))) goto done;
946
      ncurifree(uri);
947
      uri = newuri;
948
  } else
949
#endif
950
0
  if(strcmp(uri->protocol,"file")==0) {
951
            /* convert path to absolute */
952
0
      char* canon = NULL;
953
0
      abspath = NCpathabsolute(uri->path);
954
0
      if((stat = NCpathcanonical(abspath,&canon))) goto done;
955
0
      nullfree(abspath);
956
0
      abspath = canon; canon = NULL;
957
0
      if((stat = ncurisetpath(uri,abspath))) goto done;
958
0
  }
959
  
960
  /* rebuild the path */
961
0
        if(newpathp) {
962
0
            *newpathp = ncuribuild(uri,NULL,NULL,NCURIALL);
963
#ifdef DEBUG
964
      fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr);
965
#endif    
966
0
  }
967
968
        /* Phase 5: Process the mode key to see if we can tell the formatx */
969
0
        modeval = ncurifragmentlookup(uri,"mode");
970
0
        if(modeval != NULL) {
971
0
      if((stat = parseonchar(modeval,',',modeargs))) goto done;
972
0
            for(i=0;i<nclistlength(modeargs);i++) {
973
0
          const char* arg = nclistget(modeargs,i);
974
0
          if((stat=processmodearg(arg,model))) goto done;
975
0
            }
976
0
  }
977
978
        /* Phase 6: Process the non-mode keys to see if we can tell the formatx */
979
0
  if(!modelcomplete(model)) {
980
0
      size_t i;
981
0
      NClist* p = (NClist*)ncurifragmentparams(uri); /* envv format */
982
0
      for(i=0;i<nclistlength(p);i+=2) {
983
0
    const char* key = nclistget(p,0);
984
0
    const char* value = nclistget(p,1);
985
0
    if((stat=processfragmentkeys(key,value,model))) goto done;
986
0
      }
987
0
  }
988
989
        /* Phase 7: Special cases: if this is a URL and model.impl is still not defined */
990
        /* Phase7a: Default is DAP2 */
991
0
        if(!modelcomplete(model)) {
992
0
      model->impl = NC_FORMATX_DAP2;
993
0
      model->format = NC_FORMAT_NC3;
994
0
        }
995
996
112
    } else {/* Not URL */
997
112
  if(newpathp) *newpathp = NULL;
998
112
    }
999
1000
    /* Phase 8: mode inference from mode flags */
1001
    /* The modeargs did not give us a model (probably not a URL).
1002
       So look at the combination of mode flags and the useparallel flag */
1003
112
    if(!modelcomplete(model)) {
1004
112
        if((stat = NC_omodeinfer(useparallel,omode,model))) goto done;
1005
112
    }
1006
1007
    /* Phase 9: Special case for file stored in DAOS container */
1008
112
    if(isdaoscontainer(path) == NC_NOERR) {
1009
        /* This is a DAOS container, so immediately assume it is HDF5. */
1010
0
        model->impl = NC_FORMATX_NC_HDF5;
1011
0
        model->format = NC_FORMAT_NETCDF4;
1012
112
    } else {
1013
        /* Phase 10: Infer from file content, if possible;
1014
           this has highest precedence, so it may override
1015
           previous decisions. Note that we do this last
1016
           because we need previously determined model info
1017
           to guess if this file is readable.
1018
        */
1019
112
        if(!iscreate && isreadable(uri,model)) {
1020
       /* Ok, we need to try to read the file */
1021
112
            if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done;
1022
112
        }
1023
112
    }
1024
1025
    /* Need a decision */
1026
97
    if(!modelcomplete(model))
1027
0
  {stat = NC_ENOTNC; goto done;}
1028
1029
    /* Force flag consistency */
1030
97
    switch (model->impl) {
1031
0
    case NC_FORMATX_NC4:
1032
0
    case NC_FORMATX_NC_HDF4:
1033
0
    case NC_FORMATX_DAP4:
1034
0
    case NC_FORMATX_NCZARR:
1035
0
  omode |= NC_NETCDF4;
1036
0
  if(model->format == NC_FORMAT_NETCDF4_CLASSIC)
1037
0
      omode |= NC_CLASSIC_MODEL;
1038
0
  break;
1039
97
    case NC_FORMATX_NC3:
1040
97
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1041
97
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1042
60
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1043
97
  break;
1044
0
    case NC_FORMATX_PNETCDF:
1045
0
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1046
0
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1047
0
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1048
0
  break;
1049
0
    case NC_FORMATX_DAP2:
1050
0
  omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL);
1051
0
  break;
1052
0
    case NC_FORMATX_UDF0:
1053
0
    case NC_FORMATX_UDF1:
1054
0
    case NC_FORMATX_UDF2:
1055
0
    case NC_FORMATX_UDF3:
1056
0
    case NC_FORMATX_UDF4:
1057
0
    case NC_FORMATX_UDF5:
1058
0
    case NC_FORMATX_UDF6:
1059
0
    case NC_FORMATX_UDF7:
1060
0
    case NC_FORMATX_UDF8:
1061
0
    case NC_FORMATX_UDF9:
1062
0
        if(model->format == NC_FORMAT_64BIT_OFFSET) 
1063
0
            omode |= NC_64BIT_OFFSET;
1064
0
        else if(model->format == NC_FORMAT_64BIT_DATA)
1065
0
            omode |= NC_64BIT_DATA;
1066
0
        else if(model->format == NC_FORMAT_NETCDF4)  
1067
0
            omode |= NC_NETCDF4;
1068
0
        else if(model->format == NC_FORMAT_NETCDF4_CLASSIC)  
1069
0
            omode |= NC_NETCDF4|NC_CLASSIC_MODEL;
1070
0
        break;
1071
0
    default:
1072
0
  {stat = NC_ENOTNC; goto done;}
1073
97
    }
1074
1075
112
done:
1076
112
    nullfree(sfrag);
1077
112
    nullfree(abspath);
1078
112
    ncurifree(uri);
1079
112
    nclistfreeall(modeargs);
1080
112
    nclistfreeall(fraglenv);
1081
112
    nclistfreeall(tmp);
1082
112
    *omodep = omode; /* in/out */
1083
112
    return check(stat);
1084
97
}
1085
1086
static int
1087
isreadable(NCURI* uri, NCmodel* model)
1088
112
{
1089
112
    int canread = 0;
1090
112
    struct Readable* r;
1091
    /* Step 1: Look up the implementation */
1092
112
    for(r=readable;r->impl;r++) {
1093
112
  if(model->impl == r->impl) {canread = r->readable; break;}
1094
112
    }
1095
    /* Step 2: check for bytes mode */
1096
112
    if(!canread && NC_testmode(uri,"bytes") && (model->impl == NC_FORMATX_NC4 || model->impl == NC_FORMATX_NC_HDF5))
1097
0
        canread = 1;
1098
112
    return canread;
1099
112
}
1100
1101
#if 0
1102
/* Return a freshly allocated copy of s, or of "" when s is NULL.
   The caller owns the result and must free it. */
static char*
emptyify(char* s)
{
    /* Duplicate exactly once; copying a temporary strdup("") would leak it */
    return strdup(s != NULL ? s : "");
}
1108
1109
/* Map the empty string to NULL; any other argument (including NULL)
   is returned unchanged. Nothing is allocated or freed. */
static const char*
nullify(const char* s)
{
    return (s != NULL && *s == '\0') ? NULL : s;
}
1116
#endif
1117
1118
/**************************************************/
1119
/* Envv list utilities */
1120
1121
static const char*
1122
getmodekey(const NClist* envv)
1123
0
{
1124
0
    size_t i;
1125
    /* Get "mode" entry */
1126
0
    for(i=0;i<nclistlength(envv);i+=2) {
1127
0
  char* key = NULL;
1128
0
  key = nclistget(envv,i);
1129
0
  if(strcasecmp(key,"mode")==0)
1130
0
      return nclistget(envv,i+1);
1131
0
    }
1132
0
    return NULL;
1133
0
}
1134
1135
static int
1136
replacemode(NClist* envv, const char* newval)
1137
0
{
1138
0
    size_t i;
1139
    /* Get "mode" entry */
1140
0
    for(i=0;i<nclistlength(envv);i+=2) {
1141
0
  char* key = NULL;
1142
0
  char* val = NULL;
1143
0
  key = nclistget(envv,i);
1144
0
  if(strcasecmp(key,"mode")==0) {
1145
0
      val = nclistget(envv,i+1);      
1146
0
      nclistset(envv,i+1,strdup(newval));
1147
0
      nullfree(val);
1148
0
      return NC_NOERR;
1149
0
  }
1150
0
    }
1151
0
    return NC_EINVAL;
1152
0
}
1153
1154
/* Split a mode value on commas into a newly created list.
   A NULL modeval yields an empty list. The status returned by
   parseonchar() is deliberately ignored. The caller owns the list. */
static NClist*
parsemode(const char* modeval)
{
    NClist* pieces = nclistnew();

    if(modeval == NULL) return pieces;
    (void)parseonchar(modeval,',',pieces);/* split on commas */
    return pieces;
}
1162
1163
/* Convert a list into a comma'd string */
1164
static char*
1165
list2string(NClist* list)
1166
0
{
1167
0
    size_t i;
1168
0
    NCbytes* buf = NULL;
1169
0
    char* result = NULL;
1170
1171
0
    if(list == NULL || nclistlength(list)==0) return strdup("");
1172
0
    buf = ncbytesnew();
1173
0
    for(i=0;i<nclistlength(list);i++) {
1174
0
  const char* m = nclistget(list,i);
1175
0
  if(m == NULL || strlen(m) == 0) continue;
1176
0
  if(i > 0) ncbytescat(buf,",");
1177
0
  ncbytescat(buf,m);
1178
0
    }
1179
0
    result = ncbytesextract(buf);
1180
0
    ncbytesfree(buf);
1181
0
    if(result == NULL) result = strdup("");
1182
0
    return result;
1183
0
}
1184
1185
#if 0
1186
/* Given a comma separated string, remove duplicates; mostly used to cleanup mode list */
1187
static char* 
1188
cleancommalist(const char* commalist, int caseinsensitive)
1189
{
1190
    NClist* tmp = nclistnew();
1191
    char* newlist = NULL;
1192
    if(commalist == NULL || strlen(commalist)==0) return nulldup(commalist);
1193
    (void)parseonchar(commalist,',',tmp);/* split on commas */
1194
    cleanstringlist(tmp,caseinsensitive);
1195
    newlist = list2string(tmp);
1196
    nclistfreeall(tmp);
1197
    return newlist;
1198
}
1199
#endif
1200
1201
/* Given a list of strings, remove nulls and duplicated */
1202
static void
1203
cleanstringlist(NClist* strs, int caseinsensitive)
1204
0
{
1205
0
    if(nclistlength(strs) == 0) return;
1206
    /* Remove nulls */
1207
0
    for(size_t i = nclistlength(strs); i-->0;) {
1208
0
        if(nclistget(strs,i)==NULL) nclistremove(strs,i);
1209
0
    }
1210
0
    if(nclistlength(strs) <= 1) return;
1211
    /* Remove duplicates*/
1212
0
    for(size_t i=0;i<nclistlength(strs);i++) {
1213
0
        const char* value = nclistget(strs,i);
1214
        /* look ahead for duplicates */
1215
0
        for(size_t j=nclistlength(strs)-1;j>i;j--) {
1216
0
            int match;
1217
0
            const char* candidate = nclistget(strs,j);
1218
0
            if(caseinsensitive)
1219
0
                match = (strcasecmp(value,candidate) == 0);
1220
0
            else
1221
0
                match = (strcmp(value,candidate) == 0);
1222
0
            if(match) {char* dup = nclistremove(strs,j); nullfree(dup);}
1223
0
        }
1224
0
    }
1225
0
}
1226
1227
1228
/**************************************************/
1229
/**
1230
 * @internal Given an existing file, figure out its format and return
1231
 * that format value (NC_FORMATX_XXX) in model arg. Assume any path
1232
 * conversion was already performed at a higher level.
1233
 *
1234
 * @param path File name.
1235
 * @param flags
1236
 * @param use_parallel
1237
 * @param parameters
1238
 * @param model Pointer that gets the model to use for the dispatch table.
1239
 * @param version Pointer that gets version of the file.
1240
 *
1241
 * @return ::NC_NOERR No error.
1242
 * @author Dennis Heimbigner
1243
*/
1244
static int
1245
check_file_type(const char *path, int omode, int use_parallel,
1246
       void *parameters, NCmodel* model, NCURI* uri)
1247
112
{
1248
112
    char magic[NC_MAX_MAGIC_NUMBER_LEN];
1249
112
    int status = NC_NOERR;
1250
112
    struct MagicFile magicinfo;
1251
#ifdef _WIN32
1252
    NC* nc = NULL;
1253
#endif
1254
1255
112
    memset((void*)&magicinfo,0,sizeof(magicinfo));
1256
1257
#ifdef _WIN32 /* including MINGW */
1258
    /* Windows does not handle multiple handles to the same file very well.
1259
       So if file is already open/created, then find it and just get the
1260
       model from that. */
1261
    if((nc = find_in_NCList_by_name(path)) != NULL) {
1262
  int format = 0;
1263
  /* Get the model from this NC */
1264
  if((status = nc_inq_format_extended(nc->ext_ncid,&format,NULL))) goto done;
1265
  model->impl = format;
1266
  if((status = nc_inq_format(nc->ext_ncid,&format))) goto done;
1267
  model->format = format;
1268
  goto done;
1269
    }
1270
#endif
1271
1272
112
    magicinfo.path = path; /* do not free */
1273
112
    magicinfo.uri = uri; /* do not free */
1274
112
    magicinfo.omode = omode;
1275
112
    magicinfo.model = model; /* do not free */
1276
112
    magicinfo.parameters = parameters; /* do not free */
1277
#ifdef USE_STDIO
1278
    magicinfo.use_parallel = 0;
1279
#else
1280
112
    magicinfo.use_parallel = use_parallel;
1281
112
#endif
1282
1283
112
    if((status = openmagic(&magicinfo))) goto done;
1284
1285
    /* Verify we have a large enough file */
1286
112
    if(MAGIC_NUMBER_LEN >= (unsigned long long)magicinfo.filelen)
1287
0
  {status = NC_ENOTNC; goto done;}
1288
112
    if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) {
1289
0
  status = NC_ENOTNC;
1290
0
  goto done;
1291
0
    }
1292
1293
    /* Look at the magic number */
1294
112
    if(NC_interpret_magic_number(magic,model) == NC_NOERR
1295
97
  && model->format != 0) {
1296
97
        if (use_parallel && (model->format == NC_FORMAT_NC3 || model->impl == NC_FORMATX_NC3))
1297
            /* this is called from nc_open_par() and file is classic */
1298
0
            model->impl = NC_FORMATX_PNETCDF;
1299
97
        goto done; /* found something */
1300
97
    }
1301
1302
    /* Remaining case when implementation is an HDF5 file;
1303
       search forward at starting at 512
1304
       and doubling to see if we have HDF5 magic number */
1305
15
    {
1306
15
  size_t pos = 512L;
1307
166
        for(;;) {
1308
166
      if((pos+MAGIC_NUMBER_LEN) > (unsigned long long)magicinfo.filelen)
1309
15
    {status = NC_ENOTNC; goto done;}
1310
151
            if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR)
1311
0
          {status = NC_ENOTNC; goto done; }
1312
151
            NC_interpret_magic_number(magic,model);
1313
151
            if(model->impl == NC_FORMATX_NC4) break;
1314
      /* double and try again */
1315
151
      pos = 2*pos;
1316
151
        }
1317
15
    }
1318
112
done:
1319
112
    closemagic(&magicinfo);
1320
112
    return check(status);
1321
15
}
1322
1323
/**
1324
\internal
1325
\ingroup datasets
1326
Provide open, read and close for use when searching for magic numbers
1327
*/
1328
static int
1329
openmagic(struct MagicFile* file)
1330
112
{
1331
112
    int status = NC_NOERR;
1332
112
    if(fIsSet(file->omode,NC_INMEMORY)) {
1333
  /* Get its length */
1334
112
  NC_memio* meminfo = (NC_memio*)file->parameters;
1335
112
        assert(meminfo != NULL);
1336
112
  file->filelen = (long long)meminfo->size;
1337
112
  goto done;
1338
112
    }
1339
0
    if(file->uri != NULL) {
1340
#ifdef NETCDF_ENABLE_BYTERANGE
1341
  /* Construct a URL minus any fragment */
1342
        file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC);
1343
  /* Open the curl handle */
1344
        if((status=nc_http_open(file->path, &file->state))) goto done;
1345
  if((status=nc_http_size(file->state,&file->filelen))) goto done;
1346
#else /*!BYTERANGE*/
1347
0
  {status = NC_ENOTBUILT;}
1348
0
#endif /*BYTERANGE*/
1349
0
  goto done;
1350
0
    } 
1351
#ifdef USE_PARALLEL
1352
    if (file->use_parallel) {
1353
  int retval;
1354
  MPI_Offset size;
1355
        assert(file->parameters != NULL);
1356
  if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm,
1357
                                   (char*)file->path,MPI_MODE_RDONLY,
1358
                                   ((NC_MPI_INFO*)file->parameters)->info,
1359
                                   &file->fh)) != MPI_SUCCESS) {
1360
#ifdef MPI_ERR_NO_SUCH_FILE
1361
      int errorclass;
1362
      MPI_Error_class(retval, &errorclass);
1363
      if (errorclass == MPI_ERR_NO_SUCH_FILE)
1364
#ifdef NC_ENOENT
1365
          status = NC_ENOENT;
1366
#else /*!NC_ENOENT*/
1367
    status = errno;
1368
#endif /*NC_ENOENT*/
1369
      else
1370
#endif /*MPI_ERR_NO_SUCH_FILE*/
1371
          status = NC_EPARINIT;
1372
      file->fh = MPI_FILE_NULL;
1373
      goto done;
1374
  }
1375
  /* Get its length */
1376
  if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS)
1377
      {status = NC_EPARINIT; goto done;}
1378
  file->filelen = (long long)size;
1379
  goto done;
1380
    }
1381
#endif /* USE_PARALLEL */
1382
0
    {
1383
0
        if (file->path == NULL || strlen(file->path) == 0)
1384
0
            {status = NC_EINVAL; goto done;}
1385
0
        file->fp = NCfopen(file->path, "r");
1386
0
        if(file->fp == NULL)
1387
0
      {status = errno; goto done;}
1388
  /* Get its length */
1389
0
  {
1390
0
      int fd = fileno(file->fp);
1391
#ifdef _WIN32
1392
      __int64 len64 = _filelengthi64(fd);
1393
      if(len64 < 0)
1394
    {status = errno; goto done;}
1395
      file->filelen = (long long)len64;
1396
#else
1397
0
      off_t size;
1398
0
      size = lseek(fd, 0, SEEK_END);
1399
0
      if(size == -1)
1400
0
    {status = errno; goto done;}
1401
0
    file->filelen = (long long)size;
1402
0
#endif
1403
0
  }
1404
0
        int retval2 = fseek(file->fp, 0L, SEEK_SET);        
1405
0
      if(retval2 != 0)
1406
0
    {status = errno; goto done;}
1407
0
    }
1408
112
done:
1409
112
    return check(status);
1410
0
}
1411
1412
static int
1413
readmagic(struct MagicFile* file, size_t pos, char* magic)
1414
263
{
1415
263
    int status = NC_NOERR;
1416
263
    NCbytes* buf = ncbytesnew();
1417
1418
263
    memset(magic,0,MAGIC_NUMBER_LEN);
1419
263
    if(fIsSet(file->omode,NC_INMEMORY)) {
1420
263
  char* mempos;
1421
263
  NC_memio* meminfo = (NC_memio*)file->parameters;
1422
263
  if((pos + MAGIC_NUMBER_LEN) > meminfo->size)
1423
0
      {status = NC_EINMEMORY; goto done;}
1424
263
  mempos = ((char*)meminfo->memory) + pos;
1425
263
  memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN);
1426
#ifdef DEBUG
1427
  printmagic("XXX: readmagic",magic,file);
1428
#endif
1429
263
    } else if(file->uri != NULL) {
1430
#ifdef NETCDF_ENABLE_BYTERANGE
1431
        size64_t start = (size64_t)pos;
1432
        size64_t count = MAGIC_NUMBER_LEN;
1433
        status = nc_http_read(file->state, start, count, buf);
1434
        if (status == NC_NOERR) {
1435
            if (ncbyteslength(buf) != count)
1436
                status = NC_EINVAL;
1437
            else
1438
                memcpy(magic, ncbytescontents(buf), count);
1439
        }
1440
#endif
1441
0
    } else {
1442
#ifdef USE_PARALLEL
1443
        if (file->use_parallel) {
1444
      MPI_Status mstatus;
1445
      int retval;
1446
      if((retval = MPI_File_read_at_all(file->fh, pos, magic,
1447
          MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS)
1448
          {status = NC_EPARINIT; goto done;}
1449
        }
1450
        else
1451
#endif /* USE_PARALLEL */
1452
0
        { /* Ordinary read */
1453
0
            long i;
1454
0
            i = fseek(file->fp, (long)pos, SEEK_SET);
1455
0
            if (i < 0) { status = errno; goto done; }
1456
0
            ncbytessetlength(buf, 0);
1457
0
            if ((status = NC_readfileF(file->fp, buf, MAGIC_NUMBER_LEN))) goto done;
1458
0
            memcpy(magic, ncbytescontents(buf), MAGIC_NUMBER_LEN);
1459
0
        }
1460
0
    }
1461
1462
263
done:
1463
263
    ncbytesfree(buf);
1464
263
    if(file && file->fp) clearerr(file->fp);
1465
263
    return check(status);
1466
263
}
1467
1468
/**
1469
 * Close the file opened to check for magic number.
1470
 *
1471
 * @param file pointer to the MagicFile struct for this open file.
1472
 * @returns NC_NOERR for success
1473
 * @returns NC_EPARINIT if there was a problem closing file with MPI
1474
 * (parallel builds only).
1475
 * @author Dennis Heimbigner
1476
 */
1477
static int
1478
closemagic(struct MagicFile* file)
1479
112
{
1480
112
    int status = NC_NOERR;
1481
1482
112
    if(fIsSet(file->omode,NC_INMEMORY)) {
1483
  /* noop */
1484
112
    } else if(file->uri != NULL) {
1485
#ifdef NETCDF_ENABLE_BYTERANGE
1486
      status = nc_http_close(file->state);
1487
#endif
1488
0
      nullfree(file->curlurl);
1489
0
    } else {
1490
#ifdef USE_PARALLEL
1491
        if (file->use_parallel) {
1492
      int retval;
1493
      if(file->fh != MPI_FILE_NULL
1494
         && (retval = MPI_File_close(&file->fh)) != MPI_SUCCESS)
1495
        {status = NC_EPARINIT; return status;}
1496
        } else
1497
#endif
1498
0
        {
1499
0
      if(file->fp) fclose(file->fp);
1500
0
        }
1501
0
    }
1502
112
    return status;
1503
112
}
1504
1505
/*!
1506
  Interpret the magic number found in the header of a netCDF file.
1507
  This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags.
1508
1509
  @param[in] magic Pointer to a character array with the magic number block.
1510
  @param[out] model Pointer to an integer to hold the corresponding netCDF type.
1511
  @param[out] version Pointer to an integer to hold the corresponding netCDF version.
1512
  @returns NC_NOERR if a legitimate file type found
1513
  @returns NC_ENOTNC otherwise
1514
1515
\internal
1516
\ingroup datasets
1517
1518
*/
1519
static int
1520
NC_interpret_magic_number(char* magic, NCmodel* model)
1521
263
{
1522
263
    int status = NC_NOERR;
1523
263
    int tmpimpl = 0;
1524
    /* Look at the magic number - save any UDF format on entry */
1525
263
    if(model->impl >= NC_FORMATX_UDF0 && model->impl <= NC_FORMATX_UDF1)
1526
0
        tmpimpl = model->impl;
1527
263
    else if(model->impl >= NC_FORMATX_UDF2 && model->impl <= NC_FORMATX_UDF9)
1528
0
        tmpimpl = model->impl;
1529
1530
    /* Use the complete magic number string for HDF5 */
1531
263
    if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) {
1532
0
  model->impl = NC_FORMATX_NC4;
1533
0
  model->format = NC_FORMAT_NETCDF4;
1534
0
  goto done;
1535
0
    }
1536
263
    if(magic[0] == '\016' && magic[1] == '\003'
1537
0
              && magic[2] == '\023' && magic[3] == '\001') {
1538
0
  model->impl = NC_FORMATX_NC_HDF4;
1539
0
  model->format = NC_FORMAT_NETCDF4;
1540
0
  goto done;
1541
0
    }
1542
263
    if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') {
1543
113
        if(magic[3] == '\001') {
1544
38
      model->impl = NC_FORMATX_NC3;
1545
38
      model->format = NC_FORMAT_CLASSIC;
1546
38
      goto done;
1547
38
  }
1548
75
        if(magic[3] == '\002') {
1549
45
      model->impl = NC_FORMATX_NC3;
1550
45
      model->format = NC_FORMAT_64BIT_OFFSET;
1551
45
      goto done;
1552
45
        }
1553
30
        if(magic[3] == '\005') {
1554
30
    model->impl = NC_FORMATX_NC3;
1555
30
    model->format = NC_FORMAT_64BIT_DATA;
1556
30
    goto done;
1557
30
  }
1558
30
     }
1559
     /* No match  */
1560
150
     if (!tmpimpl) 
1561
150
         status = NC_ENOTNC;         
1562
1563
150
     goto done;
1564
1565
263
done:
1566
     /* if model->impl was any UDF format (0-9) on entry, make it so on exit */
1567
263
     if(tmpimpl)
1568
0
         model->impl = tmpimpl;
1569
     /* if this is a UDF magic_number update the model->impl */
1570
2.89k
     for(int i = 0; i < NC_MAX_UDF_FORMATS; i++) {
1571
2.63k
         if (strlen(UDF_magic_numbers[i]) && !strncmp(UDF_magic_numbers[i], magic,
1572
0
                                                       strlen(UDF_magic_numbers[i])))
1573
0
         {
1574
0
             int formatx = (i <= 1) ? (NC_FORMATX_UDF0 + i) : (NC_FORMATX_UDF2 + i - 2);
1575
0
             model->impl = formatx;
1576
0
             status = NC_NOERR;
1577
0
             break;
1578
0
         }
1579
2.63k
     }    
1580
1581
263
     return check(status);
1582
263
}
1583
1584
/* Wrap the getxattr and listxattr calls: the Apple versions take extra
   trailing arguments (passed as 0 here). The macros expand to a plain
   call expression with no trailing semicolon, so they can be used both
   as statements and inside larger expressions. */
#ifdef __APPLE__
#define GETXATTR(path,p,xvalue,xlen) getxattr(path, p, xvalue, (size_t)(xlen), 0, 0)
#define LISTXATTR(path,xlist,xlen) listxattr(path, xlist, (size_t)(xlen), 0)
#else
#define GETXATTR(path,p,xvalue,xlen) getxattr(path, p, xvalue, (size_t)(xlen))
#define LISTXATTR(path,xlist,xlen) listxattr(path, xlist, (size_t)(xlen))
#endif
1592
1593
/* Return NC_NOERR if path is a DAOS container; return NC_EXXX otherwise */
1594
static int
1595
isdaoscontainer(const char* path)
1596
112
{
1597
112
    int stat = NC_ENOTNC; /* default is that this is not a DAOS container */
1598
112
#ifndef _WIN32
1599
#ifdef USE_HDF5
1600
#if H5_VERSION_GE(1,12,0)
1601
    htri_t accessible;
1602
    hid_t fapl_id;
1603
    int rc;
1604
    /* Check for a DAOS container */
1605
    if((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) {stat = NC_EHDFERR; goto done;}
1606
    H5Pset_fapl_sec2(fapl_id);
1607
    accessible = H5Fis_accessible(path, fapl_id);
1608
    H5Pclose(fapl_id); /* Ignore any error */
1609
    rc = 0;
1610
    if(accessible > 0) {
1611
#ifdef HAVE_SYS_XATTR_H
1612
  ssize_t xlen;
1613
  xlen = LISTXATTR(path,NULL,0);
1614
        if(xlen > 0) {
1615
        char* xlist = NULL;
1616
      char* xvalue = NULL;
1617
      char* p;
1618
      char* endp;
1619
      if((xlist = (char*)calloc(1,(size_t)xlen))==NULL)
1620
    {stat = NC_ENOMEM; goto done;}
1621
      (void)LISTXATTR(path,xlist,xlen);
1622
      p = xlist; endp = p + xlen; /* delimit names */
1623
      /* walk the list of xattr names */
1624
      for(;p < endp;p += (strlen(p)+1)) {
1625
    /* The popen version looks for the string ".daos";
1626
                   It would be nice if we know whether that occurred
1627
       int the xattr's name or it value.
1628
       Oh well, we will do the general search */
1629
    /* Look for '.daos' in the key */
1630
    if(strstr(p,".daos") != NULL) {rc = 1; break;} /* success */
1631
    /* Else get the p'th xattr's value size */
1632
    xlen = GETXATTR(path,p,NULL,0);
1633
    if((xvalue = (char*)calloc(1,(size_t)xlen))==NULL)
1634
        {stat = NC_ENOMEM; goto done;}
1635
    /* Read the value */
1636
    (void)GETXATTR(path,p,xvalue,xlen);
1637
    /* Look for '.daos' in the value */
1638
    if(strstr(xvalue,".daos") != NULL) {rc = 1; break;} /* success */
1639
      }
1640
        }
1641
#else /*!HAVE_SYS_XATTR_H*/
1642
1643
#ifdef HAVE_GETFATTR
1644
  {
1645
      FILE *fp;
1646
      char cmd[4096];
1647
      memset(cmd,0,sizeof(cmd));
1648
      snprintf(cmd,sizeof(cmd),"getfattr \"%s\" | grep -c '.daos'",path);
1649
      fp = popen(cmd, "r");
1650
      if(fp != NULL) {
1651
        fscanf(fp, "%d", &rc);
1652
        pclose(fp);
1653
      } else {
1654
        rc = 0; /* Cannot test; assume not DAOS */
1655
      }
1656
  }
1657
    }
1658
#else /*!HAVE_GETFATTR*/
1659
    /* We just can't test for DAOS container.*/
1660
    rc = 0;
1661
#endif /*HAVE_GETFATTR*/
1662
#endif /*HAVE_SYS_XATTR_H*/
1663
    }
1664
    /* Test for DAOS container */
1665
    stat = (rc == 1 ? NC_NOERR : NC_ENOTNC);
1666
done:
1667
#endif
1668
#endif
1669
112
#endif
1670
    errno = 0; /* reset */
1671
112
    return stat;
1672
112
}
1673
1674
#ifdef DEBUG
1675
static void
1676
printmagic(const char* tag, char* magic, struct MagicFile* f)
1677
{
1678
    int i;
1679
    fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel);
1680
    for(i=0;i<MAGIC_NUMBER_LEN;i++) {
1681
        unsigned int c = (unsigned int)magic[i];
1682
  c = c & 0x000000FF;
1683
  if(c == '\n')
1684
      fprintf(stderr," 0x%0x/'\\n'",c);
1685
  else if(c == '\r')
1686
      fprintf(stderr," 0x%0x/'\\r'",c);
1687
  else if(c < ' ')
1688
      fprintf(stderr," 0x%0x/'?'",c);
1689
  else
1690
      fprintf(stderr," 0x%0x/'%c'",c,c);
1691
    }
1692
    fprintf(stderr,"\n");
1693
    fflush(stderr);
1694
}
1695
1696
static void
1697
printlist(NClist* list, const char* tag)
1698
{
1699
    int i;
1700
    fprintf(stderr,"%s:",tag);
1701
    for(i=0;i<nclistlength(list);i++) {
1702
        fprintf(stderr," %s",(char*)nclistget(list,i));
1703
  fprintf(stderr,"[%p]",(char*)nclistget(list,i));
1704
    }
1705
    fprintf(stderr,"\n");
1706
    dbgflush();
1707
}
1708
1709
1710
#endif