Coverage Report

Created: 2026-02-24 07:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/netcdf-c/libdispatch/dinfermodel.c
Line
Count
Source
1
/**
2
 * @file
3
 *
4
 * Infer as much as possible from the omode + path.
5
 * Rewrite the path to a canonical form.
6
 *
7
 * Copyright 2018 University Corporation for Atmospheric
8
 * Research/Unidata. See COPYRIGHT file for more info.
9
*/
10
#include "config.h"
11
#include <stddef.h>
12
#include <stdlib.h>
13
#include <string.h>
14
#ifdef HAVE_UNISTD_H
15
#include <unistd.h>
16
#endif
17
#ifdef HAVE_SYS_TYPES_H
18
#include <sys/types.h>
19
#endif
20
#ifndef _WIN32
21
#ifdef USE_HDF5
22
#include <hdf5.h>
23
#endif /* USE_HDF5 */
24
#endif /* _WIN32 */
25
#ifdef HAVE_SYS_XATTR_H
26
#include <sys/xattr.h>
27
#endif
28
29
#include "ncdispatch.h"
30
#include "ncpathmgr.h"
31
#include "netcdf_mem.h"
32
#include "fbits.h"
33
#include "ncbytes.h"
34
#include "nclist.h"
35
#include "nclog.h"
36
#include "nchttp.h"
37
#include "ncutil.h"
38
#ifdef NETCDF_ENABLE_S3
39
#include "ncs3sdk.h"
40
#endif
41
42
#ifndef nulldup
43
 #define nulldup(x) ((x)?strdup(x):(x))
44
#endif
45
46
#undef DEBUG
47
48
/* If Defined, then use only stdio for all magic number io;
49
   otherwise use stdio or mpio as required.
50
 */
51
#undef USE_STDIO
52
53
/**
54
Sort info for open/read/close of
55
file when searching for magic numbers
56
*/
57
struct MagicFile {
58
    const char* path;
59
    struct NCURI* uri;
60
    int omode;
61
    NCmodel* model;
62
    long long filelen;
63
    int use_parallel;
64
    int iss3;
65
    void* parameters; /* !NULL if inmemory && !diskless */
66
    FILE* fp;
67
#ifdef USE_PARALLEL
68
    MPI_File fh;
69
#endif
70
    char* curlurl; /* url to use with CURLOPT_SET_URL */
71
    NC_HTTP_STATE* state;
72
#ifdef NETCDF_ENABLE_S3
73
    NCS3INFO s3;
74
    void* s3client;
75
    char* errmsg;
76
#endif
77
};
78
79
/** @internal Magic number for HDF5 files. To be consistent with
80
 * H5Fis_hdf5, use the complete HDF5 magic number */
81
static char HDF5_SIGNATURE[MAGIC_NUMBER_LEN] = "\211HDF\r\n\032\n";
82
83
328
#define modelcomplete(model) ((model)->impl != 0)
84
85
#ifdef DEBUG
/* Flush stdout and then stderr. */
static void
dbgflush(void)
{
    fflush(stdout);
    fflush(stderr);
}

/* Does nothing; called by check() for every non-NC_NOERR code. */
static void
fail(int err)
{
    (void)err;
}

/* Pass the error code through unchanged, calling fail() first when it is an error. */
static int
check(int err)
{
    if(err == NC_NOERR)
        return err;
    fail(err);
    return err;
}
#else
/* Without DEBUG, check() is the identity. */
#define check(err) (err)
#endif
108
109
/*
110
Define a table of "mode=" string values
111
from which the implementation can be inferred.
112
Note that only cases that can currently
113
take URLs are included.
114
*/
115
static struct FORMATMODES {
116
    const char* tag;
117
    const int impl; /* NC_FORMATX_XXX value */
118
    const int format; /* NC_FORMAT_XXX value */
119
} formatmodes[] = {
120
{"dap2",NC_FORMATX_DAP2,NC_FORMAT_CLASSIC},
121
{"dap4",NC_FORMATX_DAP4,NC_FORMAT_NETCDF4},
122
{"netcdf-3",NC_FORMATX_NC3,0}, /* Might be e.g. cdf5 */
123
{"classic",NC_FORMATX_NC3,0}, /* ditto */
124
{"netcdf-4",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
125
{"enhanced",NC_FORMATX_NC4,NC_FORMAT_NETCDF4},
126
{"udf0",NC_FORMATX_UDF0,0},
127
{"udf1",NC_FORMATX_UDF1,0},
128
{"udf2",NC_FORMATX_UDF2,0},
129
{"udf3",NC_FORMATX_UDF3,0},
130
{"udf4",NC_FORMATX_UDF4,0},
131
{"udf5",NC_FORMATX_UDF5,0},
132
{"udf6",NC_FORMATX_UDF6,0},
133
{"udf7",NC_FORMATX_UDF7,0},
134
{"udf8",NC_FORMATX_UDF8,0},
135
{"udf9",NC_FORMATX_UDF9,0},
136
{"nczarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
137
{"zarr",NC_FORMATX_NCZARR,NC_FORMAT_NETCDF4},
138
{"bytes",NC_FORMATX_NC4,NC_FORMAT_NETCDF4}, /* temporary until 3 vs 4 is determined */
139
{NULL,0},
140
};
141
142
/* Replace top-level name with defkey=defvalue */
143
static const struct MACRODEF {
    char* name;          /* bare fragment name that triggers the macro */
    char* defkey;        /* key that replaces it */
    char* defvalues[4];  /* NULL-terminated values for that key */
} macrodefs[] = {
    /* Lookup stops at the first matching name, so each name appears once.
       (A second, identical "zarr" row used to sit here and could never match.) */
    {"zarr",     "mode", {"nczarr", "zarr", NULL}},
    {"dap2",     "mode", {"dap2", NULL}},
    {"dap4",     "mode", {"dap4", NULL}},
    {"s3",       "mode", {"s3", "nczarr", NULL}},
    {"bytes",    "mode", {"bytes", NULL}},
    {"xarray",   "mode", {"zarr", NULL}},
    {"noxarray", "mode", {"nczarr", "noxarray", NULL}},
    {"gs3",      "mode", {"gs3", "nczarr", NULL}}, /* Google S3 API */
    {NULL,       NULL,   {NULL}}
};
159
160
/*
161
Mode inferences: if mode contains key value, then add the inferred value;
162
Warning: be careful how this list is constructed to avoid infinite inferences.
163
In order to (mostly) avoid that consequence, any attempt to
164
infer a value that is already present will be ignored.
165
This effectively means that the inference graph
166
must be a DAG and may not have cycles.
167
You have been warned.
168
*/
169
static const struct MODEINFER {
    char* key;        /* mode value that triggers the rule */
    char* inference;  /* mode value the rule refers to */
} modeinferences[] = {
    { .key = "zarr",     .inference = "nczarr" },
    { .key = "xarray",   .inference = "zarr"   },
    { .key = "noxarray", .inference = "nczarr" },
    { .key = "noxarray", .inference = "zarr"   },
    { .key = NULL,       .inference = NULL     }
};
179
180
/* Mode negations: if mode contains key, then remove all occurrences of the inference and repeat */
181
static const struct MODEINFER modenegations[] = {
    { .key = "bytes",    .inference = "nczarr" }, /* bytes negates (nc)zarr */
    { .key = "bytes",    .inference = "zarr"   },
    { .key = "noxarray", .inference = "xarray" },
    { .key = NULL,       .inference = NULL     }
};
187
188
/* Map FORMATX to readability to get magic number */
189
static struct Readable {
190
    int impl;
191
    int readable;
192
} readable[] = {
193
{NC_FORMATX_NC3,1},
194
{NC_FORMATX_NC_HDF5,1},
195
{NC_FORMATX_NC_HDF4,1},
196
{NC_FORMATX_PNETCDF,1},
197
{NC_FORMATX_DAP2,0},
198
{NC_FORMATX_DAP4,0},
199
{NC_FORMATX_UDF0,1},
200
{NC_FORMATX_UDF1,1},
201
{NC_FORMATX_UDF2,1},
202
{NC_FORMATX_UDF3,1},
203
{NC_FORMATX_UDF4,1},
204
{NC_FORMATX_UDF5,1},
205
{NC_FORMATX_UDF6,1},
206
{NC_FORMATX_UDF7,1},
207
{NC_FORMATX_UDF8,1},
208
{NC_FORMATX_UDF9,1},
209
{NC_FORMATX_NCZARR,0}, /* eventually make readable */
210
{0,0},
211
};
212
213
/* Define the known URL protocols and their interpretation */
214
static struct NCPROTOCOLLIST {
    const char* protocol;    /* protocol as written in the URL */
    const char* substitute;  /* replacement protocol, or NULL to keep it */
    const char* fragments;   /* arbitrary fragment arguments */
} ncprotolist[] = {
    { "http",  NULL,   NULL        },
    { "https", NULL,   NULL        },
    { "file",  NULL,   NULL        },
    { "dods",  "http", "mode=dap2" },
    { "dap4",  "http", "mode=dap4" },
    { "s3",    "s3",   "mode=s3"   },
    { "gs3",   "gs3",  "mode=gs3"  },
    { NULL,    NULL,   NULL        } /* Terminate search */
};
228
229
/* Forward */
230
static int NC_omodeinfer(int useparallel, int omode, NCmodel*);
231
static int check_file_type(const char *path, int omode, int use_parallel, void *parameters, NCmodel* model, NCURI* uri);
232
static int processuri(const char* path, NCURI** urip, NClist* fraglist);
233
static int processmacros(NClist* fraglistp, NClist* expanded);
234
static char* envvlist2string(NClist* pairs, const char*);
235
static void set_default_mode(int* cmodep);
236
static int parseonchar(const char* s, int ch, NClist* segments);
237
static int mergelist(NClist** valuesp);
238
239
static int openmagic(struct MagicFile* file);
240
static int readmagic(struct MagicFile* file, size_t pos, char* magic);
241
static int closemagic(struct MagicFile* file);
242
static int NC_interpret_magic_number(char* magic, NCmodel* model);
243
#ifdef DEBUG
244
static void printmagic(const char* tag, char* magic,struct MagicFile*);
245
static void printlist(NClist* list, const char* tag);
246
#endif
247
static int isreadable(NCURI*,NCmodel*);
248
static char* list2string(NClist*);
249
static int parsepair(const char* pair, char** keyp, char** valuep);
250
static NClist* parsemode(const char* modeval);
251
static const char* getmodekey(const NClist* envv);
252
static int replacemode(NClist* envv, const char* newval);
253
static void infernext(NClist* current, NClist* next);
254
static int negateone(const char* mode, NClist* modes);
255
static void cleanstringlist(NClist* strs, int caseinsensitive);
256
static int isdaoscontainer(const char* path);
257
258
/*
259
If the path looks like a URL, then parse it, reformat it.
260
*/
261
static int
262
processuri(const char* path, NCURI** urip, NClist* fraglenv)
263
176
{
264
176
    int stat = NC_NOERR;
265
176
    int found = 0;
266
176
    NClist* tmp = NULL;
267
176
    struct NCPROTOCOLLIST* protolist;
268
176
    NCURI* uri = NULL;
269
176
    size_t pathlen = strlen(path);
270
176
    char* str = NULL;
271
176
    const NClist* ufrags;
272
176
    size_t i;
273
274
176
    if(path == NULL || pathlen == 0) {stat = NC_EURL; goto done;}
275
276
    /* Defaults */
277
176
    if(urip) *urip = NULL;
278
279
176
    ncuriparse(path,&uri);
280
176
    if(uri == NULL) goto done; /* not url */
281
282
    /* Look up the protocol */
283
0
    for(found=0,protolist=ncprotolist;protolist->protocol;protolist++) {
284
0
        if(strcmp(uri->protocol,protolist->protocol) == 0) {
285
0
      found = 1;
286
0
      break;
287
0
  }
288
0
    }
289
0
    if(!found)
290
0
  {stat = NC_EINVAL; goto done;} /* unrecognized URL form */
291
292
    /* process the corresponding fragments for that protocol */
293
0
    if(protolist->fragments != NULL) {
294
0
  tmp = nclistnew();
295
0
  if((stat = parseonchar(protolist->fragments,'&',tmp))) goto done;
296
0
  for(i=0;i<nclistlength(tmp);i++) {
297
0
      char* key=NULL;
298
0
          char* value=NULL;
299
0
      if((stat = parsepair(nclistget(tmp,i),&key,&value))) goto done;
300
0
      if(value == NULL) value = strdup("");
301
0
      nclistpush(fraglenv,key);
302
0
          nclistpush(fraglenv,value);
303
0
  }
304
0
  nclistfreeall(tmp); tmp = NULL;
305
0
    }
306
307
    /* Substitute the protocol in any case */
308
0
    if(protolist->substitute) ncurisetprotocol(uri,protolist->substitute);
309
310
    /* capture the fragments of the url */
311
0
    ufrags = (const NClist*)ncurifragmentparams(uri);
312
0
    for(i=0;i<nclistlength(ufrags);i+=2) {
313
0
  const char* key = nclistget(ufrags,i);
314
0
  const char* value = nclistget(ufrags,i+1);
315
0
        nclistpush(fraglenv,nulldup(key));
316
0
  value = (value==NULL?"":value);
317
0
  nclistpush(fraglenv,strdup(value));
318
0
    }
319
0
    if(urip) {
320
0
  *urip = uri;
321
0
  uri = NULL;
322
0
    }
323
324
176
done:
325
176
    nclistfreeall(tmp);
326
176
    nullfree(str);
327
176
    if(uri != NULL) ncurifree(uri);
328
176
    return check(stat);
329
0
}
330
331
/* Split a key=value pair */
332
static int
333
parsepair(const char* pair, char** keyp, char** valuep)
334
0
{
335
0
    const char* p;
336
0
    char* key = NULL;
337
0
    char* value = NULL;
338
339
0
    if(pair == NULL)
340
0
        return NC_EINVAL; /* empty pair */
341
0
    if(pair[0] == '\0' || pair[0] == '=')
342
0
        return NC_EINVAL; /* no key */
343
0
    p = strchr(pair,'=');
344
0
    if(p == NULL) {
345
0
  value = NULL;
346
0
  key = strdup(pair);
347
0
    } else {
348
0
  ptrdiff_t len = (p-pair);
349
0
  if((key = malloc((size_t)len+1))==NULL) return NC_ENOMEM;
350
0
  memcpy(key,pair,(size_t)len);
351
0
  key[len] = '\0';
352
0
  if(p[1] == '\0')
353
0
      value = NULL;
354
0
  else
355
0
      value = strdup(p+1);
356
0
    }
357
0
    if(keyp) {*keyp = key; key = NULL;};
358
0
    if(valuep) {*valuep = value; value = NULL;};
359
0
    nullfree(key);
360
0
    nullfree(value);
361
0
    return NC_NOERR;
362
0
}
363
364
#if 0
365
static int
366
parseurlmode(const char* modestr, NClist* list)
367
{
368
    int stat = NC_NOERR;
369
    const char* p = NULL;
370
    const char* endp = NULL;
371
372
    if(modestr == NULL || *modestr == '\0') goto done;
373
374
    /* Split modestr at the commas or EOL */
375
    p = modestr;
376
    for(;;) {
377
  char* s;
378
  ptrdiff_t slen;
379
  endp = strchr(p,',');
380
  if(endp == NULL) endp = p + strlen(p);
381
  slen = (endp - p);
382
  if((s = malloc(slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
383
  memcpy(s,p,slen);
384
  s[slen] = '\0';
385
  nclistpush(list,s);
386
  if(*endp == '\0') break;
387
  p = endp+1;
388
    }
389
390
done:
391
    return check(stat);
392
}
393
#endif
394
395
/* Split a string at a given char */
396
static int
397
parseonchar(const char* s, int ch, NClist* segments)
398
0
{
399
0
    int stat = NC_NOERR;
400
0
    const char* p = NULL;
401
0
    const char* endp = NULL;
402
403
0
    if(s == NULL || *s == '\0') goto done;
404
405
0
    p = s;
406
0
    for(;;) {
407
0
  char* q;
408
0
  ptrdiff_t slen;
409
0
  endp = strchr(p,ch);
410
0
  if(endp == NULL) endp = p + strlen(p);
411
0
  slen = (endp - p);
412
0
  if((q = malloc((size_t)slen+1)) == NULL) {stat = NC_ENOMEM; goto done;}
413
0
  memcpy(q,p,(size_t)slen);
414
0
  q[slen] = '\0';
415
0
  nclistpush(segments,q);
416
0
  if(*endp == '\0') break;
417
0
  p = endp+1;
418
0
    }
419
420
0
done:
421
0
    return check(stat);
422
0
}
423
424
/* Convert a key,value envv pairlist into a delimited string*/
425
static char*
426
envvlist2string(NClist* envv, const char* delim)
427
0
{
428
0
    size_t i;
429
0
    NCbytes* buf = NULL;
430
0
    char* result = NULL;
431
432
0
    if(envv == NULL || nclistlength(envv) == 0) return NULL;
433
0
    buf = ncbytesnew();
434
0
    for(i=0;i<nclistlength(envv);i+=2) {
435
0
  const char* key = nclistget(envv,i);
436
0
  const char* val = nclistget(envv,i+1);
437
0
  if(key == NULL || strlen(key) == 0) continue;
438
0
  assert(val != NULL);
439
0
  if(i > 0) ncbytescat(buf,"&");
440
0
  ncbytescat(buf,key);
441
0
  if(val != NULL && val[0] != '\0') {
442
0
      ncbytescat(buf,"=");
443
0
      ncbytescat(buf,val);
444
0
  }
445
0
    }
446
0
    result = ncbytesextract(buf);
447
0
    ncbytesfree(buf);
448
0
    return result;
449
0
}
450
451
/* Given a mode= argument, fill in the impl */
452
static int
453
processmodearg(const char* arg, NCmodel* model)
454
0
{
455
0
    int stat = NC_NOERR;
456
0
    struct FORMATMODES* format = formatmodes;
457
0
    for(;format->tag;format++) {
458
0
  if(strcmp(format->tag,arg)==0) {
459
0
            model->impl = format->impl;
460
0
      if(format->format != 0) model->format = format->format;
461
0
  }
462
0
    }
463
0
    return check(stat);
464
0
}
465
466
/* Given an envv fragment list, do macro replacement */
467
static int
468
processmacros(NClist* fraglenv, NClist* expanded)
469
0
{
470
0
    size_t i;
471
0
    int stat = NC_NOERR;
472
0
    const struct MACRODEF* macros = NULL;
473
474
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
475
0
  int found = 0;
476
0
  char* key = nclistget(fraglenv,i);
477
0
  char* value = nclistget(fraglenv,i+1);
478
0
  if(strlen(value) == 0) { /* must be a singleton  */
479
0
            for(macros=macrodefs;macros->name;macros++) {
480
0
                if(strcmp(macros->name,key)==0) {
481
0
        char* const * p;
482
0
        nclistpush(expanded,strdup(macros->defkey));
483
0
        for(p=macros->defvalues;*p;p++) 
484
0
      nclistpush(expanded,strdup(*p));
485
0
        found = 1;        
486
0
        break;
487
0
          }
488
0
      }
489
0
  }
490
0
  if(!found) {/* pass thru */
491
0
      nclistpush(expanded,strdup(key));
492
0
          nclistpush(expanded,strdup(value));
493
0
  }
494
0
    }
495
496
0
    return check(stat);
497
0
}
498
499
/* Process mode flag inferences */
500
static int
501
processinferences(NClist* fraglenv)
502
0
{
503
0
    int stat = NC_NOERR;
504
0
    const char* modeval = NULL;
505
0
    NClist* newmodes = nclistnew();
506
0
    NClist* currentmodes = NULL;
507
0
    NClist* nextmodes = nclistnew();
508
0
    size_t i;
509
0
    char* newmodeval = NULL;
510
511
    /* Get "mode" entry */
512
0
    if((modeval = getmodekey(fraglenv))==NULL) goto done;
513
514
    /* Get the mode as list */
515
0
    currentmodes = parsemode(modeval);
516
517
#ifdef DEBUG
518
    printlist(currentmodes,"processinferences: initial mode list");
519
#endif
520
521
    /* Do what amounts to breadth first inferencing down the inference DAG. */
522
523
0
    for(;;) {
524
0
        NClist* tmp = NULL;
525
        /* Compute the next set of inferred modes */
526
#ifdef DEBUG
527
printlist(currentmodes,"processinferences: current mode list");
528
#endif
529
0
        infernext(currentmodes,nextmodes);
530
#ifdef DEBUG
531
printlist(nextmodes,"processinferences: next mode list");
532
#endif
533
        /* move current modes into list of newmodes */
534
0
        for(i=0;i<nclistlength(currentmodes);i++) {
535
0
      nclistpush(newmodes,nclistget(currentmodes,i));
536
0
  }
537
0
        nclistsetlength(currentmodes,0); /* clear current mode list */
538
0
        if(nclistlength(nextmodes) == 0) break; /* nothing more to do */
539
#ifdef DEBUG
540
printlist(newmodes,"processinferences: new mode list");
541
#endif
542
  /* Swap current and next */
543
0
        tmp = currentmodes;
544
0
  currentmodes = nextmodes;
545
0
  nextmodes = tmp;
546
0
        tmp = NULL;
547
0
    }
548
    /* cleanup any unused elements in currentmodes */
549
0
    nclistclearall(currentmodes);
550
551
    /* Ensure no duplicates */
552
0
    cleanstringlist(newmodes,1);
553
554
#ifdef DEBUG
555
    printlist(newmodes,"processinferences: final inferred mode list");
556
#endif
557
558
   /* Remove negative inferences */
559
0
   for(i=0;i<nclistlength(newmodes);i++) {
560
0
  const char* mode = nclistget(newmodes,i);
561
0
  negateone(mode,newmodes);
562
0
    }
563
564
    /* Store new mode value */
565
0
    if((newmodeval = list2string(newmodes))== NULL)
566
0
  {stat = NC_ENOMEM; goto done;}        
567
0
    if((stat=replacemode(fraglenv,newmodeval))) goto done;
568
0
    modeval = NULL;
569
570
0
done:
571
0
    nullfree(newmodeval);
572
0
    nclistfreeall(newmodes);
573
0
    nclistfreeall(currentmodes);
574
0
    nclistfreeall(nextmodes);
575
0
    return check(stat);
576
0
}
577
578
579
static int
580
negateone(const char* mode, NClist* newmodes)
581
0
{
582
0
    const struct MODEINFER* tests = modenegations;
583
0
    int changed = 0;
584
0
    for(;tests->key;tests++) {
585
0
  if(strcasecmp(tests->key,mode)==0) {
586
      /* Find and remove all instances of the inference value */
587
0
      for(size_t i = nclistlength(newmodes); i-- > 0;) {
588
0
    char* candidate = nclistget(newmodes,i);
589
0
    if(strcasecmp(candidate,tests->inference)==0) {
590
0
        nclistremove(newmodes,i);
591
0
        nullfree(candidate);
592
0
              changed = 1;
593
0
    }
594
0
      }
595
0
        }
596
0
    }
597
0
    return changed;
598
0
}
599
600
static void
601
infernext(NClist* current, NClist* next)
602
0
{
603
0
    size_t i;
604
0
    for(i=0;i<nclistlength(current);i++) {
605
0
        const struct MODEINFER* tests = NULL;
606
0
  const char* cur = nclistget(current,i);
607
0
        for(tests=modeinferences;tests->key;tests++) {
608
0
      if(strcasecmp(tests->key,cur)==0) {
609
          /* Append the inferred mode unless dup */
610
0
    if(!nclistmatch(next,tests->inference,1))
611
0
              nclistpush(next,strdup(tests->inference));
612
0
      }
613
0
        }
614
0
    }
615
0
}
616
617
/*
618
Given a list of strings, remove nulls and duplicates
619
*/
620
static int
621
mergelist(NClist** valuesp)
622
0
{
623
0
    size_t i,j;
624
0
    int stat = NC_NOERR;
625
0
    NClist* values = *valuesp;
626
0
    NClist* allvalues = nclistnew();
627
0
    NClist* newvalues = nclistnew();
628
0
    char* value = NULL;
629
630
0
    for(i=0;i<nclistlength(values);i++) {
631
0
  char* val1 = nclistget(values,i);
632
  /* split on commas and put pieces into allvalues */
633
0
  if((stat=parseonchar(val1,',',allvalues))) goto done;
634
0
    }
635
    /* Remove duplicates and "" */
636
0
    while(nclistlength(allvalues) > 0) {
637
0
  value = nclistremove(allvalues,0);
638
0
  if(strlen(value) == 0) {
639
0
      nullfree(value); value = NULL;
640
0
  } else {
641
0
      for(j=0;j<nclistlength(newvalues);j++) {
642
0
          char* candidate = nclistget(newvalues,j);
643
0
          if(strcasecmp(candidate,value)==0)
644
0
              {nullfree(value); value = NULL; break;}
645
0
       }
646
0
  }
647
0
  if(value != NULL) {nclistpush(newvalues,value); value = NULL;}
648
0
    }
649
    /* Make sure to have at least 1 value */
650
0
    if(nclistlength(newvalues)==0) nclistpush(newvalues,strdup(""));
651
0
    *valuesp = values; values = NULL;
652
653
0
done:
654
0
    nclistfree(allvalues);
655
0
    nclistfreeall(values);
656
0
    nclistfreeall(newvalues);
657
0
    return check(stat);
658
0
}
659
660
static int
661
lcontains(NClist* l, const char* key0)
662
0
{
663
0
    size_t i;
664
0
    for(i=0;i<nclistlength(l);i++) {
665
0
        const char* key1 = nclistget(l,i);
666
0
  if(strcasecmp(key0,key1)==0) return 1;
667
0
    }
668
0
    return 0;
669
0
}
670
671
/* Warning values should not use nclistfreeall */
672
static void
673
collectvaluesbykey(NClist* fraglenv, const char* key, NClist* values)
674
0
{
675
0
    size_t i;
676
    /* collect all the values with the same key (including this one) */
677
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
678
0
        const char* key2 = nclistget(fraglenv,i);
679
0
        if(strcasecmp(key,key2)==0) {
680
0
      const char* value2 = nclistget(fraglenv,i+1);
681
0
      nclistpush(values,value2); value2 = NULL;
682
0
  }
683
0
    }
684
0
}
685
686
/* Warning allkeys should not use nclistfreeall */
687
static void
688
collectallkeys(NClist* fraglenv, NClist* allkeys)
689
0
{
690
0
    size_t i;
691
    /* collect all the distinct keys */
692
0
    for(i=0;i<nclistlength(fraglenv);i+=2) {
693
0
  char* key = nclistget(fraglenv,i);
694
0
  if(!lcontains(allkeys,key)) {
695
0
      nclistpush(allkeys,key);
696
0
  }
697
0
    }
698
0
}
699
700
/* Given a fragment envv list, coalesce duplicate keys and remove duplicate values*/
701
static int
702
cleanfragments(NClist* fraglenv, NClist* newlist)
703
0
{
704
0
    size_t i;
705
0
    int stat = NC_NOERR;
706
0
    NClist* tmp = NULL;
707
0
    NClist* allkeys = NULL;
708
0
    NCbytes* buf = NULL;
709
0
    char* key = NULL;
710
0
    char* value = NULL;
711
712
0
    buf = ncbytesnew();
713
0
    allkeys = nclistnew();
714
0
    tmp = nclistnew();
715
716
    /* collect all unique keys */
717
0
    collectallkeys(fraglenv,allkeys);
718
    /* Collect all values for same key across all fragment pairs */
719
0
    for(i=0;i<nclistlength(allkeys);i++) {
720
0
  key = nclistget(allkeys,i);
721
0
  collectvaluesbykey(fraglenv,key,tmp);
722
  /* merge the key values, remove duplicate */
723
0
  if((stat=mergelist(&tmp))) goto done;
724
        /* Construct key,value pair and insert into newlist */
725
0
  key = strdup(key);
726
0
  nclistpush(newlist,key);
727
0
  value = list2string(tmp);
728
0
  nclistpush(newlist,value);
729
0
  nclistclear(tmp);
730
0
    }
731
0
done:
732
0
    nclistfree(allkeys);
733
0
    nclistfree(tmp);
734
0
    ncbytesfree(buf);
735
0
    return check(stat);
736
0
}
737
738
/* process non-mode fragment keys in case they hold significance; currently not */
739
static int
740
processfragmentkeys(const char* key, const char* value, NCmodel* model)
741
0
{
742
0
    return NC_NOERR;
743
0
}
744
745
/*
746
Infer from the mode + useparallel
747
only call if iscreate or file is not easily readable.
748
*/
749
static int
750
NC_omodeinfer(int useparallel, int cmode, NCmodel* model)
751
176
{
752
176
    int stat = NC_NOERR;
753
754
    /* If no format flags are set, then use default */
755
176
    if(!fIsSet(cmode,NC_FORMAT_ALL))
756
176
  set_default_mode(&cmode);
757
758
    /* Process the cmode; may override some already set flags. The
759
     * user-defined formats must be checked first. They may choose to
760
     * use some of the other flags, like NC_NETCDF4, so we must first
761
     * check NC_UDF0-NC_UDF9 before checking for any other flag. */
762
176
    int udf_found = 0;
763
    /* Lookup table for all UDF mode flags. This replaces the previous bit-shift
764
     * calculation which was fragile due to non-sequential bit positions
765
     * (bits 16, 19-25 to avoid conflicts with NC_NOATTCREORD and NC_NODIMSCALE_ATTACH). */
766
176
    static const int udf_flags[NC_MAX_UDF_FORMATS] = {
767
176
        NC_UDF0, NC_UDF1, NC_UDF2, NC_UDF3, NC_UDF4,
768
176
        NC_UDF5, NC_UDF6, NC_UDF7, NC_UDF8, NC_UDF9
769
176
    };
770
    /* Check if any UDF format flag is set in the mode */
771
1.93k
    for(int i = 0; i < NC_MAX_UDF_FORMATS; i++) {
772
1.76k
        if(fIsSet(cmode, udf_flags[i])) {
773
            /* Convert array index to format constant (handles gap in numbering) */
774
0
            int formatx = (i <= 1) ? (NC_FORMATX_UDF0 + i) : (NC_FORMATX_UDF2 + i - 2);
775
0
            model->impl = formatx;
776
0
            udf_found = 1;
777
0
            break;
778
0
        }
779
1.76k
    }
780
    
781
176
    if(udf_found)
782
0
    {
783
0
        if(fIsSet(cmode,NC_64BIT_OFFSET)) 
784
0
        {
785
0
            model->format = NC_FORMAT_64BIT_OFFSET;
786
0
        }
787
0
        else if(fIsSet(cmode,NC_64BIT_DATA))
788
0
        {
789
0
            model->format = NC_FORMAT_64BIT_DATA;
790
0
        }
791
0
        else if(fIsSet(cmode,NC_NETCDF4))
792
0
        {
793
0
            if(fIsSet(cmode,NC_CLASSIC_MODEL))
794
0
                model->format = NC_FORMAT_NETCDF4_CLASSIC;
795
0
            else
796
0
                model->format = NC_FORMAT_NETCDF4;
797
0
        }
798
0
        if(! model->format)
799
0
            model->format = NC_FORMAT_CLASSIC;
800
0
  goto done;
801
0
    }
802
803
176
    if(fIsSet(cmode,NC_64BIT_OFFSET)) {
804
0
  model->impl = NC_FORMATX_NC3;
805
0
  model->format = NC_FORMAT_64BIT_OFFSET;
806
0
        goto done;
807
0
    }
808
809
176
    if(fIsSet(cmode,NC_64BIT_DATA)) {
810
0
  model->impl = NC_FORMATX_NC3;
811
0
  model->format = NC_FORMAT_64BIT_DATA;
812
0
        goto done;
813
0
    }
814
815
176
    if(fIsSet(cmode,NC_NETCDF4)) {
816
0
  model->impl = NC_FORMATX_NC4;
817
0
        if(fIsSet(cmode,NC_CLASSIC_MODEL))
818
0
      model->format = NC_FORMAT_NETCDF4_CLASSIC;
819
0
  else
820
0
      model->format = NC_FORMAT_NETCDF4;
821
0
        goto done;
822
0
    }
823
824
    /* Default to classic model */
825
176
    model->format = NC_FORMAT_CLASSIC;
826
176
    model->impl = NC_FORMATX_NC3;
827
828
176
done:
829
    /* Apply parallel flag */
830
176
    if(useparallel) {
831
0
        if(model->impl == NC_FORMATX_NC3)
832
0
      model->impl = NC_FORMATX_PNETCDF;
833
0
    }
834
176
    return check(stat);
835
176
}
836
837
/*
838
If the mode flags do not necessarily specify the
839
format, then default it by adding in appropriate flags.
840
*/
841
842
static void
843
set_default_mode(int* modep)
844
176
{
845
176
    int mode = *modep;
846
176
    int dfaltformat;
847
848
176
    dfaltformat = nc_get_default_format();
849
176
    switch (dfaltformat) {
850
0
    case NC_FORMAT_64BIT_OFFSET: mode |= NC_64BIT_OFFSET; break;
851
0
    case NC_FORMAT_64BIT_DATA: mode |= NC_64BIT_DATA; break;
852
0
    case NC_FORMAT_NETCDF4: mode |= NC_NETCDF4; break;
853
0
    case NC_FORMAT_NETCDF4_CLASSIC: mode |= (NC_NETCDF4|NC_CLASSIC_MODEL); break;
854
176
    case NC_FORMAT_CLASSIC: /* fall thru */
855
176
    default: break; /* default to classic */
856
176
    }
857
176
    *modep = mode; /* final result */
858
176
}
859
860
/**************************************************/
861
/*
862
   Infer model for this dataset using some
863
   combination of cmode, path, and reading the dataset.
864
   See the documentation in docs/internal.dox.
865
866
@param path
867
@param omode
868
@param iscreate
869
@param useparallel
870
@param params
871
@param model
872
@param newpathp
873
*/
874
875
int
876
NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void* params, NCmodel* model, char** newpathp)
877
176
{
878
176
    size_t i;
879
176
    int stat = NC_NOERR;
880
176
    NCURI* uri = NULL;
881
176
    int omode = *omodep;
882
176
    NClist* fraglenv = nclistnew();
883
176
    NClist* modeargs = nclistnew();
884
176
    char* sfrag = NULL;
885
176
    const char* modeval = NULL;
886
176
    char* abspath = NULL;
887
176
    NClist* tmp = NULL;
888
889
    /* Phase 1:
890
       1. convert special protocols to http|https
891
       2. begin collecting fragments
892
    */
893
176
    if((stat = processuri(path, &uri, fraglenv))) goto done;
894
895
176
    if(uri != NULL) {
896
#ifdef DEBUG
897
  printlist(fraglenv,"processuri");
898
#endif
899
900
        /* Phase 2: Expand macros and add to fraglenv */
901
0
  nclistfreeall(tmp);
902
0
  tmp = nclistnew();
903
0
        if((stat = processmacros(fraglenv,tmp))) goto done;
904
0
  nclistfreeall(fraglenv);
905
0
  fraglenv = tmp; tmp = NULL;
906
#ifdef DEBUG
907
  printlist(fraglenv,"processmacros");
908
#endif
909
  /* Cleanup the fragment list */
910
0
  nclistfreeall(tmp);
911
0
  tmp = nclistnew();
912
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
913
0
  nclistfreeall(fraglenv);
914
0
  fraglenv = tmp; tmp = NULL;
915
916
        /* Phase 2a: Expand mode inferences and add to fraglenv */
917
0
        if((stat = processinferences(fraglenv))) goto done;
918
#ifdef DEBUG
919
  printlist(fraglenv,"processinferences");
920
#endif
921
922
        /* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
923
0
  nclistfreeall(tmp);
924
0
  tmp = nclistnew();
925
0
        if((stat = cleanfragments(fraglenv,tmp))) goto done;
926
0
  nclistfreeall(fraglenv);
927
0
  fraglenv = tmp; tmp = NULL;
928
#ifdef DEBUG
929
  printlist(fraglenv,"cleanfragments");
930
#endif
931
932
        /* Phase 4: Rebuild the url fragment and rebuilt the url */
933
0
        sfrag = envvlist2string(fraglenv,"&");
934
0
        nclistfreeall(fraglenv); fraglenv = NULL;
935
#ifdef DEBUG
936
  fprintf(stderr,"frag final: %s\n",sfrag);
937
#endif
938
0
        ncurisetfragments(uri,sfrag);
939
0
        nullfree(sfrag); sfrag = NULL;
940
941
#ifdef NETCDF_ENABLE_S3
942
  /* If s3, then rebuild the url */
943
  if(NC_iss3(uri,NULL)) {
944
      NCURI* newuri = NULL;
945
      if((stat = NC_s3urlrebuild(uri,NULL,&newuri))) goto done;
946
      ncurifree(uri);
947
      uri = newuri;
948
  } else
949
#endif
950
0
  if(strcmp(uri->protocol,"file")==0) {
951
            /* convert path to absolute */
952
0
      char* canon = NULL;
953
0
      abspath = NCpathabsolute(uri->path);
954
0
      if((stat = NCpathcanonical(abspath,&canon))) goto done;
955
0
      nullfree(abspath);
956
0
      abspath = canon; canon = NULL;
957
0
      if((stat = ncurisetpath(uri,abspath))) goto done;
958
0
  }
959
  
960
  /* rebuild the path */
961
0
        if(newpathp) {
962
0
            *newpathp = ncuribuild(uri,NULL,NULL,NCURIALL);
963
#ifdef DEBUG
964
      fprintf(stderr,"newpath=|%s|\n",*newpathp); fflush(stderr);
965
#endif    
966
0
  }
967
968
        /* Phase 5: Process the mode key to see if we can tell the formatx */
969
0
        modeval = ncurifragmentlookup(uri,"mode");
970
0
        if(modeval != NULL) {
971
0
      if((stat = parseonchar(modeval,',',modeargs))) goto done;
972
0
            for(i=0;i<nclistlength(modeargs);i++) {
973
0
          const char* arg = nclistget(modeargs,i);
974
0
          if((stat=processmodearg(arg,model))) goto done;
975
0
            }
976
0
  }
977
978
        /* Phase 6: Process the non-mode keys to see if we can tell the formatx */
979
0
  if(!modelcomplete(model)) {
980
0
      size_t i;
981
0
      NClist* p = (NClist*)ncurifragmentparams(uri); /* envv format */
982
0
      for(i=0;i<nclistlength(p);i+=2) {
983
0
    const char* key = nclistget(p,0);
984
0
    const char* value = nclistget(p,1);
985
0
    if((stat=processfragmentkeys(key,value,model))) goto done;
986
0
      }
987
0
  }
988
989
        /* Phase 7: Special cases: if this is a URL and model.impl is still not defined */
990
        /* Phase7a: Default is DAP2 */
991
0
        if(!modelcomplete(model)) {
992
0
      model->impl = NC_FORMATX_DAP2;
993
0
      model->format = NC_FORMAT_NC3;
994
0
        }
995
996
176
    } else {/* Not URL */
997
176
  if(newpathp) *newpathp = NULL;
998
176
    }
999
1000
    /* Phase 8: mode inference from mode flags */
1001
    /* The modeargs did not give us a model (probably not a URL).
1002
       So look at the combination of mode flags and the useparallel flag */
1003
176
    if(!modelcomplete(model)) {
1004
176
        if((stat = NC_omodeinfer(useparallel,omode,model))) goto done;
1005
176
    }
1006
1007
    /* Phase 9: Special case for file stored in DAOS container */
1008
176
    if(isdaoscontainer(path) == NC_NOERR) {
1009
        /* This is a DAOS container, so immediately assume it is HDF5. */
1010
0
        model->impl = NC_FORMATX_NC_HDF5;
1011
0
        model->format = NC_FORMAT_NETCDF4;
1012
176
    } else {
1013
        /* Phase 10: Infer from file content, if possible;
1014
           this has highest precedence, so it may override
1015
           previous decisions. Note that we do this last
1016
           because we need previously determined model info
1017
           to guess if this file is readable.
1018
        */
1019
176
        if(!iscreate && isreadable(uri,model)) {
1020
       /* Ok, we need to try to read the file */
1021
176
            if((stat = check_file_type(path, omode, useparallel, params, model, uri))) goto done;
1022
176
        }
1023
176
    }
1024
1025
    /* Need a decision */
1026
152
    if(!modelcomplete(model))
1027
0
  {stat = NC_ENOTNC; goto done;}
1028
1029
    /* Force flag consistency */
1030
152
    switch (model->impl) {
1031
0
    case NC_FORMATX_NC4:
1032
0
    case NC_FORMATX_NC_HDF4:
1033
0
    case NC_FORMATX_DAP4:
1034
0
    case NC_FORMATX_NCZARR:
1035
0
  omode |= NC_NETCDF4;
1036
0
  if(model->format == NC_FORMAT_NETCDF4_CLASSIC)
1037
0
      omode |= NC_CLASSIC_MODEL;
1038
0
  break;
1039
152
    case NC_FORMATX_NC3:
1040
152
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1041
152
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1042
92
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1043
152
  break;
1044
0
    case NC_FORMATX_PNETCDF:
1045
0
  omode &= ~NC_NETCDF4; /* must be netcdf-3 (CDF-1, CDF-2, CDF-5) */
1046
0
  if(model->format == NC_FORMAT_64BIT_OFFSET) omode |= NC_64BIT_OFFSET;
1047
0
  else if(model->format == NC_FORMAT_64BIT_DATA) omode |= NC_64BIT_DATA;
1048
0
  break;
1049
0
    case NC_FORMATX_DAP2:
1050
0
  omode &= ~(NC_NETCDF4|NC_64BIT_OFFSET|NC_64BIT_DATA|NC_CLASSIC_MODEL);
1051
0
  break;
1052
0
    case NC_FORMATX_UDF0:
1053
0
    case NC_FORMATX_UDF1:
1054
0
    case NC_FORMATX_UDF2:
1055
0
    case NC_FORMATX_UDF3:
1056
0
    case NC_FORMATX_UDF4:
1057
0
    case NC_FORMATX_UDF5:
1058
0
    case NC_FORMATX_UDF6:
1059
0
    case NC_FORMATX_UDF7:
1060
0
    case NC_FORMATX_UDF8:
1061
0
    case NC_FORMATX_UDF9:
1062
0
        if(model->format == NC_FORMAT_64BIT_OFFSET) 
1063
0
            omode |= NC_64BIT_OFFSET;
1064
0
        else if(model->format == NC_FORMAT_64BIT_DATA)
1065
0
            omode |= NC_64BIT_DATA;
1066
0
        else if(model->format == NC_FORMAT_NETCDF4)  
1067
0
            omode |= NC_NETCDF4;
1068
0
        else if(model->format == NC_FORMAT_NETCDF4_CLASSIC)  
1069
0
            omode |= NC_NETCDF4|NC_CLASSIC_MODEL;
1070
0
        break;
1071
0
    default:
1072
0
  {stat = NC_ENOTNC; goto done;}
1073
152
    }
1074
1075
176
done:
1076
176
    nullfree(sfrag);
1077
176
    nullfree(abspath);
1078
176
    ncurifree(uri);
1079
176
    nclistfreeall(modeargs);
1080
176
    nclistfreeall(fraglenv);
1081
176
    nclistfreeall(tmp);
1082
176
    *omodep = omode; /* in/out */
1083
176
    return check(stat);
1084
152
}
1085
1086
static int
1087
isreadable(NCURI* uri, NCmodel* model)
1088
176
{
1089
176
    int canread = 0;
1090
176
    struct Readable* r;
1091
    /* Step 1: Look up the implementation */
1092
176
    for(r=readable;r->impl;r++) {
1093
176
  if(model->impl == r->impl) {canread = r->readable; break;}
1094
176
    }
1095
    /* Step 2: check for bytes mode */
1096
176
    if(!canread && NC_testmode(uri,"bytes") && (model->impl == NC_FORMATX_NC4 || model->impl == NC_FORMATX_NC_HDF5))
1097
0
        canread = 1;
1098
176
    return canread;
1099
176
}
1100
1101
#if 0
1102
/* Return a newly allocated copy of s; a NULL s yields a copy of "".
   The result comes from strdup, so it may be NULL if allocation fails. */
static char*
emptyify(char* s)
{
    /* Duplicate exactly once: the previous form duplicated "" and then
       duplicated that copy, leaking the first allocation. */
    return strdup(s == NULL ? "" : s);
}
1108
1109
/* Map an empty string to NULL; any other argument (including NULL)
   is returned unchanged. */
static const char*
nullify(const char* s)
{
    if(s == NULL || s[0] == '\0')
        return NULL;
    return s;
}
1116
#endif
1117
1118
/**************************************************/
1119
/* Envv list utilities */
1120
1121
static const char*
1122
getmodekey(const NClist* envv)
1123
0
{
1124
0
    size_t i;
1125
    /* Get "mode" entry */
1126
0
    for(i=0;i<nclistlength(envv);i+=2) {
1127
0
  char* key = NULL;
1128
0
  key = nclistget(envv,i);
1129
0
  if(strcasecmp(key,"mode")==0)
1130
0
      return nclistget(envv,i+1);
1131
0
    }
1132
0
    return NULL;
1133
0
}
1134
1135
static int
1136
replacemode(NClist* envv, const char* newval)
1137
0
{
1138
0
    size_t i;
1139
    /* Get "mode" entry */
1140
0
    for(i=0;i<nclistlength(envv);i+=2) {
1141
0
  char* key = NULL;
1142
0
  char* val = NULL;
1143
0
  key = nclistget(envv,i);
1144
0
  if(strcasecmp(key,"mode")==0) {
1145
0
      val = nclistget(envv,i+1);      
1146
0
      nclistset(envv,i+1,strdup(newval));
1147
0
      nullfree(val);
1148
0
      return NC_NOERR;
1149
0
  }
1150
0
    }
1151
0
    return NC_EINVAL;
1152
0
}
1153
1154
/* Split a comma-separated mode value into a new list.
   A NULL modeval yields an empty (but still allocated) list.
   Any error from parseonchar is ignored. */
static NClist*
parsemode(const char* modeval)
{
    NClist* pieces = nclistnew();

    if(modeval == NULL)
        return pieces;
    (void)parseonchar(modeval,',',pieces); /* split on commas */
    return pieces;
}
1162
1163
/* Convert a list into a comma'd string */
1164
static char*
1165
list2string(NClist* list)
1166
0
{
1167
0
    size_t i;
1168
0
    NCbytes* buf = NULL;
1169
0
    char* result = NULL;
1170
1171
0
    if(list == NULL || nclistlength(list)==0) return strdup("");
1172
0
    buf = ncbytesnew();
1173
0
    for(i=0;i<nclistlength(list);i++) {
1174
0
  const char* m = nclistget(list,i);
1175
0
  if(m == NULL || strlen(m) == 0) continue;
1176
0
  if(i > 0) ncbytescat(buf,",");
1177
0
  ncbytescat(buf,m);
1178
0
    }
1179
0
    result = ncbytesextract(buf);
1180
0
    ncbytesfree(buf);
1181
0
    if(result == NULL) result = strdup("");
1182
0
    return result;
1183
0
}
1184
1185
#if 0
1186
/* Given a comma separated string, remove duplicates; mostly used to cleanup mode list */
1187
static char* 
1188
cleancommalist(const char* commalist, int caseinsensitive)
1189
{
1190
    NClist* tmp = nclistnew();
1191
    char* newlist = NULL;
1192
    if(commalist == NULL || strlen(commalist)==0) return nulldup(commalist);
1193
    (void)parseonchar(commalist,',',tmp);/* split on commas */
1194
    cleanstringlist(tmp,caseinsensitive);
1195
    newlist = list2string(tmp);
1196
    nclistfreeall(tmp);
1197
    return newlist;
1198
}
1199
#endif
1200
1201
/* Given a list of strings, remove nulls and duplicated */
1202
static void
1203
cleanstringlist(NClist* strs, int caseinsensitive)
1204
0
{
1205
0
    if(nclistlength(strs) == 0) return;
1206
    /* Remove nulls */
1207
0
    for(size_t i = nclistlength(strs); i-->0;) {
1208
0
        if(nclistget(strs,i)==NULL) nclistremove(strs,i);
1209
0
    }
1210
0
    if(nclistlength(strs) <= 1) return;
1211
    /* Remove duplicates*/
1212
0
    for(size_t i=0;i<nclistlength(strs);i++) {
1213
0
        const char* value = nclistget(strs,i);
1214
        /* look ahead for duplicates */
1215
0
        for(size_t j=nclistlength(strs)-1;j>i;j--) {
1216
0
            int match;
1217
0
            const char* candidate = nclistget(strs,j);
1218
0
            if(caseinsensitive)
1219
0
                match = (strcasecmp(value,candidate) == 0);
1220
0
            else
1221
0
                match = (strcmp(value,candidate) == 0);
1222
0
            if(match) {char* dup = nclistremove(strs,j); nullfree(dup);}
1223
0
        }
1224
0
    }
1225
0
}
1226
1227
1228
/**************************************************/
1229
/**
1230
 * @internal Given an existing file, figure out its format and return
1231
 * that format value (NC_FORMATX_XXX) in model arg. Assume any path
1232
 * conversion was already performed at a higher level.
1233
 *
1234
 * @param path File name.
1235
 * @param flags
1236
 * @param use_parallel
1237
 * @param parameters
1238
 * @param model Pointer that gets the model to use for the dispatch table.
1239
 * @param version Pointer that gets version of the file.
1240
 *
1241
 * @return ::NC_NOERR No error.
1242
 * @author Dennis Heimbigner
1243
*/
1244
static int
1245
check_file_type(const char *path, int omode, int use_parallel,
1246
       void *parameters, NCmodel* model, NCURI* uri)
1247
176
{
1248
176
    char magic[NC_MAX_MAGIC_NUMBER_LEN];
1249
176
    int status = NC_NOERR;
1250
176
    struct MagicFile magicinfo;
1251
#ifdef _WIN32
1252
    NC* nc = NULL;
1253
#endif
1254
1255
176
    memset((void*)&magicinfo,0,sizeof(magicinfo));
1256
1257
#ifdef _WIN32 /* including MINGW */
1258
    /* Windows does not handle multiple handles to the same file very well.
1259
       So if file is already open/created, then find it and just get the
1260
       model from that. */
1261
    if((nc = find_in_NCList_by_name(path)) != NULL) {
1262
  int format = 0;
1263
  /* Get the model from this NC */
1264
  if((status = nc_inq_format_extended(nc->ext_ncid,&format,NULL))) goto done;
1265
  model->impl = format;
1266
  if((status = nc_inq_format(nc->ext_ncid,&format))) goto done;
1267
  model->format = format;
1268
  goto done;
1269
    }
1270
#endif
1271
1272
176
    magicinfo.path = path; /* do not free */
1273
176
    magicinfo.uri = uri; /* do not free */
1274
176
    magicinfo.omode = omode;
1275
176
    magicinfo.model = model; /* do not free */
1276
176
    magicinfo.parameters = parameters; /* do not free */
1277
#ifdef USE_STDIO
1278
    magicinfo.use_parallel = 0;
1279
#else
1280
176
    magicinfo.use_parallel = use_parallel;
1281
176
#endif
1282
1283
176
    if((status = openmagic(&magicinfo))) goto done;
1284
1285
    /* Verify we have a large enough file */
1286
176
    if(MAGIC_NUMBER_LEN >= (unsigned long long)magicinfo.filelen)
1287
0
  {status = NC_ENOTNC; goto done;}
1288
176
    if((status = readmagic(&magicinfo,0L,magic)) != NC_NOERR) {
1289
0
  status = NC_ENOTNC;
1290
0
  goto done;
1291
0
    }
1292
1293
    /* Look at the magic number */
1294
176
    if(NC_interpret_magic_number(magic,model) == NC_NOERR
1295
152
  && model->format != 0) {
1296
152
        if (use_parallel && (model->format == NC_FORMAT_NC3 || model->impl == NC_FORMATX_NC3))
1297
            /* this is called from nc_open_par() and file is classic */
1298
0
            model->impl = NC_FORMATX_PNETCDF;
1299
152
        goto done; /* found something */
1300
152
    }
1301
1302
    /* Remaining case when implementation is an HDF5 file;
1303
       search forward at starting at 512
1304
       and doubling to see if we have HDF5 magic number */
1305
24
    {
1306
24
  size_t pos = 512L;
1307
236
        for(;;) {
1308
236
      if((pos+MAGIC_NUMBER_LEN) > (unsigned long long)magicinfo.filelen)
1309
24
    {status = NC_ENOTNC; goto done;}
1310
212
            if((status = readmagic(&magicinfo,pos,magic)) != NC_NOERR)
1311
0
          {status = NC_ENOTNC; goto done; }
1312
212
            NC_interpret_magic_number(magic,model);
1313
212
            if(model->impl == NC_FORMATX_NC4) break;
1314
      /* double and try again */
1315
212
      pos = 2*pos;
1316
212
        }
1317
24
    }
1318
176
done:
1319
176
    closemagic(&magicinfo);
1320
176
    return check(status);
1321
24
}
1322
1323
/**
1324
\internal
1325
\ingroup datasets
1326
Provide open, read and close for use when searching for magic numbers
1327
*/
1328
static int
1329
openmagic(struct MagicFile* file)
1330
176
{
1331
176
    int status = NC_NOERR;
1332
176
    if(fIsSet(file->omode,NC_INMEMORY)) {
1333
  /* Get its length */
1334
176
  NC_memio* meminfo = (NC_memio*)file->parameters;
1335
176
        assert(meminfo != NULL);
1336
176
  file->filelen = (long long)meminfo->size;
1337
176
  goto done;
1338
176
    }
1339
0
    if(file->uri != NULL) {
1340
#ifdef NETCDF_ENABLE_BYTERANGE
1341
  /* Construct a URL minus any fragment */
1342
        file->curlurl = ncuribuild(file->uri,NULL,NULL,NCURISVC);
1343
  /* Open the curl handle */
1344
        if((status=nc_http_open(file->path, &file->state))) goto done;
1345
  if((status=nc_http_size(file->state,&file->filelen))) goto done;
1346
#else /*!BYTERANGE*/
1347
0
  {status = NC_ENOTBUILT;}
1348
0
#endif /*BYTERANGE*/
1349
0
  goto done;
1350
0
    } 
1351
#ifdef USE_PARALLEL
1352
    if (file->use_parallel) {
1353
  int retval;
1354
  MPI_Offset size;
1355
        assert(file->parameters != NULL);
1356
  if((retval = MPI_File_open(((NC_MPI_INFO*)file->parameters)->comm,
1357
                                   (char*)file->path,MPI_MODE_RDONLY,
1358
                                   ((NC_MPI_INFO*)file->parameters)->info,
1359
                                   &file->fh)) != MPI_SUCCESS) {
1360
#ifdef MPI_ERR_NO_SUCH_FILE
1361
      int errorclass;
1362
      MPI_Error_class(retval, &errorclass);
1363
      if (errorclass == MPI_ERR_NO_SUCH_FILE)
1364
#ifdef NC_ENOENT
1365
          status = NC_ENOENT;
1366
#else /*!NC_ENOENT*/
1367
    status = errno;
1368
#endif /*NC_ENOENT*/
1369
      else
1370
#endif /*MPI_ERR_NO_SUCH_FILE*/
1371
          status = NC_EPARINIT;
1372
      file->fh = MPI_FILE_NULL;
1373
      goto done;
1374
  }
1375
  /* Get its length */
1376
  if((retval=MPI_File_get_size(file->fh, &size)) != MPI_SUCCESS)
1377
      {status = NC_EPARINIT; goto done;}
1378
  file->filelen = (long long)size;
1379
  goto done;
1380
    }
1381
#endif /* USE_PARALLEL */
1382
0
    {
1383
0
        if (file->path == NULL || strlen(file->path) == 0)
1384
0
            {status = NC_EINVAL; goto done;}
1385
0
        file->fp = NCfopen(file->path, "r");
1386
0
        if(file->fp == NULL)
1387
0
      {status = errno; goto done;}
1388
  /* Get its length */
1389
0
  {
1390
0
      int fd = fileno(file->fp);
1391
#ifdef _WIN32
1392
      __int64 len64 = _filelengthi64(fd);
1393
      if(len64 < 0)
1394
    {status = errno; goto done;}
1395
      file->filelen = (long long)len64;
1396
#else
1397
0
      off_t size;
1398
0
      size = lseek(fd, 0, SEEK_END);
1399
0
      if(size == -1)
1400
0
    {status = errno; goto done;}
1401
0
    file->filelen = (long long)size;
1402
0
#endif
1403
0
  }
1404
0
        int retval2 = fseek(file->fp, 0L, SEEK_SET);        
1405
0
      if(retval2 != 0)
1406
0
    {status = errno; goto done;}
1407
0
    }
1408
176
done:
1409
176
    return check(status);
1410
0
}
1411
1412
static int
1413
readmagic(struct MagicFile* file, size_t pos, char* magic)
1414
388
{
1415
388
    int status = NC_NOERR;
1416
388
    NCbytes* buf = ncbytesnew();
1417
1418
388
    memset(magic,0,MAGIC_NUMBER_LEN);
1419
388
    if(fIsSet(file->omode,NC_INMEMORY)) {
1420
388
  char* mempos;
1421
388
  NC_memio* meminfo = (NC_memio*)file->parameters;
1422
388
  if((pos + MAGIC_NUMBER_LEN) > meminfo->size)
1423
0
      {status = NC_EINMEMORY; goto done;}
1424
388
  mempos = ((char*)meminfo->memory) + pos;
1425
388
  memcpy((void*)magic,mempos,MAGIC_NUMBER_LEN);
1426
#ifdef DEBUG
1427
  printmagic("XXX: readmagic",magic,file);
1428
#endif
1429
388
    } else if(file->uri != NULL) {
1430
#ifdef NETCDF_ENABLE_BYTERANGE
1431
        size64_t start = (size64_t)pos;
1432
        size64_t count = MAGIC_NUMBER_LEN;
1433
        status = nc_http_read(file->state, start, count, buf);
1434
        if (status == NC_NOERR) {
1435
            if (ncbyteslength(buf) != count)
1436
                status = NC_EINVAL;
1437
            else
1438
                memcpy(magic, ncbytescontents(buf), count);
1439
        }
1440
#endif
1441
0
    } else {
1442
#ifdef USE_PARALLEL
1443
        if (file->use_parallel) {
1444
      MPI_Status mstatus;
1445
      int retval;
1446
      if((retval = MPI_File_read_at_all(file->fh, pos, magic,
1447
          MAGIC_NUMBER_LEN, MPI_CHAR, &mstatus)) != MPI_SUCCESS)
1448
          {status = NC_EPARINIT; goto done;}
1449
        }
1450
        else
1451
#endif /* USE_PARALLEL */
1452
0
        { /* Ordinary read */
1453
0
            long i;
1454
0
            i = fseek(file->fp, (long)pos, SEEK_SET);
1455
0
            if (i < 0) { status = errno; goto done; }
1456
0
            ncbytessetlength(buf, 0);
1457
0
            if ((status = NC_readfileF(file->fp, buf, MAGIC_NUMBER_LEN))) goto done;
1458
0
            memcpy(magic, ncbytescontents(buf), MAGIC_NUMBER_LEN);
1459
0
        }
1460
0
    }
1461
1462
388
done:
1463
388
    ncbytesfree(buf);
1464
388
    if(file && file->fp) clearerr(file->fp);
1465
388
    return check(status);
1466
388
}
1467
1468
/**
1469
 * Close the file opened to check for magic number.
1470
 *
1471
 * @param file pointer to the MagicFile struct for this open file.
1472
 * @returns NC_NOERR for success
1473
 * @returns NC_EPARINIT if there was a problem closing file with MPI
1474
 * (parallel builds only).
1475
 * @author Dennis Heimbigner
1476
 */
1477
static int
1478
closemagic(struct MagicFile* file)
1479
176
{
1480
176
    int status = NC_NOERR;
1481
1482
176
    if(fIsSet(file->omode,NC_INMEMORY)) {
1483
  /* noop */
1484
176
    } else if(file->uri != NULL) {
1485
#ifdef NETCDF_ENABLE_BYTERANGE
1486
      status = nc_http_close(file->state);
1487
#endif
1488
0
      nullfree(file->curlurl);
1489
0
    } else {
1490
#ifdef USE_PARALLEL
1491
        if (file->use_parallel) {
1492
      int retval;
1493
      if(file->fh != MPI_FILE_NULL
1494
         && (retval = MPI_File_close(&file->fh)) != MPI_SUCCESS)
1495
        {status = NC_EPARINIT; return status;}
1496
        } else
1497
#endif
1498
0
        {
1499
0
      if(file->fp) fclose(file->fp);
1500
0
        }
1501
0
    }
1502
176
    return status;
1503
176
}
1504
1505
/*!
1506
  Interpret the magic number found in the header of a netCDF file.
1507
  This function interprets the magic number/string contained in the header of a netCDF file and sets the appropriate NC_FORMATX flags.
1508
1509
  @param[in] magic Pointer to a character array with the magic number block.
1510
  @param[out] model Pointer to an integer to hold the corresponding netCDF type.
1511
  @param[out] version Pointer to an integer to hold the corresponding netCDF version.
1512
  @returns NC_NOERR if a legitimate file type found
1513
  @returns NC_ENOTNC otherwise
1514
1515
\internal
1516
\ingroup datasets
1517
1518
*/
1519
static int
1520
NC_interpret_magic_number(char* magic, NCmodel* model)
1521
388
{
1522
388
    int status = NC_NOERR;
1523
388
    int tmpimpl = 0;
1524
    /* Look at the magic number - save any UDF format on entry */
1525
388
    if(model->impl >= NC_FORMATX_UDF0 && model->impl <= NC_FORMATX_UDF1)
1526
0
        tmpimpl = model->impl;
1527
388
    else if(model->impl >= NC_FORMATX_UDF2 && model->impl <= NC_FORMATX_UDF9)
1528
0
        tmpimpl = model->impl;
1529
1530
    /* Use the complete magic number string for HDF5 */
1531
388
    if(memcmp(magic,HDF5_SIGNATURE,sizeof(HDF5_SIGNATURE))==0) {
1532
0
  model->impl = NC_FORMATX_NC4;
1533
0
  model->format = NC_FORMAT_NETCDF4;
1534
0
  goto done;
1535
0
    }
1536
388
    if(magic[0] == '\016' && magic[1] == '\003'
1537
16
              && magic[2] == '\023' && magic[3] == '\001') {
1538
0
  model->impl = NC_FORMATX_NC_HDF4;
1539
0
  model->format = NC_FORMAT_NETCDF4;
1540
0
  goto done;
1541
0
    }
1542
388
    if(magic[0] == 'C' && magic[1] == 'D' && magic[2] == 'F') {
1543
187
        if(magic[3] == '\001') {
1544
59
      model->impl = NC_FORMATX_NC3;
1545
59
      model->format = NC_FORMAT_CLASSIC;
1546
59
      goto done;
1547
59
  }
1548
128
        if(magic[3] == '\002') {
1549
68
      model->impl = NC_FORMATX_NC3;
1550
68
      model->format = NC_FORMAT_64BIT_OFFSET;
1551
68
      goto done;
1552
68
        }
1553
60
        if(magic[3] == '\005') {
1554
47
    model->impl = NC_FORMATX_NC3;
1555
47
    model->format = NC_FORMAT_64BIT_DATA;
1556
47
    goto done;
1557
47
  }
1558
60
     }
1559
     /* No match  */
1560
214
     if (!tmpimpl) 
1561
214
         status = NC_ENOTNC;         
1562
1563
214
     goto done;
1564
1565
388
done:
1566
     /* if model->impl was any UDF format (0-9) on entry, make it so on exit */
1567
388
     if(tmpimpl)
1568
0
         model->impl = tmpimpl;
1569
     /* if this is a UDF magic_number update the model->impl */
1570
4.26k
     for(int i = 0; i < NC_MAX_UDF_FORMATS; i++) {
1571
3.88k
         if (strlen(UDF_magic_numbers[i]) && !strncmp(UDF_magic_numbers[i], magic,
1572
0
                                                       strlen(UDF_magic_numbers[i])))
1573
0
         {
1574
0
             int formatx = (i <= 1) ? (NC_FORMATX_UDF0 + i) : (NC_FORMATX_UDF2 + i - 2);
1575
0
             model->impl = formatx;
1576
0
             status = NC_NOERR;
1577
0
             break;
1578
0
         }
1579
3.88k
     }    
1580
1581
388
     return check(status);
1582
388
}
1583
1584
/* Define macros to wrap getxattr and listxattr calls.
   The __APPLE__ variants take extra trailing arguments, passed as 0.
   Both macros expand to a plain call expression with no trailing
   semicolon, so they can be used inside larger expressions as well as
   in statements; arguments are parenthesized. */
#ifdef __APPLE__
#define GETXATTR(path,p,xvalue,xlen) getxattr((path), (p), (xvalue), (size_t)(xlen), 0, 0)
#define LISTXATTR(path,xlist,xlen) listxattr((path), (xlist), (size_t)(xlen), 0)
#else
#define GETXATTR(path,p,xvalue,xlen) getxattr((path), (p), (xvalue), (size_t)(xlen))
#define LISTXATTR(path,xlist,xlen) listxattr((path), (xlist), (size_t)(xlen))
#endif
1592
1593
/* Return NC_NOERR if path is a DAOS container; return NC_EXXX otherwise */
1594
static int
1595
isdaoscontainer(const char* path)
1596
176
{
1597
176
    int stat = NC_ENOTNC; /* default is that this is not a DAOS container */
1598
176
#ifndef _WIN32
1599
#ifdef USE_HDF5
1600
#if H5_VERSION_GE(1,12,0)
1601
    htri_t accessible;
1602
    hid_t fapl_id;
1603
    int rc;
1604
    /* Check for a DAOS container */
1605
    if((fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0) {stat = NC_EHDFERR; goto done;}
1606
    H5Pset_fapl_sec2(fapl_id);
1607
    accessible = H5Fis_accessible(path, fapl_id);
1608
    H5Pclose(fapl_id); /* Ignore any error */
1609
    rc = 0;
1610
    if(accessible > 0) {
1611
#ifdef HAVE_SYS_XATTR_H
1612
  ssize_t xlen;
1613
  xlen = LISTXATTR(path,NULL,0);
1614
        if(xlen > 0) {
1615
        char* xlist = NULL;
1616
      char* xvalue = NULL;
1617
      char* p;
1618
      char* endp;
1619
      if((xlist = (char*)calloc(1,(size_t)xlen))==NULL)
1620
    {stat = NC_ENOMEM; goto done;}
1621
      (void)LISTXATTR(path,xlist,xlen);
1622
      p = xlist; endp = p + xlen; /* delimit names */
1623
      /* walk the list of xattr names */
1624
      for(;p < endp;p += (strlen(p)+1)) {
1625
    /* The popen version looks for the string ".daos";
1626
                   It would be nice if we know whether that occurred
1627
       int the xattr's name or it value.
1628
       Oh well, we will do the general search */
1629
    /* Look for '.daos' in the key */
1630
    if(strstr(p,".daos") != NULL) {rc = 1; break;} /* success */
1631
    /* Else get the p'th xattr's value size */
1632
    xlen = GETXATTR(path,p,NULL,0);
1633
    if((xvalue = (char*)calloc(1,(size_t)xlen))==NULL)
1634
        {stat = NC_ENOMEM; goto done;}
1635
    /* Read the value */
1636
    (void)GETXATTR(path,p,xvalue,xlen);
1637
    /* Look for '.daos' in the value */
1638
    if(strstr(xvalue,".daos") != NULL) {rc = 1; break;} /* success */
1639
      }
1640
        }
1641
#else /*!HAVE_SYS_XATTR_H*/
1642
1643
#ifdef HAVE_GETFATTR
1644
  {
1645
      FILE *fp;
1646
      char cmd[4096];
1647
      memset(cmd,0,sizeof(cmd));
1648
      snprintf(cmd,sizeof(cmd),"getfattr \"%s\" | grep -c '.daos'",path);
1649
      fp = popen(cmd, "r");
1650
      if(fp != NULL) {
1651
        fscanf(fp, "%d", &rc);
1652
        pclose(fp);
1653
      } else {
1654
        rc = 0; /* Cannot test; assume not DAOS */
1655
      }
1656
  }
1657
    }
1658
#else /*!HAVE_GETFATTR*/
1659
    /* We just can't test for DAOS container.*/
1660
    rc = 0;
1661
#endif /*HAVE_GETFATTR*/
1662
#endif /*HAVE_SYS_XATTR_H*/
1663
    }
1664
    /* Test for DAOS container */
1665
    stat = (rc == 1 ? NC_NOERR : NC_ENOTNC);
1666
done:
1667
#endif
1668
#endif
1669
176
#endif
1670
    errno = 0; /* reset */
1671
176
    return stat;
1672
176
}
1673
1674
#ifdef DEBUG
1675
static void
1676
printmagic(const char* tag, char* magic, struct MagicFile* f)
1677
{
1678
    int i;
1679
    fprintf(stderr,"%s: ispar=%d magic=",tag,f->use_parallel);
1680
    for(i=0;i<MAGIC_NUMBER_LEN;i++) {
1681
        unsigned int c = (unsigned int)magic[i];
1682
  c = c & 0x000000FF;
1683
  if(c == '\n')
1684
      fprintf(stderr," 0x%0x/'\\n'",c);
1685
  else if(c == '\r')
1686
      fprintf(stderr," 0x%0x/'\\r'",c);
1687
  else if(c < ' ')
1688
      fprintf(stderr," 0x%0x/'?'",c);
1689
  else
1690
      fprintf(stderr," 0x%0x/'%c'",c,c);
1691
    }
1692
    fprintf(stderr,"\n");
1693
    fflush(stderr);
1694
}
1695
1696
static void
1697
printlist(NClist* list, const char* tag)
1698
{
1699
    int i;
1700
    fprintf(stderr,"%s:",tag);
1701
    for(i=0;i<nclistlength(list);i++) {
1702
        fprintf(stderr," %s",(char*)nclistget(list,i));
1703
  fprintf(stderr,"[%p]",(char*)nclistget(list,i));
1704
    }
1705
    fprintf(stderr,"\n");
1706
    dbgflush();
1707
}
1708
1709
1710
#endif