Coverage Report

Created: 2023-05-28 06:42

/src/netcdf-c/libdispatch/ds3util.c
Line
Count
Source (jump to first uncovered line)
1
/*********************************************************************
2
 *   Copyright 2018, UCAR/Unidata
3
 *   See netcdf/COPYRIGHT file for copying and redistribution conditions.
4
 *********************************************************************/
5
6
#include "config.h"
7
#include <stdlib.h>
8
#include <string.h>
9
#include <stdio.h>
10
#include <assert.h>
11
#ifdef HAVE_UNISTD_H
12
#include <unistd.h>
13
#endif
14
#ifdef HAVE_SYS_STAT_H
15
#include <sys/stat.h>
16
#endif
17
#ifdef HAVE_FCNTL_H
18
#include <fcntl.h>
19
#endif
20
#ifdef _MSC_VER
21
#include <io.h>
22
#endif
23
24
#include "netcdf.h"
25
#include "ncuri.h"
26
#include "nclist.h"
27
#include "ncrc.h"
28
#include "ncs3sdk.h"
29
30
#undef AWSDEBUG
31
32
0
/* Host suffix used in this file both to recognize Amazon S3 hosts (via endswith)
   and to build the canonical "s3.<region>" host name in NC_s3urlrebuild. */
#define AWSHOST ".amazonaws.com"
33
34
/* Kinds of URL layout. The enum is not referenced in the visible part of this file.
   NOTE(review): the names appear to mirror the virtual-host, path, s3: and "other"
   layouts distinguished in NC_s3urlrebuild -- confirm before relying on it. */
enum URLFORMAT {UF_NONE=0, UF_VIRTUAL=1, UF_PATH=2, UF_S3=3, UF_OTHER=4};
35
36
/* Forward */
37
static int endswith(const char* s, const char* suffix);
38
39
/**************************************************/
40
/* Generic S3 Utilities */
41
42
/*
43
Rebuild an S3 url into a canonical path-style url.
44
If region is not in the host, then use specified region
45
if provided, otherwise us-east-1.
46
@param url (in) the current url
47
@param region (in) region to use if needed; NULL => us-east-1
48
    (out) region from url or the input region
49
@param bucketp  (in) bucket to use if needed
50
    (out) bucket from url
51
@param pathurlp (out) the resulting pathified url string
52
*/
53
54
int
55
NC_s3urlrebuild(NCURI* url, char** inoutbucketp, char** inoutregionp, NCURI** newurlp)
56
0
{
57
0
    int i,stat = NC_NOERR;
58
0
    NClist* hostsegments = NULL;
59
0
    NClist* pathsegments = NULL;
60
0
    NCbytes* buf = ncbytesnew();
61
0
    NCURI* newurl = NULL;
62
0
    char* bucket = NULL;
63
0
    char* host = NULL;
64
0
    char* path = NULL;
65
0
    char* region = NULL;
66
    
67
0
    if(url == NULL)
68
0
        {stat = NC_EURL; goto done;}
69
70
    /* Parse the hostname */
71
0
    hostsegments = nclistnew();
72
    /* split the hostname by "." */
73
0
    if((stat = NC_split_delim(url->host,'.',hostsegments))) goto done;
74
75
    /* Parse the path*/
76
0
    pathsegments = nclistnew();
77
    /* split the path by "/" */
78
0
    if((stat = NC_split_delim(url->path,'/',pathsegments))) goto done;
79
80
    /* Distinguish path-style from virtual-host style from s3: and from other.
81
       Virtual: https://<bucket-name>.s3.<region>.amazonaws.com/<path>        (1)
82
            or: https://<bucket-name>.s3.amazonaws.com/<path> -- region defaults to us-east-1 (2)
83
       Path: https://s3.<region>.amazonaws.com/<bucket-name>/<path>       (3)
84
         or: https://s3.amazonaws.com/<bucket-name>/<path> -- region defaults to us-east-1      (4)
85
       S3: s3://<bucket-name>/<path>                (5)
86
      Other: https://<host>/<bucket-name>/<path>            (6)
87
    */
88
0
    if(url->host == NULL || strlen(url->host) == 0)
89
0
        {stat = NC_EURL; goto done;}
90
0
    if(strcmp(url->protocol,"s3")==0 && nclistlength(hostsegments)==1) { /* Format (5) */
91
0
  bucket = nclistremove(hostsegments,0);
92
  /* region unknown at this point */
93
0
    } else if(endswith(url->host,AWSHOST)) { /* Virtual or path */
94
  /* If we find a bucket as part of the host, then remove it */
95
0
  switch (nclistlength(hostsegments)) {
96
0
  default: stat = NC_EURL; goto done;
97
0
  case 3: /* Format (4) */ 
98
      /* region unknown at this point */
99
          /* bucket unknown at this point */
100
0
      break;
101
0
  case 4: /* Format (2) or (3) */
102
0
            if(strcasecmp(nclistget(hostsegments,1),"s3")==0) { /* Format (2) */
103
          /* region unknown at this point */
104
0
          bucket = nclistremove(hostsegments,0); /* Note removeal */
105
0
            } else if(strcasecmp(nclistget(hostsegments,0),"s3")==0) { /* Format (3) */
106
0
          region = strdup(nclistget(hostsegments,1));
107
          /* bucket unknown at this point */
108
0
      } else /* ! Format (2) and ! Format (3) => error */
109
0
          {stat = NC_EURL; goto done;}
110
0
      break;
111
0
  case 5: /* Format (1) */
112
0
            if(strcasecmp(nclistget(hostsegments,1),"s3")!=0)
113
0
          {stat = NC_EURL; goto done;}
114
0
      region = strdup(nclistget(hostsegments,2));
115
0
          bucket = strdup(nclistremove(hostsegments,0));
116
0
      break;
117
0
  }
118
0
    } else { /* Presume Format (6) */
119
0
        if((host = strdup(url->host))==NULL)
120
0
      {stat = NC_ENOMEM; goto done;}
121
        /* region is unknown */
122
  /* bucket is unknown */
123
0
    }
124
125
    /* region = (1) from url, (2) inoutregion, (3) default */
126
0
    if(region == NULL)
127
0
  region = (inoutregionp?nulldup(*inoutregionp):NULL);
128
0
    if(region == NULL) {
129
0
        const char* region0 = NULL;
130
  /* Get default region */
131
0
  if((stat = NC_getdefaults3region(url,&region0))) goto done;
132
0
  region = strdup(region0);
133
0
    }
134
0
    if(region == NULL) {stat = NC_ES3; goto done;}
135
136
    /* bucket = (1) from url, (2) inoutbucket */
137
0
    if(bucket == NULL && nclistlength(pathsegments) > 0) {
138
0
  bucket = nclistremove(pathsegments,0); /* Get from the URL path; will reinsert below */
139
0
    }
140
0
    if(bucket == NULL)
141
0
  bucket = (inoutbucketp?nulldup(*inoutbucketp):NULL);
142
0
    if(bucket == NULL) {stat = NC_ES3; goto done;}
143
144
0
    if(host == NULL) { /* Construct the revised host */
145
0
        ncbytescat(buf,"s3.");
146
0
        ncbytescat(buf,region);
147
0
        ncbytescat(buf,AWSHOST);
148
0
        host = ncbytesextract(buf);
149
0
    }
150
151
    /* Construct the revised path */
152
0
    ncbytesclear(buf);
153
0
    ncbytescat(buf,"/");
154
0
    if(bucket == NULL)
155
0
        {stat = NC_EURL; goto done;}
156
0
    ncbytescat(buf,bucket);
157
0
    for(i=0;i<nclistlength(pathsegments);i++) {
158
0
  ncbytescat(buf,"/");
159
0
  ncbytescat(buf,nclistget(pathsegments,i));
160
0
    }
161
0
    path = ncbytesextract(buf);
162
    /* complete the new url */
163
0
    if((newurl=ncuriclone(url))==NULL) {stat = NC_ENOMEM; goto done;}
164
0
    ncurisetprotocol(newurl,"https");
165
0
    ncurisethost(newurl,host);
166
0
    ncurisetpath(newurl,path);
167
    /* Rebuild the url->url */
168
0
    ncurirebuild(newurl);
169
    /* return various items */
170
#ifdef AWSDEBUG
171
    fprintf(stderr,">>> NC_s3urlrebuild: final=%s bucket=%s region=%s\n",uri->uri,bucket,region);
172
#endif
173
0
    if(newurlp) {*newurlp = newurl; newurl = NULL;}
174
0
    if(inoutbucketp) {*inoutbucketp = bucket; bucket = NULL;}
175
0
    if(inoutregionp) {*inoutregionp = region; region = NULL;}
176
177
0
done:
178
0
    nullfree(region);
179
0
    nullfree(bucket)
180
0
    nullfree(host)
181
0
    nullfree(path)
182
0
    ncurifree(newurl);
183
0
    ncbytesfree(buf);
184
0
    nclistfreeall(hostsegments);
185
0
    nclistfreeall(pathsegments);
186
0
    return stat;
187
0
}
188
189
/*
Test whether a string ends with a given suffix (byte-wise, case-sensitive).
@param s      the string to test; NULL never matches
@param suffix the suffix to look for; NULL never matches
@return 1 if s ends with suffix (an empty suffix always matches), 0 otherwise
*/
static int
endswith(const char* s, const char* suffix)
{
    size_t ls, lsf;
    if(s == NULL || suffix == NULL) return 0;
    ls = strlen(s);
    lsf = strlen(suffix);
    /* Compare lengths directly instead of computing a signed difference;
       this avoids the non-ISO ssize_t type */
    if(lsf > ls) return 0;
    return (memcmp(s+(ls-lsf),suffix,lsf)==0) ? 1 : 0;
}
201
202
/**************************************************/
203
/* S3 utilities */
204
205
EXTERNL int
206
NC_s3urlprocess(NCURI* url, NCS3INFO* s3)
207
0
{
208
0
    int stat = NC_NOERR;
209
0
    NCURI* url2 = NULL;
210
0
    NClist* pathsegments = NULL;
211
0
    const char* profile0 = NULL;
212
213
0
    if(url == NULL || s3 == NULL)
214
0
        {stat = NC_EURL; goto done;}
215
    /* Get current profile */
216
0
    if((stat = NC_getactives3profile(url,&profile0))) goto done;
217
0
    if(profile0 == NULL) profile0 = "no";
218
0
    s3->profile = strdup(profile0);
219
220
    /* Rebuild the URL to path format and get a usable region and optional bucket*/
221
0
    if((stat = NC_s3urlrebuild(url,&s3->bucket,&s3->region,&url2))) goto done;
222
0
    s3->host = strdup(url2->host);
223
    /* construct the rootkey minus the leading bucket */
224
0
    pathsegments = nclistnew();
225
0
    if((stat = NC_split_delim(url2->path,'/',pathsegments))) goto done;
226
0
    if(nclistlength(pathsegments) > 0) {
227
0
  char* seg = nclistremove(pathsegments,0);
228
0
        nullfree(seg);
229
0
    }
230
0
    if((stat = NC_join(pathsegments,&s3->rootkey))) goto done;
231
232
0
done:
233
0
    ncurifree(url2);
234
0
    nclistfreeall(pathsegments);
235
0
    return stat;
236
0
}
237
238
int
239
NC_s3clone(NCS3INFO* s3, NCS3INFO** news3p)
240
0
{
241
0
    NCS3INFO* news3 = NULL;
242
0
    if(s3 && news3p) {
243
0
  if((news3 = (NCS3INFO*)calloc(1,sizeof(NCS3INFO)))==NULL)
244
0
           return NC_ENOMEM;
245
0
  if((news3->host = nulldup(s3->host))==NULL) return NC_ENOMEM;
246
0
  if((news3->region = nulldup(s3->region))==NULL) return NC_ENOMEM;
247
0
  if((news3->bucket = nulldup(s3->bucket))==NULL) return NC_ENOMEM;
248
0
  if((news3->rootkey = nulldup(s3->rootkey))==NULL) return NC_ENOMEM;
249
0
  if((news3->profile = nulldup(s3->profile))==NULL) return NC_ENOMEM;
250
0
    }
251
0
    if(news3p) {*news3p = news3; news3 = NULL;}
252
0
    else {NC_s3clear(news3); nullfree(news3);}
253
0
    return NC_NOERR;
254
0
}
255
256
int
257
NC_s3clear(NCS3INFO* s3)
258
0
{
259
0
    if(s3) {
260
0
  nullfree(s3->host); s3->host = NULL;
261
0
  nullfree(s3->region); s3->region = NULL;
262
0
  nullfree(s3->bucket); s3->bucket = NULL;
263
0
  nullfree(s3->rootkey); s3->rootkey = NULL;
264
0
  nullfree(s3->profile); s3->profile = NULL;
265
0
    }
266
0
    return NC_NOERR;
267
0
}
268
269
/*
270
Check if a url has indicators that signal an S3 url.
271
*/
272
273
int
274
NC_iss3(NCURI* uri)
275
0
{
276
0
    int iss3 = 0;
277
278
0
    if(uri == NULL) goto done; /* not a uri */
279
    /* is the protocol "s3"? */
280
0
    if(strcasecmp(uri->protocol,"s3")==0) {iss3 = 1; goto done;}
281
    /* Is "s3" in the mode list? */
282
0
    if(NC_testmode(uri,"s3")) {iss3 = 1; goto done;}    
283
    /* Last chance; see if host looks s3'y */
284
0
    if(endswith(uri->host,AWSHOST)) {iss3 = 1; goto done;}
285
    
286
0
done:
287
0
    return iss3;
288
0
}
289
290
const char*
291
NC_s3dumps3info(NCS3INFO* info)
292
0
{
293
0
    static char text[8192];
294
0
    snprintf(text,sizeof(text),"host=%s region=%s bucket=%s rootkey=%s profile=%s",
295
0
    (info->host?info->host:"null"),
296
0
    (info->region?info->region:"null"),
297
0
    (info->bucket?info->bucket:"null"),
298
0
    (info->rootkey?info->rootkey:"null"),
299
0
    (info->profile?info->profile:"null"));
300
0
    return text;
301
0
}
302