/src/netcdf-c/libdispatch/ds3util.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************* |
2 | | * Copyright 2018, UCAR/Unidata |
3 | | * See netcdf/COPYRIGHT file for copying and redistribution conditions. |
4 | | *********************************************************************/ |
5 | | |
6 | | #include "config.h" |
7 | | #include <stdlib.h> |
8 | | #include <string.h> |
9 | | #include <stdio.h> |
10 | | #include <assert.h> |
11 | | #ifdef HAVE_UNISTD_H |
12 | | #include <unistd.h> |
13 | | #endif |
14 | | #ifdef HAVE_SYS_STAT_H |
15 | | #include <sys/stat.h> |
16 | | #endif |
17 | | #ifdef HAVE_FCNTL_H |
18 | | #include <fcntl.h> |
19 | | #endif |
20 | | #ifdef _MSC_VER |
21 | | #include <io.h> |
22 | | #endif |
23 | | |
24 | | #include "netcdf.h" |
25 | | #include "ncuri.h" |
26 | | #include "nclist.h" |
27 | | #include "ncrc.h" |
28 | | #include "ncs3sdk.h" |
29 | | |
30 | | #undef AWSDEBUG /* Debug tracing is off; when defined, NC_s3urlrebuild prints its result to stderr */ |
31 | | /* Host suffix used both to recognize Amazon-hosted S3 urls and to build "s3.<region>.amazonaws.com" */ |
32 | 0 | #define AWSHOST ".amazonaws.com" |
33 | | /* URL style tags; not referenced in the visible part of this file. NOTE(review): names appear to mirror the virtual/path/s3/other formats listed in NC_s3urlrebuild -- confirm */ |
34 | | enum URLFORMAT {UF_NONE=0, UF_VIRTUAL=1, UF_PATH=2, UF_S3=3, UF_OTHER=4}; |
35 | | |
36 | | /* Forward declaration: endswith() is defined after its first use (NC_s3urlrebuild) */ |
37 | | static int endswith(const char* s, const char* suffix); |
38 | | |
39 | | /**************************************************/ |
40 | | /* Generic S3 Utilities */ |
41 | | |
42 | | /* |
43 | | Rebuild an S3 url into a canonical path-style url. |
44 | | If region is not in the host, then use specified region |
45 | | if provided, otherwise us-east-1. |
46 | | @param url (in) the current url |
47 | | @param region (in) region to use if needed; NULL => us-east-1 |
48 | | (out) region from url or the input region |
49 | | @param bucketp (in) bucket to use if needed |
50 | | (out) bucket from url |
51 | | @param pathurlp (out) the resulting pathified url string |
52 | | */ |
53 | | |
54 | | int |
55 | | NC_s3urlrebuild(NCURI* url, char** inoutbucketp, char** inoutregionp, NCURI** newurlp) |
56 | 0 | { |
57 | 0 | int i,stat = NC_NOERR; |
58 | 0 | NClist* hostsegments = NULL; |
59 | 0 | NClist* pathsegments = NULL; |
60 | 0 | NCbytes* buf = ncbytesnew(); |
61 | 0 | NCURI* newurl = NULL; |
62 | 0 | char* bucket = NULL; |
63 | 0 | char* host = NULL; |
64 | 0 | char* path = NULL; |
65 | 0 | char* region = NULL; |
66 | | |
67 | 0 | if(url == NULL) |
68 | 0 | {stat = NC_EURL; goto done;} |
69 | | |
70 | | /* Parse the hostname */ |
71 | 0 | hostsegments = nclistnew(); |
72 | | /* split the hostname by "." */ |
73 | 0 | if((stat = NC_split_delim(url->host,'.',hostsegments))) goto done; |
74 | | |
75 | | /* Parse the path*/ |
76 | 0 | pathsegments = nclistnew(); |
77 | | /* split the path by "/" */ |
78 | 0 | if((stat = NC_split_delim(url->path,'/',pathsegments))) goto done; |
79 | | |
80 | | /* Distinguish path-style from virtual-host style from s3: and from other. |
81 | | Virtual: https://<bucket-name>.s3.<region>.amazonaws.com/<path> (1) |
82 | | or: https://<bucket-name>.s3.amazonaws.com/<path> -- region defaults to us-east-1 (2) |
83 | | Path: https://s3.<region>.amazonaws.com/<bucket-name>/<path> (3) |
84 | | or: https://s3.amazonaws.com/<bucket-name>/<path> -- region defaults to us-east-1 (4) |
85 | | S3: s3://<bucket-name>/<path> (5) |
86 | | Other: https://<host>/<bucket-name>/<path> (6) |
87 | | */ |
88 | 0 | if(url->host == NULL || strlen(url->host) == 0) |
89 | 0 | {stat = NC_EURL; goto done;} |
90 | 0 | if(strcmp(url->protocol,"s3")==0 && nclistlength(hostsegments)==1) { /* Format (5) */ |
91 | 0 | bucket = nclistremove(hostsegments,0); |
92 | | /* region unknown at this point */ |
93 | 0 | } else if(endswith(url->host,AWSHOST)) { /* Virtual or path */ |
94 | | /* If we find a bucket as part of the host, then remove it */ |
95 | 0 | switch (nclistlength(hostsegments)) { |
96 | 0 | default: stat = NC_EURL; goto done; |
97 | 0 | case 3: /* Format (4) */ |
98 | | /* region unknown at this point */ |
99 | | /* bucket unknown at this point */ |
100 | 0 | break; |
101 | 0 | case 4: /* Format (2) or (3) */ |
102 | 0 | if(strcasecmp(nclistget(hostsegments,1),"s3")==0) { /* Format (2) */ |
103 | | /* region unknown at this point */ |
104 | 0 | bucket = nclistremove(hostsegments,0); /* Note removeal */ |
105 | 0 | } else if(strcasecmp(nclistget(hostsegments,0),"s3")==0) { /* Format (3) */ |
106 | 0 | region = strdup(nclistget(hostsegments,1)); |
107 | | /* bucket unknown at this point */ |
108 | 0 | } else /* ! Format (2) and ! Format (3) => error */ |
109 | 0 | {stat = NC_EURL; goto done;} |
110 | 0 | break; |
111 | 0 | case 5: /* Format (1) */ |
112 | 0 | if(strcasecmp(nclistget(hostsegments,1),"s3")!=0) |
113 | 0 | {stat = NC_EURL; goto done;} |
114 | 0 | region = strdup(nclistget(hostsegments,2)); |
115 | 0 | bucket = strdup(nclistremove(hostsegments,0)); |
116 | 0 | break; |
117 | 0 | } |
118 | 0 | } else { /* Presume Format (6) */ |
119 | 0 | if((host = strdup(url->host))==NULL) |
120 | 0 | {stat = NC_ENOMEM; goto done;} |
121 | | /* region is unknown */ |
122 | | /* bucket is unknown */ |
123 | 0 | } |
124 | | |
125 | | /* region = (1) from url, (2) inoutregion, (3) default */ |
126 | 0 | if(region == NULL) |
127 | 0 | region = (inoutregionp?nulldup(*inoutregionp):NULL); |
128 | 0 | if(region == NULL) { |
129 | 0 | const char* region0 = NULL; |
130 | | /* Get default region */ |
131 | 0 | if((stat = NC_getdefaults3region(url,®ion0))) goto done; |
132 | 0 | region = strdup(region0); |
133 | 0 | } |
134 | 0 | if(region == NULL) {stat = NC_ES3; goto done;} |
135 | | |
136 | | /* bucket = (1) from url, (2) inoutbucket */ |
137 | 0 | if(bucket == NULL && nclistlength(pathsegments) > 0) { |
138 | 0 | bucket = nclistremove(pathsegments,0); /* Get from the URL path; will reinsert below */ |
139 | 0 | } |
140 | 0 | if(bucket == NULL) |
141 | 0 | bucket = (inoutbucketp?nulldup(*inoutbucketp):NULL); |
142 | 0 | if(bucket == NULL) {stat = NC_ES3; goto done;} |
143 | | |
144 | 0 | if(host == NULL) { /* Construct the revised host */ |
145 | 0 | ncbytescat(buf,"s3."); |
146 | 0 | ncbytescat(buf,region); |
147 | 0 | ncbytescat(buf,AWSHOST); |
148 | 0 | host = ncbytesextract(buf); |
149 | 0 | } |
150 | | |
151 | | /* Construct the revised path */ |
152 | 0 | ncbytesclear(buf); |
153 | 0 | ncbytescat(buf,"/"); |
154 | 0 | if(bucket == NULL) |
155 | 0 | {stat = NC_EURL; goto done;} |
156 | 0 | ncbytescat(buf,bucket); |
157 | 0 | for(i=0;i<nclistlength(pathsegments);i++) { |
158 | 0 | ncbytescat(buf,"/"); |
159 | 0 | ncbytescat(buf,nclistget(pathsegments,i)); |
160 | 0 | } |
161 | 0 | path = ncbytesextract(buf); |
162 | | /* complete the new url */ |
163 | 0 | if((newurl=ncuriclone(url))==NULL) {stat = NC_ENOMEM; goto done;} |
164 | 0 | ncurisetprotocol(newurl,"https"); |
165 | 0 | ncurisethost(newurl,host); |
166 | 0 | ncurisetpath(newurl,path); |
167 | | /* Rebuild the url->url */ |
168 | 0 | ncurirebuild(newurl); |
169 | | /* return various items */ |
170 | | #ifdef AWSDEBUG |
171 | | fprintf(stderr,">>> NC_s3urlrebuild: final=%s bucket=%s region=%s\n",uri->uri,bucket,region); |
172 | | #endif |
173 | 0 | if(newurlp) {*newurlp = newurl; newurl = NULL;} |
174 | 0 | if(inoutbucketp) {*inoutbucketp = bucket; bucket = NULL;} |
175 | 0 | if(inoutregionp) {*inoutregionp = region; region = NULL;} |
176 | |
|
177 | 0 | done: |
178 | 0 | nullfree(region); |
179 | 0 | nullfree(bucket) |
180 | 0 | nullfree(host) |
181 | 0 | nullfree(path) |
182 | 0 | ncurifree(newurl); |
183 | 0 | ncbytesfree(buf); |
184 | 0 | nclistfreeall(hostsegments); |
185 | 0 | nclistfreeall(pathsegments); |
186 | 0 | return stat; |
187 | 0 | } |
188 | | |
/*
Test whether string s ends with the string suffix.
The comparison is byte-wise and case-sensitive.
@param s      string to examine; NULL => no match
@param suffix suffix to look for; NULL => no match
@return 1 if s ends with suffix (an empty suffix always matches), else 0
*/
static int
endswith(const char* s, const char* suffix)
{
    size_t ls, lsf;

    if(s == NULL || suffix == NULL) return 0;
    ls = strlen(s);
    lsf = strlen(suffix);
    /* Compare lengths directly rather than testing a signed difference;
       this keeps everything in size_t and avoids the non-ISO ssize_t */
    if(lsf > ls) return 0;
    return (memcmp(s+(ls-lsf),suffix,lsf)==0 ? 1 : 0);
}
201 | | |
202 | | /**************************************************/ |
203 | | /* S3 utilities */ |
204 | | |
205 | | EXTERNL int |
206 | | NC_s3urlprocess(NCURI* url, NCS3INFO* s3) |
207 | 0 | { |
208 | 0 | int stat = NC_NOERR; |
209 | 0 | NCURI* url2 = NULL; |
210 | 0 | NClist* pathsegments = NULL; |
211 | 0 | const char* profile0 = NULL; |
212 | |
|
213 | 0 | if(url == NULL || s3 == NULL) |
214 | 0 | {stat = NC_EURL; goto done;} |
215 | | /* Get current profile */ |
216 | 0 | if((stat = NC_getactives3profile(url,&profile0))) goto done; |
217 | 0 | if(profile0 == NULL) profile0 = "no"; |
218 | 0 | s3->profile = strdup(profile0); |
219 | | |
220 | | /* Rebuild the URL to path format and get a usable region and optional bucket*/ |
221 | 0 | if((stat = NC_s3urlrebuild(url,&s3->bucket,&s3->region,&url2))) goto done; |
222 | 0 | s3->host = strdup(url2->host); |
223 | | /* construct the rootkey minus the leading bucket */ |
224 | 0 | pathsegments = nclistnew(); |
225 | 0 | if((stat = NC_split_delim(url2->path,'/',pathsegments))) goto done; |
226 | 0 | if(nclistlength(pathsegments) > 0) { |
227 | 0 | char* seg = nclistremove(pathsegments,0); |
228 | 0 | nullfree(seg); |
229 | 0 | } |
230 | 0 | if((stat = NC_join(pathsegments,&s3->rootkey))) goto done; |
231 | | |
232 | 0 | done: |
233 | 0 | ncurifree(url2); |
234 | 0 | nclistfreeall(pathsegments); |
235 | 0 | return stat; |
236 | 0 | } |
237 | | |
238 | | int |
239 | | NC_s3clone(NCS3INFO* s3, NCS3INFO** news3p) |
240 | 0 | { |
241 | 0 | NCS3INFO* news3 = NULL; |
242 | 0 | if(s3 && news3p) { |
243 | 0 | if((news3 = (NCS3INFO*)calloc(1,sizeof(NCS3INFO)))==NULL) |
244 | 0 | return NC_ENOMEM; |
245 | 0 | if((news3->host = nulldup(s3->host))==NULL) return NC_ENOMEM; |
246 | 0 | if((news3->region = nulldup(s3->region))==NULL) return NC_ENOMEM; |
247 | 0 | if((news3->bucket = nulldup(s3->bucket))==NULL) return NC_ENOMEM; |
248 | 0 | if((news3->rootkey = nulldup(s3->rootkey))==NULL) return NC_ENOMEM; |
249 | 0 | if((news3->profile = nulldup(s3->profile))==NULL) return NC_ENOMEM; |
250 | 0 | } |
251 | 0 | if(news3p) {*news3p = news3; news3 = NULL;} |
252 | 0 | else {NC_s3clear(news3); nullfree(news3);} |
253 | 0 | return NC_NOERR; |
254 | 0 | } |
255 | | |
256 | | int |
257 | | NC_s3clear(NCS3INFO* s3) |
258 | 0 | { |
259 | 0 | if(s3) { |
260 | 0 | nullfree(s3->host); s3->host = NULL; |
261 | 0 | nullfree(s3->region); s3->region = NULL; |
262 | 0 | nullfree(s3->bucket); s3->bucket = NULL; |
263 | 0 | nullfree(s3->rootkey); s3->rootkey = NULL; |
264 | 0 | nullfree(s3->profile); s3->profile = NULL; |
265 | 0 | } |
266 | 0 | return NC_NOERR; |
267 | 0 | } |
268 | | |
269 | | /* |
270 | | Check if a url has indicators that signal an S3 url. |
271 | | */ |
272 | | |
273 | | int |
274 | | NC_iss3(NCURI* uri) |
275 | 0 | { |
276 | 0 | int iss3 = 0; |
277 | |
|
278 | 0 | if(uri == NULL) goto done; /* not a uri */ |
279 | | /* is the protocol "s3"? */ |
280 | 0 | if(strcasecmp(uri->protocol,"s3")==0) {iss3 = 1; goto done;} |
281 | | /* Is "s3" in the mode list? */ |
282 | 0 | if(NC_testmode(uri,"s3")) {iss3 = 1; goto done;} |
283 | | /* Last chance; see if host looks s3'y */ |
284 | 0 | if(endswith(uri->host,AWSHOST)) {iss3 = 1; goto done;} |
285 | | |
286 | 0 | done: |
287 | 0 | return iss3; |
288 | 0 | } |
289 | | |
290 | | const char* |
291 | | NC_s3dumps3info(NCS3INFO* info) |
292 | 0 | { |
293 | 0 | static char text[8192]; |
294 | 0 | snprintf(text,sizeof(text),"host=%s region=%s bucket=%s rootkey=%s profile=%s", |
295 | 0 | (info->host?info->host:"null"), |
296 | 0 | (info->region?info->region:"null"), |
297 | 0 | (info->bucket?info->bucket:"null"), |
298 | 0 | (info->rootkey?info->rootkey:"null"), |
299 | 0 | (info->profile?info->profile:"null")); |
300 | 0 | return text; |
301 | 0 | } |
302 | | |