/src/netcdf-c/libdispatch/ds3util.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************* |
2 | | * Copyright 2018, UCAR/Unidata |
3 | | * See netcdf/COPYRIGHT file for copying and redistribution conditions. |
4 | | *********************************************************************/ |
5 | | |
6 | | #include "config.h" |
7 | | #include <stdlib.h> |
8 | | #include <string.h> |
9 | | #include <stdio.h> |
10 | | #include <assert.h> |
11 | | #ifdef HAVE_UNISTD_H |
12 | | #include <unistd.h> |
13 | | #endif |
14 | | #ifdef HAVE_SYS_STAT_H |
15 | | #include <sys/stat.h> |
16 | | #endif |
17 | | #ifdef HAVE_FCNTL_H |
18 | | #include <fcntl.h> |
19 | | #endif |
20 | | #ifdef _MSC_VER |
21 | | #include <io.h> |
22 | | #endif |
23 | | |
24 | | #include "netcdf.h" |
25 | | #include "ncuri.h" |
26 | | #include "ncrc.h" |
27 | | |
28 | | |
/* AWS tracing stays disabled: code guarded by #ifdef AWSDEBUG is compiled out */
#undef AWSDEBUG

/* Host suffix used to recognize Amazon-hosted S3 endpoints */
#define AWSHOST ".amazonaws.com"

/*
URL format tags.
NOTE(review): the names appear to mirror the url styles distinguished
in NC_s3urlrebuild (virtual-host, path, s3: and other); no use of this
enum is visible in this part of the file -- confirm before relying on it.
*/
enum URLFORMAT {
    UF_NONE    = 0,
    UF_VIRTUAL = 1,
    UF_PATH    = 2,
    UF_S3      = 3,
    UF_OTHER   = 4
};

/* Forward */
static int endswith(const char* s, const char* suffix);
37 | | |
38 | | /**************************************************/ |
39 | | /* Generic S3 Utilities */ |
40 | | |
41 | | /* |
42 | | Rebuild an S3 url into a canonical path-style url. |
43 | | If region is not in the host, then use specified region |
44 | | if provided, otherwise us-east-1. |
45 | | @param url (in) the current url |
46 | | @param region (in) region to use if needed; NULL => us-east-1 |
47 | | (out) region from url or the input region |
48 | | @param pathurlp (out) the resulting pathified url string |
49 | | @param bucketp (out) the bucket from the url |
50 | | */ |
51 | | |
52 | | int |
53 | | NC_s3urlrebuild(NCURI* url, NCURI** newurlp, char** bucketp, char** outregionp) |
54 | 0 | { |
55 | 0 | int i,stat = NC_NOERR; |
56 | 0 | NClist* hostsegments = NULL; |
57 | 0 | NClist* pathsegments = NULL; |
58 | 0 | NCbytes* buf = ncbytesnew(); |
59 | 0 | NCURI* newurl = NULL; |
60 | 0 | char* bucket = NULL; |
61 | 0 | char* host = NULL; |
62 | 0 | char* path = NULL; |
63 | 0 | char* region = NULL; |
64 | | |
65 | 0 | if(url == NULL) |
66 | 0 | {stat = NC_EURL; goto done;} |
67 | | |
68 | | /* Parse the hostname */ |
69 | 0 | hostsegments = nclistnew(); |
70 | | /* split the hostname by "." */ |
71 | 0 | if((stat = NC_split_delim(url->host,'.',hostsegments))) goto done; |
72 | | |
73 | | /* Parse the path*/ |
74 | 0 | pathsegments = nclistnew(); |
75 | | /* split the path by "/" */ |
76 | 0 | if((stat = NC_split_delim(url->path,'/',pathsegments))) goto done; |
77 | | |
78 | | /* Distinguish path-style from virtual-host style from s3: and from other. |
79 | | Virtual: https://bucket-name.s3.Region.amazonaws.com/<path> |
80 | | Path: https://s3.Region.amazonaws.com/bucket-name/<path> |
81 | | S3: s3://bucket-name/<path> |
82 | | Other: https://<host>/bucketname/<path> |
83 | | */ |
84 | 0 | if(url->host == NULL || strlen(url->host) == 0) |
85 | 0 | {stat = NC_EURL; goto done;} |
86 | 0 | if(strcmp(url->protocol,"s3")==0 && nclistlength(hostsegments)==1) { |
87 | 0 | bucket = strdup(url->host); |
88 | 0 | region = NULL; /* unknown at this point */ |
89 | 0 | } else if(endswith(url->host,AWSHOST)) { /* Virtual or path */ |
90 | 0 | switch (nclistlength(hostsegments)) { |
91 | 0 | default: stat = NC_EURL; goto done; |
92 | 0 | case 4: |
93 | 0 | if(strcasecmp(nclistget(hostsegments,0),"s3")!=0) |
94 | 0 | {stat = NC_EURL; goto done;} |
95 | 0 | region = strdup(nclistget(hostsegments,1)); |
96 | 0 | if(nclistlength(pathsegments) > 0) |
97 | 0 | bucket = nclistremove(pathsegments,0); |
98 | 0 | break; |
99 | 0 | case 5: |
100 | 0 | if(strcasecmp(nclistget(hostsegments,1),"s3")!=0) |
101 | 0 | {stat = NC_EURL; goto done;} |
102 | 0 | region = strdup(nclistget(hostsegments,2)); |
103 | 0 | bucket = strdup(nclistget(hostsegments,0)); |
104 | 0 | break; |
105 | 0 | } |
106 | 0 | } else { |
107 | 0 | if((host = strdup(url->host))==NULL) |
108 | 0 | {stat = NC_ENOMEM; goto done;} |
109 | | /* region is unknown */ |
110 | 0 | region = NULL; |
111 | | /* bucket is assumed to be start of the path */ |
112 | 0 | if(nclistlength(pathsegments) > 0) |
113 | 0 | bucket = nclistremove(pathsegments,0); |
114 | 0 | } |
115 | | /* If region is null, use default */ |
116 | 0 | if(region == NULL) { |
117 | 0 | const char* region0 = NULL; |
118 | | /* Get default region */ |
119 | 0 | if((stat = NC_getdefaults3region(url,®ion0))) goto done; |
120 | 0 | region = strdup(region0); |
121 | 0 | } |
122 | 0 | if(host == NULL) { /* Construct the revised host */ |
123 | 0 | ncbytescat(buf,"s3."); |
124 | 0 | ncbytescat(buf,region); |
125 | 0 | ncbytescat(buf,AWSHOST); |
126 | 0 | host = ncbytesextract(buf); |
127 | 0 | } |
128 | | |
129 | | /* Construct the revised path */ |
130 | 0 | ncbytesclear(buf); |
131 | 0 | ncbytescat(buf,"/"); |
132 | 0 | if(bucket == NULL) |
133 | 0 | {stat = NC_EURL; goto done;} |
134 | 0 | ncbytescat(buf,bucket); |
135 | 0 | for(i=0;i<nclistlength(pathsegments);i++) { |
136 | 0 | ncbytescat(buf,"/"); |
137 | 0 | ncbytescat(buf,nclistget(pathsegments,i)); |
138 | 0 | } |
139 | 0 | path = ncbytesextract(buf); |
140 | | /* complete the new url */ |
141 | 0 | if((newurl=ncuriclone(url))==NULL) {stat = NC_ENOMEM; goto done;} |
142 | 0 | ncurisetprotocol(newurl,"https"); |
143 | 0 | ncurisethost(newurl,host); |
144 | 0 | ncurisetpath(newurl,path); |
145 | | /* Rebuild the url->url */ |
146 | 0 | ncurirebuild(newurl); |
147 | | /* return various items */ |
148 | | #ifdef AWSDEBUG |
149 | | fprintf(stderr,">>> NC_s3urlrebuild: final=%s bucket=%s region=%s\n",uri->uri,bucket,region); |
150 | | #endif |
151 | 0 | if(newurlp) {*newurlp = newurl; newurl = NULL;} |
152 | 0 | if(bucketp) {*bucketp = bucket; bucket = NULL;} |
153 | 0 | if(outregionp) {*outregionp = region; region = NULL;} |
154 | |
|
155 | 0 | done: |
156 | 0 | nullfree(region); |
157 | 0 | nullfree(bucket) |
158 | 0 | nullfree(host) |
159 | 0 | nullfree(path) |
160 | 0 | ncurifree(newurl); |
161 | 0 | ncbytesfree(buf); |
162 | 0 | nclistfreeall(hostsegments); |
163 | 0 | nclistfreeall(pathsegments); |
164 | 0 | return stat; |
165 | 0 | } |
166 | | |
/*
Test whether string s ends with the string suffix.
The comparison is byte-wise and case-sensitive.
@param s      (in) string to examine; may be NULL
@param suffix (in) suffix to look for; may be NULL
@return 1 if s ends with suffix (an empty suffix always matches),
        0 otherwise, including when either argument is NULL
*/
static int
endswith(const char* s, const char* suffix)
{
    size_t ls, lsf;
    if(s == NULL || suffix == NULL) return 0;
    ls = strlen(s);
    lsf = strlen(suffix);
    /* Compare lengths in size_t rather than computing a signed difference:
       avoids the non-standard ssize_t and any sign conversion */
    if(lsf > ls) return 0;
    if(memcmp(s+(ls-lsf),suffix,lsf)!=0) return 0;
    return 1;
}
179 | | |
180 | | /**************************************************/ |
181 | | /* S3 utilities */ |
182 | | |
183 | | EXTERNL int |
184 | | NC_s3urlprocess(NCURI* url, NCS3INFO* s3) |
185 | 0 | { |
186 | 0 | int stat = NC_NOERR; |
187 | 0 | NCURI* url2 = NULL; |
188 | 0 | NClist* pathsegments = NULL; |
189 | 0 | const char* profile0 = NULL; |
190 | |
|
191 | 0 | if(url == NULL || s3 == NULL) |
192 | 0 | {stat = NC_EURL; goto done;} |
193 | | /* Get current profile */ |
194 | 0 | if((stat = NC_getactives3profile(url,&profile0))) goto done; |
195 | 0 | if(profile0 == NULL) profile0 = "none"; |
196 | 0 | s3->profile = strdup(profile0); |
197 | | |
198 | | /* Rebuild the URL to path format and get a usable region*/ |
199 | 0 | if((stat = NC_s3urlrebuild(url,&url2,&s3->bucket,&s3->region))) goto done; |
200 | 0 | s3->host = strdup(url2->host); |
201 | | /* construct the rootkey minus the leading bucket */ |
202 | 0 | pathsegments = nclistnew(); |
203 | 0 | if((stat = NC_split_delim(url2->path,'/',pathsegments))) goto done; |
204 | 0 | if(nclistlength(pathsegments) > 0) { |
205 | 0 | char* seg = nclistremove(pathsegments,0); |
206 | 0 | nullfree(seg); |
207 | 0 | } |
208 | 0 | if((stat = NC_join(pathsegments,&s3->rootkey))) goto done; |
209 | | |
210 | 0 | done: |
211 | 0 | ncurifree(url2); |
212 | 0 | nclistfreeall(pathsegments); |
213 | 0 | return stat; |
214 | 0 | } |
215 | | |
216 | | int |
217 | | NC_s3clear(NCS3INFO* s3) |
218 | 0 | { |
219 | 0 | if(s3) { |
220 | 0 | nullfree(s3->host); s3->host = NULL; |
221 | 0 | nullfree(s3->region); s3->region = NULL; |
222 | 0 | nullfree(s3->bucket); s3->bucket = NULL; |
223 | 0 | nullfree(s3->rootkey); s3->rootkey = NULL; |
224 | 0 | nullfree(s3->profile); s3->profile = NULL; |
225 | 0 | } |
226 | 0 | return NC_NOERR; |
227 | 0 | } |
228 | | |
229 | | /* |
230 | | Check if a url has indicators that signal an S3 url. |
231 | | */ |
232 | | |
233 | | int |
234 | | NC_iss3(NCURI* uri) |
235 | 0 | { |
236 | 0 | int iss3 = 0; |
237 | |
|
238 | 0 | if(uri == NULL) goto done; /* not a uri */ |
239 | | /* is the protocol "s3"? */ |
240 | 0 | if(strcasecmp(uri->protocol,"s3")==0) {iss3 = 1; goto done;} |
241 | | /* Is "s3" in the mode list? */ |
242 | 0 | if(NC_testmode(uri,"s3")) {iss3 = 1; goto done;} |
243 | | /* Last chance; see if host looks s3'y */ |
244 | 0 | if(endswith(uri->host,AWSHOST)) {iss3 = 1; goto done;} |
245 | | |
246 | 0 | done: |
247 | 0 | return iss3; |
248 | 0 | } |
249 | | |