Line | Count | Source (jump to first uncovered line) |
1 | | /* multipart.c -- GA4GH redirection and multipart backend for file streams. |
2 | | |
3 | | Copyright (C) 2016-2017 Genome Research Ltd. |
4 | | |
5 | | Author: John Marshall <jm18@sanger.ac.uk> |
6 | | |
7 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | | of this software and associated documentation files (the "Software"), to deal |
9 | | in the Software without restriction, including without limitation the rights |
10 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | copies of the Software, and to permit persons to whom the Software is |
12 | | furnished to do so, subject to the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be included in |
15 | | all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
23 | | DEALINGS IN THE SOFTWARE. */ |
24 | | |
25 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
26 | | #include <config.h> |
27 | | |
28 | | #include <stdio.h> |
29 | | #include <string.h> |
30 | | #include <errno.h> |
31 | | |
32 | | #include "htslib/kstring.h" |
33 | | |
34 | | #include "hts_internal.h" |
35 | | #include "hfile_internal.h" |
36 | | |
37 | | #ifndef EPROTO |
38 | | #define EPROTO ENOEXEC |
39 | | #endif |
40 | | |
// One piece of a multipart stream: where to fetch it from and which extra
// HTTP headers to send when doing so.
typedef struct hfile_part {
    // Heap-allocated URL of this part; NULL once the part has been freed.
    char *url;

    // Either NULL (no extra headers) or a heap-allocated, NULL-terminated
    // array of heap-allocated "Name: value" strings.
    char **headers;
} hfile_part;
45 | | |
46 | | typedef struct { |
47 | | hFILE base; |
48 | | hfile_part *parts; |
49 | | size_t nparts, maxparts, current; |
50 | | hFILE *currentfp; |
51 | | } hFILE_multipart; |
52 | | |
53 | | static void free_part(hfile_part *p) |
54 | 0 | { |
55 | 0 | free(p->url); |
56 | 0 | if (p->headers) { |
57 | 0 | char **hdr; |
58 | 0 | for (hdr = p->headers; *hdr; hdr++) free(*hdr); |
59 | 0 | free(p->headers); |
60 | 0 | } |
61 | |
|
62 | 0 | p->url = NULL; |
63 | 0 | p->headers = NULL; |
64 | 0 | } |
65 | | |
66 | | static void free_all_parts(hFILE_multipart *fp) |
67 | 0 | { |
68 | 0 | size_t i; |
69 | 0 | for (i = 0; i < fp->nparts; i++) free_part(&fp->parts[i]); |
70 | 0 | free(fp->parts); |
71 | 0 | } |
72 | | |
73 | | static ssize_t multipart_read(hFILE *fpv, void *buffer, size_t nbytes) |
74 | 0 | { |
75 | 0 | hFILE_multipart *fp = (hFILE_multipart *) fpv; |
76 | 0 | size_t n; |
77 | |
|
78 | 0 | open_next: |
79 | 0 | if (fp->currentfp == NULL) { |
80 | 0 | if (fp->current < fp->nparts) { |
81 | 0 | const hfile_part *p = &fp->parts[fp->current]; |
82 | 0 | hts_log_debug("Opening part #%zu of %zu: \"%.120s%s\"", |
83 | 0 | fp->current+1, fp->nparts, p->url, |
84 | 0 | (strlen(p->url) > 120)? "..." : ""); |
85 | |
|
86 | 0 | fp->currentfp = p->headers? |
87 | 0 | hopen(p->url, "r:", |
88 | 0 | "httphdr:v", p->headers, |
89 | 0 | "auth_token_enabled", "false", NULL) |
90 | 0 | : hopen(p->url, "r:", "auth_token_enabled", "false", NULL); |
91 | |
|
92 | 0 | if (fp->currentfp == NULL) return -1; |
93 | 0 | } |
94 | 0 | else return 0; // No more parts, so we're truly at EOF |
95 | 0 | } |
96 | | |
97 | 0 | n = fp->currentfp->mobile? |
98 | 0 | fp->currentfp->backend->read(fp->currentfp, buffer, nbytes) |
99 | 0 | : hread(fp->currentfp, buffer, nbytes); |
100 | |
|
101 | 0 | if (n == 0) { |
102 | | // We're at EOF on this part, so set up the next part |
103 | 0 | hFILE *prevfp = fp->currentfp; |
104 | 0 | free_part(&fp->parts[fp->current]); |
105 | 0 | fp->current++; |
106 | 0 | fp->currentfp = NULL; |
107 | 0 | if (hclose(prevfp) < 0) return -1; |
108 | 0 | goto open_next; |
109 | 0 | } |
110 | | |
111 | 0 | return n; // Number of bytes read by (or an error from) fp->currentfp |
112 | 0 | } |
113 | | |
114 | | static ssize_t multipart_write(hFILE *fpv, const void *buffer, size_t nbytes) |
115 | 0 | { |
116 | 0 | errno = EROFS; |
117 | 0 | return -1; |
118 | 0 | } |
119 | | |
120 | | static off_t multipart_seek(hFILE *fpv, off_t offset, int whence) |
121 | 0 | { |
122 | 0 | errno = ESPIPE; |
123 | 0 | return -1; |
124 | 0 | } |
125 | | |
126 | | static int multipart_close(hFILE *fpv) |
127 | 0 | { |
128 | 0 | hFILE_multipart *fp = (hFILE_multipart *) fpv; |
129 | |
|
130 | 0 | free_all_parts(fp); |
131 | 0 | if (fp->currentfp) { |
132 | 0 | if (hclose(fp->currentfp) < 0) return -1; |
133 | 0 | } |
134 | | |
135 | 0 | return 0; |
136 | 0 | } |
137 | | |
138 | | static const struct hFILE_backend multipart_backend = |
139 | | { |
140 | | multipart_read, multipart_write, multipart_seek, NULL, multipart_close |
141 | | }; |
142 | | |
143 | | // Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing), |
144 | | // or upon encountering an unexpected token, that token's type. |
145 | | // Explicit `return '?'` means a JSON parsing error, typically a member key |
146 | | // that is not a string. An unexpected token may be a valid token that was |
147 | | // not the type expected for a particular GA4GH field, or it may be '?' or |
148 | | // '\0' which should be propagated. |
149 | | static char |
150 | | parse_ga4gh_body_json(hFILE_multipart *fp, hFILE *json, |
151 | | kstring_t *b, kstring_t *header) |
152 | 0 | { |
153 | 0 | hts_json_token t; |
154 | |
|
155 | 0 | if (hts_json_fnext(json, &t, b) != '{') return t.type; |
156 | 0 | while (hts_json_fnext(json, &t, b) != '}') { |
157 | 0 | if (t.type != 's') return '?'; |
158 | | |
159 | 0 | if (strcmp(t.str, "urls") == 0) { |
160 | 0 | if (hts_json_fnext(json, &t, b) != '[') return t.type; |
161 | | |
162 | 0 | while (hts_json_fnext(json, &t, b) != ']') { |
163 | 0 | hfile_part *part; |
164 | 0 | size_t n = 0, max = 0; |
165 | |
|
166 | 0 | hts_expand(hfile_part, fp->nparts+1, fp->maxparts, fp->parts); |
167 | 0 | part = &fp->parts[fp->nparts++]; |
168 | 0 | part->url = NULL; |
169 | 0 | part->headers = NULL; |
170 | |
|
171 | 0 | if (t.type != '{') return t.type; |
172 | 0 | while (hts_json_fnext(json, &t, b) != '}') { |
173 | 0 | if (t.type != 's') return '?'; |
174 | | |
175 | 0 | if (strcmp(t.str, "url") == 0) { |
176 | 0 | if (hts_json_fnext(json, &t, b) != 's') return t.type; |
177 | 0 | part->url = ks_release(b); |
178 | 0 | } |
179 | 0 | else if (strcmp(t.str, "headers") == 0) { |
180 | 0 | if (hts_json_fnext(json, &t, b) != '{') return t.type; |
181 | | |
182 | 0 | while (hts_json_fnext(json, &t, header) != '}') { |
183 | 0 | if (t.type != 's') return '?'; |
184 | | |
185 | 0 | if (hts_json_fnext(json, &t, b) != 's') |
186 | 0 | return t.type; |
187 | | |
188 | 0 | kputs(": ", header); |
189 | 0 | kputs(t.str, header); |
190 | 0 | n++; |
191 | 0 | hts_expand(char *, n+1, max, part->headers); |
192 | 0 | part->headers[n-1] = ks_release(header); |
193 | 0 | part->headers[n] = NULL; |
194 | 0 | } |
195 | 0 | } |
196 | 0 | else if (hts_json_fskip_value(json, '\0') != 'v') |
197 | 0 | return '?'; |
198 | 0 | } |
199 | | |
200 | 0 | if (! part->url) return 'i'; |
201 | 0 | } |
202 | 0 | } |
203 | 0 | else if (strcmp(t.str, "format") == 0) { |
204 | 0 | if (hts_json_fnext(json, &t, b) != 's') return t.type; |
205 | | |
206 | 0 | hts_log_debug("GA4GH JSON redirection to multipart %s data", t.str); |
207 | 0 | } |
208 | 0 | else if (hts_json_fskip_value(json, '\0') != 'v') return '?'; |
209 | 0 | } |
210 | | |
211 | 0 | return 'v'; |
212 | 0 | } |
213 | | |
214 | | // Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing), |
215 | | // or upon encountering an unexpected token, that token's type. |
216 | | // Explicit `return '?'` means a JSON parsing error, typically a member key |
217 | | // that is not a string. An unexpected token may be a valid token that was |
218 | | // not the type expected for a particular GA4GH field, or it may be '?' or |
219 | | // '\0' which should be propagated. |
220 | | static char |
221 | | parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json, |
222 | 0 | kstring_t *b, kstring_t *header) { |
223 | 0 | hts_json_token t; |
224 | |
|
225 | 0 | if (hts_json_fnext(json, &t, b) != '{') return t.type; |
226 | 0 | while (hts_json_fnext(json, &t, b) != '}') { |
227 | 0 | if (t.type != 's') return '?'; |
228 | | |
229 | 0 | if (strcmp(t.str, "htsget") == 0) { |
230 | 0 | char ret = parse_ga4gh_body_json(fp, json, b, header); |
231 | 0 | if (ret != 'v') return ret; |
232 | 0 | } |
233 | 0 | else return '?'; |
234 | 0 | } |
235 | | |
236 | 0 | if (hts_json_fnext(json, &t, b) != '\0') return '?'; |
237 | | |
238 | 0 | return 'v'; |
239 | 0 | } |
240 | | |
241 | | hFILE *hopen_htsget_redirect(hFILE *hfile, const char *mode) |
242 | 0 | { |
243 | 0 | hFILE_multipart *fp; |
244 | 0 | kstring_t s1 = { 0, 0, NULL }, s2 = { 0, 0, NULL }; |
245 | 0 | char ret; |
246 | |
|
247 | 0 | fp = (hFILE_multipart *) hfile_init(sizeof (hFILE_multipart), mode, 0); |
248 | 0 | if (fp == NULL) return NULL; |
249 | | |
250 | 0 | fp->parts = NULL; |
251 | 0 | fp->nparts = fp->maxparts = 0; |
252 | |
|
253 | 0 | ret = parse_ga4gh_redirect_json(fp, hfile, &s1, &s2); |
254 | 0 | free(s1.s); |
255 | 0 | free(s2.s); |
256 | 0 | if (ret != 'v') { |
257 | 0 | free_all_parts(fp); |
258 | 0 | hfile_destroy((hFILE *) fp); |
259 | 0 | errno = (ret == '?' || ret == '\0')? EPROTO : EINVAL; |
260 | 0 | return NULL; |
261 | 0 | } |
262 | | |
263 | 0 | fp->current = 0; |
264 | 0 | fp->currentfp = NULL; |
265 | 0 | fp->base.backend = &multipart_backend; |
266 | 0 | return &fp->base; |
267 | 0 | } |