Coverage Report

Created: 2023-01-17 06:24

/src/htslib/multipart.c
Line
Count
Source (jump to first uncovered line)
1
/*  multipart.c -- GA4GH redirection and multipart backend for file streams.
2
3
    Copyright (C) 2016-2017 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdio.h>
29
#include <string.h>
30
#include <errno.h>
31
32
#include "htslib/kstring.h"
33
34
#include "hts_internal.h"
35
#include "hfile_internal.h"
36
37
#ifndef EPROTO
38
#define EPROTO ENOEXEC
39
#endif
40
41
// One component of a multipart stream: where to fetch it from and which
// extra request headers to send when doing so.
typedef struct hfile_part {
    char *url;       // heap-allocated URL; NULL once the part has been freed
    char **headers;  // NULL, or a NULL-terminated array of heap-allocated
                     // "Name: value" strings
} hfile_part;
45
46
typedef struct {
47
    hFILE base;
48
    hfile_part *parts;
49
    size_t nparts, maxparts, current;
50
    hFILE *currentfp;
51
} hFILE_multipart;
52
53
// Releases the URL and every header string owned by *p, then resets both
// fields to NULL.  The part structure itself is not freed.  Because the
// fields end up NULL, calling this again on the same part is harmless.
static void free_part(hfile_part *p)
{
    char **headers = p->headers;

    free(p->url);
    p->url = NULL;

    if (headers != NULL) {
        size_t i;
        // The header array is NULL-terminated rather than counted
        for (i = 0; headers[i] != NULL; i++)
            free(headers[i]);
        free(headers);
    }
    p->headers = NULL;
}
65
66
// Frees the contents of all fp->nparts parts and then the parts array
// itself.  fp->parts and fp->nparts are left unchanged by this function.
static void free_all_parts(hFILE_multipart *fp)
{
    hfile_part *part = fp->parts;
    size_t remaining = fp->nparts;

    while (remaining > 0) {
        free_part(part);
        part++;
        remaining--;
    }

    free(fp->parts);
}
72
73
// Backend read method: reads up to nbytes from the current part, opening
// parts lazily and moving on to the next one each time a part reaches EOF.
//
// Returns the number of bytes read, 0 once every part has been consumed,
// or a negative value if opening, reading, or closing a part fails.
static ssize_t multipart_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_multipart *fp = (hFILE_multipart *) fpv;

    for (;;) {
        // Signed, so that an error return from the underlying read is
        // carried back to the caller without passing through an unsigned type
        ssize_t n;
        hFILE *prevfp;

        if (fp->currentfp == NULL) {
            const hfile_part *p;

            // No more parts, so we're truly at EOF
            if (fp->current >= fp->nparts) return 0;

            p = &fp->parts[fp->current];
            hts_log_debug("Opening part #%zu of %zu: \"%.120s%s\"",
                fp->current+1, fp->nparts, p->url,
                (strlen(p->url) > 120)? "..." : "");

            fp->currentfp = p->headers?
                  hopen(p->url, "r:",
                        "httphdr:v", p->headers,
                        "auth_token_enabled", "false", NULL)
                : hopen(p->url, "r:", "auth_token_enabled", "false", NULL);

            if (fp->currentfp == NULL) return -1;
        }

        // NOTE(review): "mobile" streams are read via their backend directly
        // rather than through hread(); presumably this bypasses the part's
        // own buffer -- confirm against hfile_internal.h
        n = fp->currentfp->mobile?
              fp->currentfp->backend->read(fp->currentfp, buffer, nbytes)
            : hread(fp->currentfp, buffer, nbytes);

        // Number of bytes read by (or an error from) fp->currentfp
        if (n != 0) return n;

        // We're at EOF on this part, so set up the next part.  The state is
        // advanced before hclose() so that it stays consistent even if the
        // close fails.
        prevfp = fp->currentfp;
        free_part(&fp->parts[fp->current]);
        fp->current++;
        fp->currentfp = NULL;
        if (hclose(prevfp) < 0) return -1;
    }
}
113
114
// Backend write method.  Multipart streams are read-only, so every call
// fails with errno set to EROFS; the arguments are ignored.
static ssize_t multipart_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    (void) fpv;
    (void) buffer;
    (void) nbytes;

    errno = EROFS;
    return -1;
}
119
120
// Backend seek method.  Seeking is not supported on multipart streams, so
// every call fails with errno set to ESPIPE; the arguments are ignored.
static off_t multipart_seek(hFILE *fpv, off_t offset, int whence)
{
    (void) fpv;
    (void) offset;
    (void) whence;

    errno = ESPIPE;
    return -1;
}
125
126
// Backend close method: frees all part descriptions and closes the part
// that is currently open, if any.  Returns 0 on success, or -1 if closing
// the open part fails.
static int multipart_close(hFILE *fpv)
{
    hFILE_multipart *fp = (hFILE_multipart *) fpv;
    hFILE *openfp = fp->currentfp;

    free_all_parts(fp);

    if (openfp == NULL) return 0;
    return (hclose(openfp) < 0)? -1 : 0;
}
137
138
static const struct hFILE_backend multipart_backend =
139
{
140
    multipart_read, multipart_write, multipart_seek, NULL, multipart_close
141
};
142
143
// Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing),
144
// or upon encountering an unexpected token, that token's type.
145
// Explicit `return '?'` means a JSON parsing error, typically a member key
146
// that is not a string.  An unexpected token may be a valid token that was
147
// not the type expected for a particular GA4GH field, or it may be '?' or
148
// '\0' which should be propagated.
149
static char
150
parse_ga4gh_body_json(hFILE_multipart *fp, hFILE *json,
151
                      kstring_t *b, kstring_t *header)
152
0
{
153
0
    hts_json_token t;
154
155
0
    if (hts_json_fnext(json, &t, b) != '{') return t.type;
156
0
    while (hts_json_fnext(json, &t, b) != '}') {
157
0
        if (t.type != 's') return '?';
158
159
0
        if (strcmp(t.str, "urls") == 0) {
160
0
            if (hts_json_fnext(json, &t, b) != '[') return t.type;
161
162
0
            while (hts_json_fnext(json, &t, b) != ']') {
163
0
                hfile_part *part;
164
0
                size_t n = 0, max = 0;
165
166
0
                hts_expand(hfile_part, fp->nparts+1, fp->maxparts, fp->parts);
167
0
                part = &fp->parts[fp->nparts++];
168
0
                part->url = NULL;
169
0
                part->headers = NULL;
170
171
0
                if (t.type != '{') return t.type;
172
0
                while (hts_json_fnext(json, &t, b) != '}') {
173
0
                    if (t.type != 's') return '?';
174
175
0
                    if (strcmp(t.str, "url") == 0) {
176
0
                        if (hts_json_fnext(json, &t, b) != 's') return t.type;
177
0
                        part->url = ks_release(b);
178
0
                    }
179
0
                    else if (strcmp(t.str, "headers") == 0) {
180
0
                        if (hts_json_fnext(json, &t, b) != '{') return t.type;
181
182
0
                        while (hts_json_fnext(json, &t, header) != '}') {
183
0
                            if (t.type != 's') return '?';
184
185
0
                            if (hts_json_fnext(json, &t, b) != 's')
186
0
                                return t.type;
187
188
0
                            kputs(": ", header);
189
0
                            kputs(t.str, header);
190
0
                            n++;
191
0
                            hts_expand(char *, n+1, max, part->headers);
192
0
                            part->headers[n-1] = ks_release(header);
193
0
                            part->headers[n] = NULL;
194
0
                        }
195
0
                    }
196
0
                    else if (hts_json_fskip_value(json, '\0') != 'v')
197
0
                        return '?';
198
0
                }
199
200
0
                if (! part->url) return 'i';
201
0
            }
202
0
        }
203
0
        else if (strcmp(t.str, "format") == 0) {
204
0
            if (hts_json_fnext(json, &t, b) != 's') return t.type;
205
206
0
            hts_log_debug("GA4GH JSON redirection to multipart %s data", t.str);
207
0
        }
208
0
        else if (hts_json_fskip_value(json, '\0') != 'v') return '?';
209
0
    }
210
211
0
    return 'v';
212
0
}
213
214
// Returns 'v' (valid value), 'i' (invalid; required GA4GH field missing),
215
// or upon encountering an unexpected token, that token's type.
216
// Explicit `return '?'` means a JSON parsing error, typically a member key
217
// that is not a string.  An unexpected token may be a valid token that was
218
// not the type expected for a particular GA4GH field, or it may be '?' or
219
// '\0' which should be propagated.
220
static char
221
parse_ga4gh_redirect_json(hFILE_multipart *fp, hFILE *json,
222
0
                          kstring_t *b, kstring_t *header) {
223
0
    hts_json_token t;
224
225
0
    if (hts_json_fnext(json, &t, b) != '{') return t.type;
226
0
    while (hts_json_fnext(json, &t, b) != '}') {
227
0
        if (t.type != 's') return '?';
228
229
0
        if (strcmp(t.str, "htsget") == 0) {
230
0
            char ret = parse_ga4gh_body_json(fp, json, b, header);
231
0
            if (ret != 'v') return ret;
232
0
        }
233
0
        else return '?';
234
0
    }
235
236
0
    if (hts_json_fnext(json, &t, b) != '\0') return '?';
237
238
0
    return 'v';
239
0
}
240
241
// Builds a multipart stream from the GA4GH redirect JSON read from hfile.
//
// Returns the new stream on success.  On failure returns NULL: if
// hfile_init() fails, errno is whatever it left; otherwise errno is set to
// EPROTO when parsing ended with '?' or '\0', and to EINVAL for any other
// parse result.  This function does not itself close hfile.
hFILE *hopen_htsget_redirect(hFILE *hfile, const char *mode)
{
    kstring_t scratch = { 0, 0, NULL }, hdrbuf = { 0, 0, NULL };
    hFILE_multipart *fp;
    char status;

    fp = (hFILE_multipart *) hfile_init(sizeof (hFILE_multipart), mode, 0);
    if (fp == NULL) return NULL;

    // Start with an empty part list for the parser to append to
    fp->parts = NULL;
    fp->nparts = 0;
    fp->maxparts = 0;

    status = parse_ga4gh_redirect_json(fp, hfile, &scratch, &hdrbuf);
    free(scratch.s);
    free(hdrbuf.s);

    if (status == 'v') {
        fp->current = 0;
        fp->currentfp = NULL;  // the first part is opened on first read
        fp->base.backend = &multipart_backend;
        return &fp->base;
    }

    free_all_parts(fp);
    hfile_destroy((hFILE *) fp);
    // Set errno last so the cleanup calls above cannot overwrite it
    if (status == '?' || status == '\0') errno = EPROTO;
    else errno = EINVAL;
    return NULL;
}