Line | Count | Source |
1 | | /* faidx.c -- FASTA and FASTQ random access. |
2 | | |
3 | | Copyright (C) 2008, 2009, 2013-2020, 2022, 2024 Genome Research Ltd. |
4 | | Portions copyright (C) 2011 Broad Institute. |
5 | | |
6 | | Author: Heng Li <lh3@sanger.ac.uk> |
7 | | |
8 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | | of this software and associated documentation files (the "Software"), to deal |
10 | | in the Software without restriction, including without limitation the rights |
11 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
12 | | copies of the Software, and to permit persons to whom the Software is |
13 | | furnished to do so, subject to the following conditions: |
14 | | |
15 | | The above copyright notice and this permission notice shall be included in |
16 | | all copies or substantial portions of the Software. |
17 | | |
18 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
21 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
22 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
23 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
24 | | DEALINGS IN THE SOFTWARE. */ |
25 | | |
26 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
27 | | #include <config.h> |
28 | | |
29 | | #include <ctype.h> |
30 | | #include <string.h> |
31 | | #include <stdlib.h> |
32 | | #include <stdio.h> |
33 | | #include <inttypes.h> |
34 | | #include <errno.h> |
35 | | #include <limits.h> |
36 | | #include <unistd.h> |
37 | | #include <assert.h> |
38 | | |
39 | | #include "htslib/bgzf.h" |
40 | | #include "htslib/faidx.h" |
41 | | #include "htslib/hfile.h" |
42 | | #include "htslib/hts_alloc.h" |
43 | | #include "htslib/khash.h" |
44 | | #include "htslib/kstring.h" |
45 | | #include "hts_internal.h" |
46 | | |
47 | | // Faster isgraph; assumes ASCII |
48 | 0 | static inline int isgraph_(unsigned char c) { |
49 | 0 | return c > ' ' && c <= '~'; |
50 | 0 | } |
51 | | |
52 | | #ifdef isgraph |
53 | | # undef isgraph |
54 | | #endif |
55 | 0 | #define isgraph isgraph_ |
56 | | |
57 | | // An optimised bgzf_getc. |
58 | | // We could consider moving this to bgzf.h, but our own code uses it here only. |
59 | 0 | static inline int bgzf_getc_(BGZF *fp) { |
60 | 0 | if (fp->block_offset+1 < fp->block_length) { |
61 | 0 | int c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; |
62 | 0 | fp->uncompressed_address++; |
63 | 0 | return c; |
64 | 0 | } |
65 | | |
66 | 0 | return bgzf_getc(fp); |
67 | 0 | } |
68 | 0 | #define bgzf_getc bgzf_getc_ |
69 | | |
70 | | typedef struct { |
71 | | int id; // faidx_t->name[id] is for this struct. |
72 | | uint32_t line_extra; // Bytes to skip at the end of each line |
73 | | // Usually 1 for \n or 2 for \r\n ending |
74 | | uint64_t line_blen; // Number of bases (or quality values) on each line |
75 | | // Total line length = line_blen + line_extra |
76 | | uint64_t len; // Length of entire sequence |
77 | | uint64_t seq_offset; // File offset to start of sequence |
78 | | uint64_t qual_offset; // File offset to start of quality values (if fastq) |
79 | | } faidx1_t; |
80 | | KHASH_MAP_INIT_STR(s, faidx1_t) |
81 | | |
82 | | struct faidx_t { |
83 | | BGZF *bgzf; |
84 | | int n, m; |
85 | | char **name; |
86 | | khash_t(s) *hash; |
87 | | enum fai_format_options format; |
88 | | }; |
89 | | |
90 | | static int fai_name2id(void *v, const char *ref) |
91 | 0 | { |
92 | 0 | faidx_t *fai = (faidx_t *)v; |
93 | 0 | khint_t k = kh_get(s, fai->hash, ref); |
94 | 0 | return k == kh_end(fai->hash) ? -1 : kh_val(fai->hash, k).id; |
95 | 0 | } |
96 | | |
97 | | static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len, uint64_t line_len, uint64_t line_blen, uint64_t seq_offset, uint64_t qual_offset) |
98 | 0 | { |
99 | 0 | if (!name) { |
100 | 0 | hts_log_error("Malformed line: no name"); |
101 | 0 | return -1; |
102 | 0 | } |
103 | 0 | if (line_len > HTS_POS_MAX || line_blen > HTS_POS_MAX) { |
104 | 0 | hts_log_error("Malformed line: length out of bounds"); |
105 | 0 | return -1; |
106 | 0 | } |
107 | 0 | if (line_len < line_blen) { |
108 | 0 | hts_log_error("Malformed line: width (%"PRIu64") less than base count (%"PRIu64")", |
109 | 0 | line_len, line_blen); |
110 | 0 | return -1; |
111 | 0 | } |
112 | 0 | if (line_len - line_blen > UINT32_MAX) { |
113 | 0 | hts_log_error("Malformed line: difference between width (%"PRIu64") and base count (%"PRIu64") too large", |
114 | 0 | line_len, line_blen); |
115 | 0 | return -1; |
116 | 0 | } |
117 | | |
118 | 0 | char *name_key = strdup(name); |
119 | 0 | int absent; |
120 | 0 | khint_t k = kh_put(s, idx->hash, name_key, &absent); |
121 | 0 | faidx1_t *v = &kh_value(idx->hash, k); |
122 | |
|
123 | 0 | if (! absent) { |
124 | 0 | hts_log_warning("Ignoring duplicate sequence \"%s\" at byte offset %" PRIu64, name, seq_offset); |
125 | 0 | free(name_key); |
126 | 0 | return 0; |
127 | 0 | } |
128 | | |
129 | 0 | if (idx->n == idx->m) { |
130 | 0 | char **tmp; |
131 | 0 | idx->m = idx->m? idx->m<<1 : 16; |
132 | 0 | if (!(tmp = hts_realloc_p(idx->name, sizeof(char*), idx->m))) { |
133 | 0 | hts_log_error("Out of memory"); |
134 | 0 | return -1; |
135 | 0 | } |
136 | 0 | idx->name = tmp; |
137 | 0 | } |
138 | 0 | v->id = idx->n; |
139 | 0 | idx->name[idx->n++] = name_key; |
140 | 0 | v->len = len; |
141 | 0 | v->line_extra = (uint32_t) (line_len - line_blen); |
142 | 0 | v->line_blen = line_blen; |
143 | 0 | v->seq_offset = seq_offset; |
144 | 0 | v->qual_offset = qual_offset; |
145 | |
|
146 | 0 | return 0; |
147 | 0 | } |
148 | | |
149 | | |
150 | 0 | static faidx_t *fai_build_core(BGZF *bgzf) { |
151 | 0 | kstring_t name = { 0, 0, NULL }; |
152 | 0 | int c, read_done, line_num; |
153 | 0 | faidx_t *idx; |
154 | 0 | uint64_t seq_offset, qual_offset; |
155 | 0 | uint64_t seq_len, qual_len; |
156 | 0 | uint64_t char_len, cl, line_len, ll; |
157 | 0 | enum read_state {OUT_READ, IN_NAME, IN_SEQ, SEQ_END, IN_QUAL} state; |
158 | |
|
159 | 0 | idx = (faidx_t*)calloc(1, sizeof(faidx_t)); |
160 | 0 | idx->hash = kh_init(s); |
161 | 0 | idx->format = FAI_NONE; |
162 | |
|
163 | 0 | state = OUT_READ, read_done = 0, line_num = 1; |
164 | 0 | seq_offset = qual_offset = seq_len = qual_len = char_len = cl = line_len = ll = 0; |
165 | |
|
166 | 0 | while ((c = bgzf_getc(bgzf)) >= 0) { |
167 | 0 | switch (state) { |
168 | 0 | case OUT_READ: |
169 | 0 | switch (c) { |
170 | 0 | case '>': |
171 | 0 | if (idx->format == FAI_FASTQ) { |
172 | 0 | hts_log_error("Found '>' in a FASTQ file, error at line %d", line_num); |
173 | 0 | goto fail; |
174 | 0 | } |
175 | | |
176 | 0 | idx->format = FAI_FASTA; |
177 | 0 | state = IN_NAME; |
178 | 0 | break; |
179 | | |
180 | 0 | case '@': |
181 | 0 | if (idx->format == FAI_FASTA) { |
182 | 0 | hts_log_error("Found '@' in a FASTA file, error at line %d", line_num); |
183 | 0 | goto fail; |
184 | 0 | } |
185 | | |
186 | 0 | idx->format = FAI_FASTQ; |
187 | 0 | state = IN_NAME; |
188 | 0 | break; |
189 | | |
190 | 0 | case '\r': |
191 | | // Blank line with cr-lf ending? |
192 | 0 | if ((c = bgzf_getc(bgzf)) == '\n') { |
193 | 0 | line_num++; |
194 | 0 | } else { |
195 | 0 | hts_log_error("Format error, carriage return not followed by new line at line %d", line_num); |
196 | 0 | goto fail; |
197 | 0 | } |
198 | 0 | break; |
199 | | |
200 | 0 | case '\n': |
201 | | // just move onto the next line |
202 | 0 | line_num++; |
203 | 0 | break; |
204 | | |
205 | 0 | default: { |
206 | 0 | char s[4] = { '"', c, '"', '\0' }; |
207 | 0 | hts_log_error("Format error, unexpected %s at line %d", isprint(c) ? s : "character", line_num); |
208 | 0 | goto fail; |
209 | 0 | } |
210 | 0 | } |
211 | 0 | break; |
212 | | |
213 | 0 | case IN_NAME: |
214 | 0 | if (read_done) { |
215 | 0 | if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) |
216 | 0 | goto fail; |
217 | | |
218 | 0 | read_done = 0; |
219 | 0 | } |
220 | | |
221 | 0 | name.l = 0; |
222 | |
|
223 | 0 | do { |
224 | 0 | if (!isspace(c)) { |
225 | 0 | kputc(c, &name); |
226 | 0 | } else if (name.l > 0 || c == '\n') { |
227 | 0 | break; |
228 | 0 | } |
229 | 0 | } while ((c = bgzf_getc(bgzf)) >= 0); |
230 | |
|
231 | 0 | kputsn("", 0, &name); |
232 | |
|
233 | 0 | if (c < 0) { |
234 | 0 | hts_log_error("The last entry '%s' has no sequence at line %d", name.s, line_num); |
235 | 0 | goto fail; |
236 | 0 | } |
237 | | |
238 | | // read the rest of the line if necessary |
239 | 0 | if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
240 | |
|
241 | 0 | state = IN_SEQ; seq_len = qual_len = char_len = line_len = 0; |
242 | 0 | seq_offset = bgzf_utell(bgzf); |
243 | 0 | line_num++; |
244 | 0 | break; |
245 | | |
246 | 0 | case IN_SEQ: |
247 | 0 | if (idx->format == FAI_FASTA) { |
248 | 0 | if (c == '\n') { |
249 | 0 | state = OUT_READ; |
250 | 0 | line_num++; |
251 | 0 | continue; |
252 | 0 | } else if (c == '>') { |
253 | 0 | state = IN_NAME; |
254 | 0 | continue; |
255 | 0 | } |
256 | 0 | } else if (idx->format == FAI_FASTQ) { |
257 | 0 | if (c == '+') { |
258 | 0 | state = IN_QUAL; |
259 | 0 | if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
260 | 0 | qual_offset = bgzf_utell(bgzf); |
261 | 0 | line_num++; |
262 | 0 | continue; |
263 | 0 | } else if (c == '\n') { |
264 | 0 | hts_log_error("Inlined empty line is not allowed in sequence '%s' at line %d", name.s, line_num); |
265 | 0 | goto fail; |
266 | 0 | } |
267 | 0 | } |
268 | | |
269 | 0 | ll = cl = 0; |
270 | |
|
271 | 0 | if (idx->format == FAI_FASTA) read_done = 1; |
272 | |
|
273 | 0 | do { |
274 | 0 | ll++; |
275 | 0 | if (isgraph(c)) cl++; |
276 | 0 | } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
277 | |
|
278 | 0 | ll++; seq_len += cl; |
279 | |
|
280 | 0 | if (line_len == 0) { |
281 | 0 | line_len = ll; |
282 | 0 | char_len = cl; |
283 | 0 | } else if (line_len > ll) { |
284 | |
|
285 | 0 | if (idx->format == FAI_FASTA) |
286 | 0 | state = OUT_READ; |
287 | 0 | else |
288 | 0 | state = SEQ_END; |
289 | |
|
290 | 0 | } else if (line_len < ll) { |
291 | 0 | hts_log_error("Different line length in sequence '%s' at line %d", name.s, line_num); |
292 | 0 | goto fail; |
293 | 0 | } |
294 | | |
295 | 0 | line_num++; |
296 | 0 | break; |
297 | | |
298 | 0 | case SEQ_END: |
299 | 0 | if (c == '+') { |
300 | 0 | state = IN_QUAL; |
301 | 0 | while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
302 | 0 | qual_offset = bgzf_utell(bgzf); |
303 | 0 | line_num++; |
304 | 0 | } else { |
305 | 0 | hts_log_error("Format error, expecting '+', got '%c' at line %d", c, line_num); |
306 | 0 | goto fail; |
307 | 0 | } |
308 | 0 | break; |
309 | | |
310 | 0 | case IN_QUAL: |
311 | 0 | if (c == '\n') { |
312 | 0 | if (!read_done) { |
313 | 0 | hts_log_error("Inlined empty line is not allowed in quality of sequence '%s' at line %d", name.s, line_num); |
314 | 0 | goto fail; |
315 | 0 | } |
316 | | |
317 | 0 | state = OUT_READ; |
318 | 0 | line_num++; |
319 | 0 | continue; |
320 | 0 | } else if (c == '@' && read_done) { |
321 | 0 | state = IN_NAME; |
322 | 0 | continue; |
323 | 0 | } |
324 | | |
325 | 0 | ll = cl = 0; |
326 | |
|
327 | 0 | do { |
328 | 0 | ll++; |
329 | 0 | if (isgraph(c)) cl++; |
330 | 0 | } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
331 | |
|
332 | 0 | ll++; qual_len += cl; |
333 | |
|
334 | 0 | if (line_len < ll) { |
335 | 0 | hts_log_error("Quality line length too long in '%s' at line %d", name.s, line_num); |
336 | 0 | goto fail; |
337 | 0 | } else if (qual_len == seq_len) { |
338 | 0 | read_done = 1; |
339 | 0 | } else if (qual_len > seq_len) { |
340 | 0 | hts_log_error("Quality length longer than sequence in '%s' at line %d", name.s, line_num); |
341 | 0 | goto fail; |
342 | 0 | } else if (line_len > ll) { |
343 | 0 | hts_log_error("Quality line length too short in '%s' at line %d", name.s, line_num); |
344 | 0 | goto fail; |
345 | 0 | } |
346 | | |
347 | 0 | line_num++; |
348 | 0 | break; |
349 | 0 | } |
350 | 0 | } |
351 | | |
352 | 0 | if (read_done) { |
353 | 0 | if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) |
354 | 0 | goto fail; |
355 | 0 | } else { |
356 | 0 | hts_log_error("File truncated at line %d", line_num); |
357 | 0 | goto fail; |
358 | 0 | } |
359 | | |
360 | 0 | free(name.s); |
361 | 0 | return idx; |
362 | | |
363 | 0 | fail: |
364 | 0 | free(name.s); |
365 | 0 | fai_destroy(idx); |
366 | 0 | return NULL; |
367 | 0 | } |
368 | | |
369 | | |
370 | 0 | static int fai_save(const faidx_t *fai, hFILE *fp) { |
371 | 0 | khint_t k; |
372 | 0 | int i; |
373 | 0 | char buf[96]; // Must be big enough for format below. |
374 | |
|
375 | 0 | for (i = 0; i < fai->n; ++i) { |
376 | 0 | faidx1_t x; |
377 | 0 | k = kh_get(s, fai->hash, fai->name[i]); |
378 | 0 | assert(k < kh_end(fai->hash)); |
379 | 0 | x = kh_value(fai->hash, k); |
380 | |
|
381 | 0 | if (fai->format == FAI_FASTA) { |
382 | 0 | snprintf(buf, sizeof(buf), |
383 | 0 | "\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n", |
384 | 0 | x.len, x.seq_offset, x.line_blen, x.line_blen + x.line_extra); |
385 | 0 | } else { |
386 | 0 | snprintf(buf, sizeof(buf), |
387 | 0 | "\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\t%"PRIu64"\n", |
388 | 0 | x.len, x.seq_offset, x.line_blen, |
389 | 0 | x.line_blen + x.line_extra, x.qual_offset); |
390 | 0 | } |
391 | |
|
392 | 0 | if (hputs(fai->name[i], fp) != 0) return -1; |
393 | 0 | if (hputs(buf, fp) != 0) return -1; |
394 | 0 | } |
395 | 0 | return 0; |
396 | 0 | } |
397 | | |
398 | | |
399 | | static faidx_t *fai_read(hFILE *fp, const char *fname, int format) |
400 | 0 | { |
401 | 0 | faidx_t *fai; |
402 | 0 | char *buf = NULL, *p; |
403 | 0 | ssize_t l, lnum = 1; |
404 | |
|
405 | 0 | fai = (faidx_t*)calloc(1, sizeof(faidx_t)); |
406 | 0 | if (!fai) return NULL; |
407 | | |
408 | 0 | fai->hash = kh_init(s); |
409 | 0 | if (!fai->hash) goto fail; |
410 | | |
411 | 0 | buf = (char*)calloc(0x10000, 1); |
412 | 0 | if (!buf) goto fail; |
413 | | |
414 | 0 | while ((l = hgetln(buf, 0x10000, fp)) > 0) { |
415 | 0 | uint64_t line_len, line_blen; |
416 | 0 | uint64_t len; |
417 | 0 | uint64_t seq_offset; |
418 | 0 | uint64_t qual_offset = 0; |
419 | 0 | int n; |
420 | |
|
421 | 0 | for (p = buf; *p && !isspace_c(*p); ++p); |
422 | |
|
423 | 0 | if (p - buf < l) { |
424 | 0 | *p = 0; ++p; |
425 | 0 | } |
426 | |
|
427 | 0 | if (format == FAI_FASTA) { |
428 | 0 | n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu64"%"SCNu64, &len, &seq_offset, &line_blen, &line_len); |
429 | |
|
430 | 0 | if (n != 4) { |
431 | 0 | hts_log_error("Could not understand FASTA index %s line %zd", fname, lnum); |
432 | 0 | goto fail; |
433 | 0 | } |
434 | 0 | } else { |
435 | 0 | n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu64"%"SCNu64"%"SCNu64, &len, &seq_offset, &line_blen, &line_len, &qual_offset); |
436 | |
|
437 | 0 | if (n != 5) { |
438 | 0 | if (n == 4) { |
439 | 0 | hts_log_error("Possibly this is a FASTA index, try using faidx. Problem in %s line %zd", fname, lnum); |
440 | 0 | } else { |
441 | 0 | hts_log_error("Could not understand FASTQ index %s line %zd", fname, lnum); |
442 | 0 | } |
443 | |
|
444 | 0 | goto fail; |
445 | 0 | } |
446 | 0 | } |
447 | | |
448 | 0 | if (fai_insert_index(fai, buf, len, line_len, line_blen, seq_offset, qual_offset) != 0) { |
449 | 0 | goto fail; |
450 | 0 | } |
451 | | |
452 | 0 | if (buf[l - 1] == '\n') ++lnum; |
453 | 0 | } |
454 | | |
455 | 0 | if (l < 0) { |
456 | 0 | hts_log_error("Error while reading %s: %s", fname, strerror(errno)); |
457 | 0 | goto fail; |
458 | 0 | } |
459 | 0 | free(buf); |
460 | 0 | return fai; |
461 | | |
462 | 0 | fail: |
463 | 0 | free(buf); |
464 | 0 | fai_destroy(fai); |
465 | 0 | return NULL; |
466 | 0 | } |
467 | | |
468 | | void fai_destroy(faidx_t *fai) |
469 | 2 | { |
470 | 2 | int i; |
471 | 2 | if (!fai) return; |
472 | 0 | for (i = 0; i < fai->n; ++i) free(fai->name[i]); |
473 | 0 | free(fai->name); |
474 | 0 | kh_destroy(s, fai->hash); |
475 | 0 | if (fai->bgzf) bgzf_close(fai->bgzf); |
476 | 0 | free(fai); |
477 | 0 | } |
478 | | |
479 | | |
480 | | static int fai_build3_core(const char *fn, const char *fnfai, const char *fngzi) |
481 | 2 | { |
482 | 2 | kstring_t fai_kstr = { 0, 0, NULL }; |
483 | 2 | kstring_t gzi_kstr = { 0, 0, NULL }; |
484 | 2 | BGZF *bgzf = NULL; |
485 | 2 | hFILE *fp = NULL; |
486 | 2 | faidx_t *fai = NULL; |
487 | 2 | int save_errno, res; |
488 | 2 | char *file_type; |
489 | | |
490 | 2 | bgzf = bgzf_open(fn, "r"); |
491 | | |
492 | 2 | if ( !bgzf ) { |
493 | 2 | hts_log_error("Failed to open the file %s : %s", fn, strerror(errno)); |
494 | 2 | goto fail; |
495 | 2 | } |
496 | | |
497 | 0 | if ( bgzf->is_compressed ) { |
498 | 0 | if (bgzf_index_build_init(bgzf) != 0) { |
499 | 0 | hts_log_error("Failed to allocate bgzf index"); |
500 | 0 | goto fail; |
501 | 0 | } |
502 | 0 | } |
503 | | |
504 | 0 | fai = fai_build_core(bgzf); |
505 | |
|
506 | 0 | if ( !fai ) { |
507 | 0 | if (bgzf->is_compressed && bgzf->is_gzip) { |
508 | 0 | hts_log_error("Cannot index files compressed with gzip, please use bgzip"); |
509 | 0 | } |
510 | 0 | goto fail; |
511 | 0 | } |
512 | | |
513 | 0 | if (fai->format == FAI_FASTA) { |
514 | 0 | file_type = "FASTA"; |
515 | 0 | } else { |
516 | 0 | file_type = "FASTQ"; |
517 | 0 | } |
518 | |
|
519 | 0 | if (!fnfai) { |
520 | 0 | if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; |
521 | 0 | fnfai = fai_kstr.s; |
522 | 0 | } |
523 | | |
524 | 0 | if (!fngzi) { |
525 | 0 | if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; |
526 | 0 | fngzi = gzi_kstr.s; |
527 | 0 | } |
528 | | |
529 | 0 | if ( bgzf->is_compressed ) { |
530 | 0 | if (bgzf_index_dump(bgzf, fngzi, NULL) < 0) { |
531 | 0 | hts_log_error("Failed to make bgzf index %s", fngzi); |
532 | 0 | goto fail; |
533 | 0 | } |
534 | 0 | } |
535 | | |
536 | 0 | res = bgzf_close(bgzf); |
537 | 0 | bgzf = NULL; |
538 | |
|
539 | 0 | if (res < 0) { |
540 | 0 | hts_log_error("Error on closing %s : %s", fn, strerror(errno)); |
541 | 0 | goto fail; |
542 | 0 | } |
543 | | |
544 | 0 | fp = hopen(fnfai, "wb"); |
545 | |
|
546 | 0 | if ( !fp ) { |
547 | 0 | hts_log_error("Failed to open %s index %s : %s", file_type, fnfai, strerror(errno)); |
548 | 0 | goto fail; |
549 | 0 | } |
550 | | |
551 | 0 | if (fai_save(fai, fp) != 0) { |
552 | 0 | hts_log_error("Failed to write %s index %s : %s", file_type, fnfai, strerror(errno)); |
553 | 0 | goto fail; |
554 | 0 | } |
555 | | |
556 | 0 | if (hclose(fp) != 0) { |
557 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); |
558 | 0 | goto fail; |
559 | 0 | } |
560 | | |
561 | 0 | free(fai_kstr.s); |
562 | 0 | free(gzi_kstr.s); |
563 | 0 | fai_destroy(fai); |
564 | 0 | return 0; |
565 | | |
566 | 2 | fail: |
567 | 2 | save_errno = errno; |
568 | 2 | free(fai_kstr.s); |
569 | 2 | free(gzi_kstr.s); |
570 | 2 | bgzf_close(bgzf); |
571 | 2 | fai_destroy(fai); |
572 | 2 | errno = save_errno; |
573 | 2 | return -1; |
574 | 0 | } |
575 | | |
576 | | |
577 | 2 | int fai_build3(const char *fn, const char *fnfai, const char *fngzi) { |
578 | 2 | return fai_build3_core(fn, fnfai, fngzi); |
579 | 2 | } |
580 | | |
581 | | |
582 | 2 | int fai_build(const char *fn) { |
583 | 2 | return fai_build3(fn, NULL, NULL); |
584 | 2 | } |
585 | | |
586 | | |
587 | | static faidx_t *fai_load3_core(const char *fn, const char *fnfai, const char *fngzi, |
588 | | int flags, int format) |
589 | 0 | { |
590 | 0 | kstring_t fai_kstr = { 0, 0, NULL }; |
591 | 0 | kstring_t gzi_kstr = { 0, 0, NULL }; |
592 | 0 | hFILE *fp = NULL; |
593 | 0 | faidx_t *fai = NULL; |
594 | 0 | int res, gzi_index_needed = 0; |
595 | 0 | char *file_type; |
596 | |
|
597 | 0 | if (format == FAI_FASTA) { |
598 | 0 | file_type = "FASTA"; |
599 | 0 | } else { |
600 | 0 | file_type = "FASTQ"; |
601 | 0 | } |
602 | |
|
603 | 0 | if (fn == NULL) |
604 | 0 | return NULL; |
605 | | |
606 | 0 | if (fnfai == NULL) { |
607 | 0 | if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; |
608 | 0 | fnfai = fai_kstr.s; |
609 | 0 | } |
610 | 0 | if (fngzi == NULL) { |
611 | 0 | if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; |
612 | 0 | fngzi = gzi_kstr.s; |
613 | 0 | } |
614 | | |
615 | 0 | fp = hopen(fnfai, "rb"); |
616 | |
|
617 | 0 | if (fp) { |
618 | | // index file present, check if a compressed index is needed |
619 | 0 | hFILE *gz = NULL; |
620 | 0 | BGZF *bgzf = bgzf_open(fn, "rb"); |
621 | |
|
622 | 0 | if (bgzf == 0) { |
623 | 0 | hts_log_error("Failed to open %s file %s", file_type, fn); |
624 | 0 | goto fail; |
625 | 0 | } |
626 | | |
627 | 0 | if (bgzf_compression(bgzf) == 2) { // BGZF compression |
628 | 0 | if ((gz = hopen(fngzi, "rb")) == 0) { |
629 | |
|
630 | 0 | if (!(flags & FAI_CREATE) || errno != ENOENT) { |
631 | 0 | hts_log_error("Failed to open %s index %s: %s", file_type, fngzi, strerror(errno)); |
632 | 0 | bgzf_close(bgzf); |
633 | 0 | goto fail; |
634 | 0 | } |
635 | | |
636 | 0 | gzi_index_needed = 1; |
637 | 0 | res = hclose(fp); // closed as going to be re-indexed |
638 | |
|
639 | 0 | if (res < 0) { |
640 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); |
641 | 0 | goto fail; |
642 | 0 | } |
643 | 0 | } else { |
644 | 0 | res = hclose(gz); |
645 | |
|
646 | 0 | if (res < 0) { |
647 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fngzi, strerror(errno)); |
648 | 0 | goto fail; |
649 | 0 | } |
650 | 0 | } |
651 | 0 | } |
652 | | |
653 | 0 | bgzf_close(bgzf); |
654 | 0 | } |
655 | | |
656 | 0 | if (fp == 0 || gzi_index_needed) { |
657 | 0 | if (!(flags & FAI_CREATE) || errno != ENOENT) { |
658 | 0 | hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); |
659 | 0 | goto fail; |
660 | 0 | } |
661 | | |
662 | 0 | hts_log_info("Build %s index", file_type); |
663 | |
|
664 | 0 | if (fai_build3_core(fn, fnfai, fngzi) < 0) { |
665 | 0 | goto fail; |
666 | 0 | } |
667 | | |
668 | 0 | fp = hopen(fnfai, "rb"); |
669 | 0 | if (fp == 0) { |
670 | 0 | hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); |
671 | 0 | goto fail; |
672 | 0 | } |
673 | 0 | } |
674 | | |
675 | 0 | fai = fai_read(fp, fnfai, format); |
676 | 0 | if (fai == NULL) { |
677 | 0 | hts_log_error("Failed to read %s index %s", file_type, fnfai); |
678 | 0 | goto fail; |
679 | 0 | } |
680 | | |
681 | 0 | res = hclose(fp); |
682 | 0 | fp = NULL; |
683 | 0 | if (res < 0) { |
684 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); |
685 | 0 | goto fail; |
686 | 0 | } |
687 | | |
688 | 0 | fai->bgzf = bgzf_open(fn, "rb"); |
689 | 0 | if (fai->bgzf == 0) { |
690 | 0 | hts_log_error("Failed to open %s file %s", file_type, fn); |
691 | 0 | goto fail; |
692 | 0 | } |
693 | | |
694 | 0 | if ( fai->bgzf->is_compressed==1 ) { |
695 | 0 | if ( bgzf_index_load(fai->bgzf, fngzi, NULL) < 0 ) { |
696 | 0 | hts_log_error("Failed to load .gzi index: %s", fngzi); |
697 | 0 | goto fail; |
698 | 0 | } |
699 | 0 | } |
700 | 0 | free(fai_kstr.s); |
701 | 0 | free(gzi_kstr.s); |
702 | 0 | return fai; |
703 | | |
704 | 0 | fail: |
705 | 0 | if (fai) fai_destroy(fai); |
706 | 0 | if (fp) hclose_abruptly(fp); |
707 | 0 | free(fai_kstr.s); |
708 | 0 | free(gzi_kstr.s); |
709 | 0 | return NULL; |
710 | 0 | } |
711 | | |
712 | | |
713 | | faidx_t *fai_load3(const char *fn, const char *fnfai, const char *fngzi, |
714 | 0 | int flags) { |
715 | 0 | return fai_load3_core(fn, fnfai, fngzi, flags, FAI_FASTA); |
716 | 0 | } |
717 | | |
718 | | |
719 | | faidx_t *fai_load(const char *fn) |
720 | 0 | { |
721 | 0 | return fai_load3(fn, NULL, NULL, FAI_CREATE); |
722 | 0 | } |
723 | | |
724 | | |
725 | | faidx_t *fai_load3_format(const char *fn, const char *fnfai, const char *fngzi, |
726 | 0 | int flags, enum fai_format_options format) { |
727 | 0 | return fai_load3_core(fn, fnfai, fngzi, flags, format); |
728 | 0 | } |
729 | | |
730 | | |
731 | 0 | faidx_t *fai_load_format(const char *fn, enum fai_format_options format) { |
732 | 0 | return fai_load3_format(fn, NULL, NULL, FAI_CREATE, format); |
733 | 0 | } |
734 | | |
735 | | |
736 | | static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val, |
737 | 0 | uint64_t offset, hts_pos_t beg, hts_pos_t end, hts_pos_t *len) { |
738 | 0 | char *buffer, *s; |
739 | 0 | ssize_t nread, remaining, firstline_len, firstline_blen; |
740 | 0 | int ret; |
741 | |
|
742 | 0 | if ((uint64_t) end - (uint64_t) beg >= SIZE_MAX - 2) { |
743 | 0 | hts_log_error("Range %"PRId64"..%"PRId64" too big", beg, end); |
744 | 0 | *len = -1; |
745 | 0 | return NULL; |
746 | 0 | } |
747 | | |
748 | 0 | if (val->line_blen <= 0) { |
749 | 0 | hts_log_error("Invalid line length in index: %"PRIu64, val->line_blen); |
750 | 0 | *len = -1; |
751 | 0 | return NULL; |
752 | 0 | } |
753 | | |
754 | 0 | uint64_t line_len = val->line_blen + val->line_extra; |
755 | |
|
756 | 0 | ret = bgzf_useek(fai->bgzf, |
757 | 0 | offset |
758 | 0 | + beg / val->line_blen * line_len |
759 | 0 | + beg % val->line_blen, SEEK_SET); |
760 | |
|
761 | 0 | if (ret < 0) { |
762 | 0 | *len = -1; |
763 | 0 | hts_log_error("Failed to retrieve block. (Seeking in a compressed, .gzi unindexed, file?)"); |
764 | 0 | return NULL; |
765 | 0 | } |
766 | | |
767 | | // Over-allocate so there is extra space for one end-of-line sequence |
768 | 0 | buffer = hts_malloc(hts_add_sat3(end - beg, line_len - val->line_blen, 1)); |
769 | 0 | if (!buffer) { |
770 | 0 | *len = -1; |
771 | 0 | return NULL; |
772 | 0 | } |
773 | | |
774 | 0 | remaining = *len = end - beg; |
775 | 0 | firstline_blen = val->line_blen - beg % val->line_blen; |
776 | | |
777 | | // Special case when the entire interval requested is within a single FASTA/Q line |
778 | 0 | if (remaining <= firstline_blen) { |
779 | 0 | nread = bgzf_read_small(fai->bgzf, buffer, remaining); |
780 | 0 | if (nread < remaining) goto error; |
781 | 0 | buffer[nread] = '\0'; |
782 | 0 | return buffer; |
783 | 0 | } |
784 | | |
785 | 0 | s = buffer; |
786 | 0 | firstline_len = line_len - beg % val->line_blen; |
787 | | |
788 | | // Read the (partial) first line and its line terminator, but increment s past the |
789 | | // line contents only, so the terminator characters will be overwritten by the next line. |
790 | 0 | nread = bgzf_read_small(fai->bgzf, s, firstline_len); |
791 | 0 | if (nread < firstline_len) goto error; |
792 | 0 | s += firstline_blen; |
793 | 0 | remaining -= firstline_blen; |
794 | | |
795 | | // Similarly read complete lines and their line terminator characters, but overwrite the latter. |
796 | 0 | while (remaining > val->line_blen) { |
797 | 0 | nread = bgzf_read_small(fai->bgzf, s, line_len); |
798 | 0 | if (nread < (ssize_t) line_len) goto error; |
799 | 0 | s += val->line_blen; |
800 | 0 | remaining -= val->line_blen; |
801 | 0 | } |
802 | | |
803 | 0 | if (remaining > 0) { |
804 | 0 | nread = bgzf_read_small(fai->bgzf, s, remaining); |
805 | 0 | if (nread < remaining) goto error; |
806 | 0 | s += remaining; |
807 | 0 | } |
808 | | |
809 | 0 | *s = '\0'; |
810 | 0 | return buffer; |
811 | | |
812 | 0 | error: |
813 | 0 | hts_log_error("Failed to retrieve block: %s", |
814 | 0 | (nread == 0)? "unexpected end of file" : "error reading file"); |
815 | 0 | free(buffer); |
816 | 0 | *len = -1; |
817 | 0 | return NULL; |
818 | 0 | } |
819 | | |
820 | | static int fai_get_val(const faidx_t *fai, const char *str, |
821 | 0 | hts_pos_t *len, faidx1_t *val, hts_pos_t *fbeg, hts_pos_t *fend) { |
822 | 0 | khiter_t iter; |
823 | 0 | khash_t(s) *h; |
824 | 0 | int id; |
825 | 0 | hts_pos_t beg, end; |
826 | |
|
827 | 0 | if (!fai_parse_region(fai, str, &id, &beg, &end, 0)) { |
828 | 0 | hts_log_warning("Reference %s not found in FASTA file, returning empty sequence", str); |
829 | 0 | *len = -2; |
830 | 0 | return 1; |
831 | 0 | } |
832 | | |
833 | 0 | h = fai->hash; |
834 | 0 | iter = kh_get(s, h, faidx_iseq(fai, id)); |
835 | 0 | if (iter >= kh_end(h)) { |
836 | | // should have already been caught above |
837 | 0 | abort(); |
838 | 0 | } |
839 | 0 | *val = kh_value(h, iter); |
840 | |
|
841 | 0 | if (beg >= val->len) beg = val->len; |
842 | 0 | if (end >= val->len) end = val->len; |
843 | 0 | if (beg > end) beg = end; |
844 | |
|
845 | 0 | *fbeg = beg; |
846 | 0 | *fend = end; |
847 | |
|
848 | 0 | return 0; |
849 | 0 | } |
850 | | |
851 | | /* |
852 | | * Returns hts_pos_t for historic reasons (line_blen was uint32_t, |
853 | | * but this function returned a wider value in case it was expanded |
854 | | * which has now happened). It also needs to return a negative value |
855 | | * on error. |
856 | | */ |
857 | | hts_pos_t fai_line_length(const faidx_t *fai, const char *str) |
858 | 0 | { |
859 | 0 | faidx1_t val; |
860 | 0 | int64_t beg, end; |
861 | 0 | hts_pos_t len; |
862 | |
|
863 | 0 | if (fai_get_val(fai, str, &len, &val, &beg, &end)) |
864 | 0 | return -1; |
865 | 0 | else |
866 | 0 | return (val.line_blen <= HTS_POS_MAX |
867 | 0 | ? (hts_pos_t) val.line_blen |
868 | 0 | : (hts_pos_t) -1); |
869 | 0 | } |
870 | | |
871 | | char *fai_fetch64(const faidx_t *fai, const char *str, hts_pos_t *len) |
872 | 0 | { |
873 | 0 | faidx1_t val; |
874 | 0 | int64_t beg, end; |
875 | |
|
876 | 0 | if (fai_get_val(fai, str, len, &val, &beg, &end)) { |
877 | 0 | return NULL; |
878 | 0 | } |
879 | | |
880 | | // now retrieve the sequence |
881 | 0 | return fai_retrieve(fai, &val, val.seq_offset, beg, end, len); |
882 | 0 | } |
883 | | |
884 | | char *fai_fetch(const faidx_t *fai, const char *str, int *len) |
885 | 0 | { |
886 | 0 | hts_pos_t len64; |
887 | 0 | char *ret = fai_fetch64(fai, str, &len64); |
888 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
889 | 0 | return ret; |
890 | 0 | } |
891 | | |
892 | 0 | char *fai_fetchqual64(const faidx_t *fai, const char *str, hts_pos_t *len) { |
893 | 0 | faidx1_t val; |
894 | 0 | int64_t beg, end; |
895 | |
|
896 | 0 | if (fai_get_val(fai, str, len, &val, &beg, &end)) { |
897 | 0 | return NULL; |
898 | 0 | } |
899 | | |
900 | | // now retrieve the sequence |
901 | 0 | return fai_retrieve(fai, &val, val.qual_offset, beg, end, len); |
902 | 0 | } |
903 | | |
904 | 0 | char *fai_fetchqual(const faidx_t *fai, const char *str, int *len) { |
905 | 0 | hts_pos_t len64; |
906 | 0 | char *ret = fai_fetchqual64(fai, str, &len64); |
907 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
908 | 0 | return ret; |
909 | 0 | } |
910 | | |
911 | | int faidx_fetch_nseq(const faidx_t *fai) |
912 | 0 | { |
913 | 0 | return fai->n; |
914 | 0 | } |
915 | | |
916 | | int faidx_nseq(const faidx_t *fai) |
917 | 0 | { |
918 | 0 | return fai->n; |
919 | 0 | } |
920 | | |
921 | | const char *faidx_iseq(const faidx_t *fai, int i) |
922 | 0 | { |
923 | 0 | return fai->name[i]; |
924 | 0 | } |
925 | | |
926 | | hts_pos_t faidx_seq_len64(const faidx_t *fai, const char *seq) |
927 | 0 | { |
928 | 0 | khint_t k = kh_get(s, fai->hash, seq); |
929 | 0 | if ( k == kh_end(fai->hash) ) return -1; |
930 | 0 | return kh_val(fai->hash, k).len; |
931 | 0 | } |
932 | | |
933 | | int faidx_seq_len(const faidx_t *fai, const char *seq) |
934 | 0 | { |
935 | 0 | hts_pos_t len = faidx_seq_len64(fai, seq); |
936 | 0 | return len < INT_MAX ? len : INT_MAX; |
937 | 0 | } |
938 | | |
939 | | static int faidx_adjust_position(const faidx_t *fai, int end_adjust, |
940 | | faidx1_t *val_out, const char *c_name, |
941 | | hts_pos_t *p_beg_i, hts_pos_t *p_end_i, |
942 | 0 | hts_pos_t *len) { |
943 | 0 | khiter_t iter; |
944 | 0 | faidx1_t *val; |
945 | | |
946 | | // Adjust position |
947 | 0 | iter = kh_get(s, fai->hash, c_name); |
948 | |
|
949 | 0 | if (iter == kh_end(fai->hash)) { |
950 | 0 | if (len) |
951 | 0 | *len = -2; |
952 | 0 | hts_log_error("The sequence \"%s\" was not found", c_name); |
953 | 0 | return 1; |
954 | 0 | } |
955 | | |
956 | 0 | val = &kh_value(fai->hash, iter); |
957 | |
|
958 | 0 | if (val_out) |
959 | 0 | *val_out = *val; |
960 | |
|
961 | 0 | if(*p_end_i < *p_beg_i) |
962 | 0 | *p_beg_i = *p_end_i; |
963 | |
|
964 | 0 | if(*p_beg_i < 0) |
965 | 0 | *p_beg_i = 0; |
966 | 0 | else if(val->len <= *p_beg_i) |
967 | 0 | *p_beg_i = val->len; |
968 | |
|
969 | 0 | if(*p_end_i < 0) |
970 | 0 | *p_end_i = 0; |
971 | 0 | else if(val->len <= *p_end_i) |
972 | 0 | *p_end_i = val->len - end_adjust; |
973 | |
|
974 | 0 | return 0; |
975 | 0 | } |
976 | | |
977 | | int fai_adjust_region(const faidx_t *fai, int tid, |
978 | | hts_pos_t *beg, hts_pos_t *end) |
979 | 0 | { |
980 | 0 | hts_pos_t orig_beg, orig_end; |
981 | |
|
982 | 0 | if (!fai || !beg || !end || tid < 0 || tid >= fai->n) |
983 | 0 | return -1; |
984 | | |
985 | 0 | orig_beg = *beg; |
986 | 0 | orig_end = *end; |
987 | 0 | if (faidx_adjust_position(fai, 0, NULL, fai->name[tid], beg, end, NULL) != 0) { |
988 | 0 | hts_log_error("Inconsistent faidx internal state - couldn't find \"%s\"", |
989 | 0 | fai->name[tid]); |
990 | 0 | return -1; |
991 | 0 | } |
992 | | |
993 | 0 | return ((orig_beg != *beg ? 1 : 0) | |
994 | 0 | (orig_end != *end && orig_end < HTS_POS_MAX ? 2 : 0)); |
995 | 0 | } |
996 | | |
997 | | char *faidx_fetch_seq64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) |
998 | 0 | { |
999 | 0 | faidx1_t val; |
1000 | | |
1001 | | // Adjust position |
1002 | 0 | if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { |
1003 | 0 | return NULL; |
1004 | 0 | } |
1005 | | |
1006 | | // Now retrieve the sequence |
1007 | 0 | return fai_retrieve(fai, &val, val.seq_offset, p_beg_i, p_end_i + 1, len); |
1008 | 0 | } |
1009 | | |
1010 | | char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) |
1011 | 0 | { |
1012 | 0 | hts_pos_t len64; |
1013 | 0 | char *ret = faidx_fetch_seq64(fai, c_name, p_beg_i, p_end_i, &len64); |
1014 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
1015 | 0 | return ret; |
1016 | 0 | } |
1017 | | |
1018 | | char *faidx_fetch_qual64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) |
1019 | 0 | { |
1020 | 0 | faidx1_t val; |
1021 | | |
1022 | | // Adjust position |
1023 | 0 | if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { |
1024 | 0 | return NULL; |
1025 | 0 | } |
1026 | | |
1027 | | // Now retrieve the sequence |
1028 | 0 | return fai_retrieve(fai, &val, val.qual_offset, p_beg_i, p_end_i + 1, len); |
1029 | 0 | } |
1030 | | |
1031 | | char *faidx_fetch_qual(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) |
1032 | 0 | { |
1033 | 0 | hts_pos_t len64; |
1034 | 0 | char *ret = faidx_fetch_qual64(fai, c_name, p_beg_i, p_end_i, &len64); |
1035 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
1036 | 0 | return ret; |
1037 | 0 | } |
1038 | | |
1039 | | int faidx_has_seq(const faidx_t *fai, const char *seq) |
1040 | 0 | { |
1041 | 0 | khiter_t iter = kh_get(s, fai->hash, seq); |
1042 | 0 | if (iter == kh_end(fai->hash)) return 0; |
1043 | 0 | return 1; |
1044 | 0 | } |
1045 | | |
1046 | | const char *fai_parse_region(const faidx_t *fai, const char *s, |
1047 | | int *tid, hts_pos_t *beg, hts_pos_t *end, |
1048 | | int flags) |
1049 | 0 | { |
1050 | 0 | return hts_parse_region(s, tid, beg, end, fai_name2id, (void *)fai, flags); |
1051 | 0 | } |
1052 | | |
1053 | 0 | void fai_set_cache_size(faidx_t *fai, int cache_size) { |
1054 | 0 | bgzf_set_cache_size(fai->bgzf, cache_size); |
1055 | 0 | } |
1056 | | |
1057 | | // Adds a thread pool to the underlying BGZF layer. |
1058 | 0 | int fai_thread_pool(faidx_t *fai, struct hts_tpool *pool, int qsize) { |
1059 | 0 | return bgzf_thread_pool(fai->bgzf, pool, qsize); |
1060 | 0 | } |
1061 | | |
1062 | 0 | char *fai_path(const char *fa) { |
1063 | 0 | char *fai = NULL; |
1064 | 0 | if (!fa) { |
1065 | 0 | hts_log_error("No reference file specified"); |
1066 | 0 | } else { |
1067 | 0 | char *fai_tmp = strstr(fa, HTS_IDX_DELIM); |
1068 | 0 | if (fai_tmp) { |
1069 | 0 | fai_tmp += strlen(HTS_IDX_DELIM); |
1070 | 0 | fai = strdup(fai_tmp); |
1071 | 0 | if (!fai) |
1072 | 0 | hts_log_error("Failed to allocate memory"); |
1073 | 0 | } else { |
1074 | 0 | if (hisremote(fa)) { |
1075 | 0 | fai = hts_idx_locatefn(fa, ".fai"); // get the remote fai file name, if any, but do not download the file |
1076 | 0 | if (!fai) |
1077 | 0 | hts_log_error("Failed to locate index file for remote reference file '%s'", fa); |
1078 | 0 | } else{ |
1079 | 0 | if (hts_idx_check_local(fa, HTS_FMT_FAI, &fai) == 0 && fai) { |
1080 | 0 | if (fai_build3(fa, fai, NULL) == -1) { // create local fai file by indexing local fasta |
1081 | 0 | hts_log_error("Failed to build index file for reference file '%s'", fa); |
1082 | 0 | free(fai); |
1083 | 0 | fai = NULL; |
1084 | 0 | } |
1085 | 0 | } |
1086 | 0 | } |
1087 | 0 | } |
1088 | 0 | } |
1089 | |
|
1090 | 0 | return fai; |
1091 | 0 | } |