Line | Count | Source (jump to first uncovered line) |
1 | | /* faidx.c -- FASTA and FASTQ random access. |
2 | | |
3 | | Copyright (C) 2008, 2009, 2013-2020, 2022, 2024 Genome Research Ltd. |
4 | | Portions copyright (C) 2011 Broad Institute. |
5 | | |
6 | | Author: Heng Li <lh3@sanger.ac.uk> |
7 | | |
8 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | | of this software and associated documentation files (the "Software"), to deal |
10 | | in the Software without restriction, including without limitation the rights |
11 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
12 | | copies of the Software, and to permit persons to whom the Software is |
13 | | furnished to do so, subject to the following conditions: |
14 | | |
15 | | The above copyright notice and this permission notice shall be included in |
16 | | all copies or substantial portions of the Software. |
17 | | |
18 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
21 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
22 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
23 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
24 | | DEALINGS IN THE SOFTWARE. */ |
25 | | |
26 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
27 | | #include <config.h> |
28 | | |
29 | | #include <ctype.h> |
30 | | #include <string.h> |
31 | | #include <stdlib.h> |
32 | | #include <stdio.h> |
33 | | #include <inttypes.h> |
34 | | #include <errno.h> |
35 | | #include <limits.h> |
36 | | #include <unistd.h> |
37 | | #include <assert.h> |
38 | | |
39 | | #include "htslib/bgzf.h" |
40 | | #include "htslib/faidx.h" |
41 | | #include "htslib/hfile.h" |
42 | | #include "htslib/khash.h" |
43 | | #include "htslib/kstring.h" |
44 | | #include "hts_internal.h" |
45 | | |
46 | | // Faster isgraph; assumes ASCII |
47 | 1.15M | static inline int isgraph_(unsigned char c) { |
48 | 1.15M | return c > ' ' && c <= '~'; |
49 | 1.15M | } |
50 | | |
51 | | #ifdef isgraph |
52 | | # undef isgraph |
53 | | #endif |
54 | 1.15M | #define isgraph isgraph_ |
55 | | |
56 | | // An optimised bgzf_getc. |
57 | | // We could consider moving this to bgzf.h, but our own code uses it here only. |
58 | 4.35M | static inline int bgzf_getc_(BGZF *fp) { |
59 | 4.35M | if (fp->block_offset+1 < fp->block_length) { |
60 | 4.35M | int c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; |
61 | 4.35M | fp->uncompressed_address++; |
62 | 4.35M | return c; |
63 | 4.35M | } |
64 | | |
65 | 2.58k | return bgzf_getc(fp); |
66 | 4.35M | } |
67 | 4.35M | #define bgzf_getc bgzf_getc_ |
68 | | |
69 | | typedef struct { |
70 | | int id; // faidx_t->name[id] is for this struct. |
71 | | uint32_t line_len, line_blen; |
72 | | uint64_t len; |
73 | | uint64_t seq_offset; |
74 | | uint64_t qual_offset; |
75 | | } faidx1_t; |
76 | | KHASH_MAP_INIT_STR(s, faidx1_t) |
77 | | |
78 | | struct faidx_t { |
79 | | BGZF *bgzf; |
80 | | int n, m; |
81 | | char **name; |
82 | | khash_t(s) *hash; |
83 | | enum fai_format_options format; |
84 | | }; |
85 | | |
86 | | static int fai_name2id(void *v, const char *ref) |
87 | 0 | { |
88 | 0 | faidx_t *fai = (faidx_t *)v; |
89 | 0 | khint_t k = kh_get(s, fai->hash, ref); |
90 | 0 | return k == kh_end(fai->hash) ? -1 : kh_val(fai->hash, k).id; |
91 | 0 | } |
92 | | |
93 | | static inline int fai_insert_index(faidx_t *idx, const char *name, uint64_t len, uint32_t line_len, uint32_t line_blen, uint64_t seq_offset, uint64_t qual_offset) |
94 | 20.5k | { |
95 | 20.5k | if (!name) { |
96 | 0 | hts_log_error("Malformed line"); |
97 | 0 | return -1; |
98 | 0 | } |
99 | | |
100 | 20.5k | char *name_key = strdup(name); |
101 | 20.5k | int absent; |
102 | 20.5k | khint_t k = kh_put(s, idx->hash, name_key, &absent); |
103 | 20.5k | faidx1_t *v = &kh_value(idx->hash, k); |
104 | | |
105 | 20.5k | if (! absent) { |
106 | 16.8k | hts_log_warning("Ignoring duplicate sequence \"%s\" at byte offset %" PRIu64, name, seq_offset); |
107 | 16.8k | free(name_key); |
108 | 16.8k | return 0; |
109 | 16.8k | } |
110 | | |
111 | 3.63k | if (idx->n == idx->m) { |
112 | 482 | char **tmp; |
113 | 482 | idx->m = idx->m? idx->m<<1 : 16; |
114 | 482 | if (!(tmp = (char**)realloc(idx->name, sizeof(char*) * idx->m))) { |
115 | 0 | hts_log_error("Out of memory"); |
116 | 0 | return -1; |
117 | 0 | } |
118 | 482 | idx->name = tmp; |
119 | 482 | } |
120 | 3.63k | v->id = idx->n; |
121 | 3.63k | idx->name[idx->n++] = name_key; |
122 | 3.63k | v->len = len; |
123 | 3.63k | v->line_len = line_len; |
124 | 3.63k | v->line_blen = line_blen; |
125 | 3.63k | v->seq_offset = seq_offset; |
126 | 3.63k | v->qual_offset = qual_offset; |
127 | | |
128 | 3.63k | return 0; |
129 | 3.63k | } |
130 | | |
131 | | |
132 | 889 | static faidx_t *fai_build_core(BGZF *bgzf) { |
133 | 889 | kstring_t name = { 0, 0, NULL }; |
134 | 889 | int c, read_done, line_num; |
135 | 889 | faidx_t *idx; |
136 | 889 | uint64_t seq_offset, qual_offset; |
137 | 889 | uint64_t seq_len, qual_len; |
138 | 889 | uint64_t char_len, cl, line_len, ll; |
139 | 889 | enum read_state {OUT_READ, IN_NAME, IN_SEQ, SEQ_END, IN_QUAL} state; |
140 | | |
141 | 889 | idx = (faidx_t*)calloc(1, sizeof(faidx_t)); |
142 | 889 | idx->hash = kh_init(s); |
143 | 889 | idx->format = FAI_NONE; |
144 | | |
145 | 889 | state = OUT_READ, read_done = 0, line_num = 1; |
146 | 889 | seq_offset = qual_offset = seq_len = qual_len = char_len = cl = line_len = ll = 0; |
147 | | |
148 | 92.9k | while ((c = bgzf_getc(bgzf)) >= 0) { |
149 | 92.6k | switch (state) { |
150 | 10.4k | case OUT_READ: |
151 | 10.4k | switch (c) { |
152 | 8.92k | case '>': |
153 | 8.92k | if (idx->format == FAI_FASTQ) { |
154 | 11 | hts_log_error("Found '>' in a FASTQ file, error at line %d", line_num); |
155 | 11 | goto fail; |
156 | 11 | } |
157 | | |
158 | 8.91k | idx->format = FAI_FASTA; |
159 | 8.91k | state = IN_NAME; |
160 | 8.91k | break; |
161 | | |
162 | 411 | case '@': |
163 | 411 | if (idx->format == FAI_FASTA) { |
164 | 6 | hts_log_error("Found '@' in a FASTA file, error at line %d", line_num); |
165 | 6 | goto fail; |
166 | 6 | } |
167 | | |
168 | 405 | idx->format = FAI_FASTQ; |
169 | 405 | state = IN_NAME; |
170 | 405 | break; |
171 | | |
172 | 38 | case '\r': |
173 | | // Blank line with cr-lf ending? |
174 | 38 | if ((c = bgzf_getc(bgzf)) == '\n') { |
175 | 18 | line_num++; |
176 | 20 | } else { |
177 | 20 | hts_log_error("Format error, carriage return not followed by new line at line %d", line_num); |
178 | 20 | goto fail; |
179 | 20 | } |
180 | 18 | break; |
181 | | |
182 | 984 | case '\n': |
183 | | // just move onto the next line |
184 | 984 | line_num++; |
185 | 984 | break; |
186 | | |
187 | 83 | default: { |
188 | 83 | char s[4] = { '"', c, '"', '\0' }; |
189 | 83 | hts_log_error("Format error, unexpected %s at line %d", isprint(c) ? s : "character", line_num); |
190 | 83 | goto fail; |
191 | 38 | } |
192 | 10.4k | } |
193 | 10.3k | break; |
194 | | |
195 | 30.1k | case IN_NAME: |
196 | 30.1k | if (read_done) { |
197 | 20.3k | if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) |
198 | 0 | goto fail; |
199 | | |
200 | 20.3k | read_done = 0; |
201 | 20.3k | } |
202 | | |
203 | 30.1k | name.l = 0; |
204 | | |
205 | 892k | do { |
206 | 892k | if (!isspace(c)) { |
207 | 859k | kputc(c, &name); |
208 | 859k | } else if (name.l > 0 || c == '\n') { |
209 | 29.9k | break; |
210 | 29.9k | } |
211 | 892k | } while ((c = bgzf_getc(bgzf)) >= 0); |
212 | | |
213 | 0 | kputsn("", 0, &name); |
214 | | |
215 | 30.1k | if (c < 0) { |
216 | 205 | hts_log_error("The last entry '%s' has no sequence at line %d", name.s, line_num); |
217 | 205 | goto fail; |
218 | 205 | } |
219 | | |
220 | | // read the rest of the line if necessary |
221 | 2.23M | if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
222 | | |
223 | 29.9k | state = IN_SEQ; seq_len = qual_len = char_len = line_len = 0; |
224 | 29.9k | seq_offset = bgzf_utell(bgzf); |
225 | 29.9k | line_num++; |
226 | 29.9k | break; |
227 | | |
228 | 51.6k | case IN_SEQ: |
229 | 51.6k | if (idx->format == FAI_FASTA) { |
230 | 51.0k | if (c == '\n') { |
231 | 5.96k | state = OUT_READ; |
232 | 5.96k | line_num++; |
233 | 5.96k | continue; |
234 | 45.0k | } else if (c == '>') { |
235 | 20.8k | state = IN_NAME; |
236 | 20.8k | continue; |
237 | 20.8k | } |
238 | 51.0k | } else if (idx->format == FAI_FASTQ) { |
239 | 590 | if (c == '+') { |
240 | 89 | state = IN_QUAL; |
241 | 2.24k | if (c != '\n') while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
242 | 89 | qual_offset = bgzf_utell(bgzf); |
243 | 89 | line_num++; |
244 | 89 | continue; |
245 | 501 | } else if (c == '\n') { |
246 | 10 | hts_log_error("Inlined empty line is not allowed in sequence '%s' at line %d", name.s, line_num); |
247 | 10 | goto fail; |
248 | 10 | } |
249 | 590 | } |
250 | | |
251 | 24.7k | ll = cl = 0; |
252 | | |
253 | 24.7k | if (idx->format == FAI_FASTA) read_done = 1; |
254 | | |
255 | 1.08M | do { |
256 | 1.08M | ll++; |
257 | 1.08M | if (isgraph(c)) cl++; |
258 | 1.08M | } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
259 | | |
260 | 24.7k | ll++; seq_len += cl; |
261 | | |
262 | 24.7k | if (line_len == 0) { |
263 | 20.8k | line_len = ll; |
264 | 20.8k | char_len = cl; |
265 | 20.8k | } else if (line_len > ll) { |
266 | | |
267 | 2.78k | if (idx->format == FAI_FASTA) |
268 | 2.60k | state = OUT_READ; |
269 | 177 | else |
270 | 177 | state = SEQ_END; |
271 | | |
272 | 2.78k | } else if (line_len < ll) { |
273 | 45 | hts_log_error("Different line length in sequence '%s' at line %d", name.s, line_num); |
274 | 45 | goto fail; |
275 | 45 | } |
276 | | |
277 | 24.6k | line_num++; |
278 | 24.6k | break; |
279 | | |
280 | 162 | case SEQ_END: |
281 | 162 | if (c == '+') { |
282 | 151 | state = IN_QUAL; |
283 | 6.59k | while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
284 | 151 | qual_offset = bgzf_utell(bgzf); |
285 | 151 | line_num++; |
286 | 151 | } else { |
287 | 11 | hts_log_error("Format error, expecting '+', got '%c' at line %d", c, line_num); |
288 | 11 | goto fail; |
289 | 11 | } |
290 | 151 | break; |
291 | | |
292 | 258 | case IN_QUAL: |
293 | 258 | if (c == '\n') { |
294 | 22 | if (!read_done) { |
295 | 10 | hts_log_error("Inlined empty line is not allowed in quality of sequence '%s' at line %d", name.s, line_num); |
296 | 10 | goto fail; |
297 | 10 | } |
298 | | |
299 | 12 | state = OUT_READ; |
300 | 12 | line_num++; |
301 | 12 | continue; |
302 | 236 | } else if (c == '@' && read_done) { |
303 | 10 | state = IN_NAME; |
304 | 10 | continue; |
305 | 10 | } |
306 | | |
307 | 226 | ll = cl = 0; |
308 | | |
309 | 71.4k | do { |
310 | 71.4k | ll++; |
311 | 71.4k | if (isgraph(c)) cl++; |
312 | 71.4k | } while ((c = bgzf_getc(bgzf)) >= 0 && c != '\n'); |
313 | | |
314 | 226 | ll++; qual_len += cl; |
315 | | |
316 | 226 | if (line_len < ll) { |
317 | 93 | hts_log_error("Quality line length too long in '%s' at line %d", name.s, line_num); |
318 | 93 | goto fail; |
319 | 133 | } else if (qual_len == seq_len) { |
320 | 52 | read_done = 1; |
321 | 81 | } else if (qual_len > seq_len) { |
322 | 11 | hts_log_error("Quality length longer than sequence in '%s' at line %d", name.s, line_num); |
323 | 11 | goto fail; |
324 | 70 | } else if (line_len > ll) { |
325 | 41 | hts_log_error("Quality line length too short in '%s' at line %d", name.s, line_num); |
326 | 41 | goto fail; |
327 | 41 | } |
328 | | |
329 | 81 | line_num++; |
330 | 81 | break; |
331 | 92.6k | } |
332 | 92.6k | } |
333 | | |
334 | 343 | if (read_done) { |
335 | 176 | if (fai_insert_index(idx, name.s, seq_len, line_len, char_len, seq_offset, qual_offset) != 0) |
336 | 0 | goto fail; |
337 | 176 | } else { |
338 | 167 | hts_log_error("File truncated at line %d", line_num); |
339 | 167 | goto fail; |
340 | 167 | } |
341 | | |
342 | 176 | free(name.s); |
343 | 176 | return idx; |
344 | | |
345 | 713 | fail: |
346 | 713 | free(name.s); |
347 | 713 | fai_destroy(idx); |
348 | 713 | return NULL; |
349 | 343 | } |
350 | | |
351 | | |
352 | 0 | static int fai_save(const faidx_t *fai, hFILE *fp) { |
353 | 0 | khint_t k; |
354 | 0 | int i; |
355 | 0 | char buf[96]; // Must be big enough for format below. |
356 | |
|
357 | 0 | for (i = 0; i < fai->n; ++i) { |
358 | 0 | faidx1_t x; |
359 | 0 | k = kh_get(s, fai->hash, fai->name[i]); |
360 | 0 | assert(k < kh_end(fai->hash)); |
361 | 0 | x = kh_value(fai->hash, k); |
362 | |
|
363 | 0 | if (fai->format == FAI_FASTA) { |
364 | 0 | snprintf(buf, sizeof(buf), |
365 | 0 | "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\n", |
366 | 0 | x.len, x.seq_offset, x.line_blen, x.line_len); |
367 | 0 | } else { |
368 | 0 | snprintf(buf, sizeof(buf), |
369 | 0 | "\t%"PRIu64"\t%"PRIu64"\t%"PRIu32"\t%"PRIu32"\t%"PRIu64"\n", |
370 | 0 | x.len, x.seq_offset, x.line_blen, x.line_len, x.qual_offset); |
371 | 0 | } |
372 | |
|
373 | 0 | if (hputs(fai->name[i], fp) != 0) return -1; |
374 | 0 | if (hputs(buf, fp) != 0) return -1; |
375 | 0 | } |
376 | 0 | return 0; |
377 | 0 | } |
378 | | |
379 | | |
380 | | static faidx_t *fai_read(hFILE *fp, const char *fname, int format) |
381 | 0 | { |
382 | 0 | faidx_t *fai; |
383 | 0 | char *buf = NULL, *p; |
384 | 0 | ssize_t l, lnum = 1; |
385 | |
|
386 | 0 | fai = (faidx_t*)calloc(1, sizeof(faidx_t)); |
387 | 0 | if (!fai) return NULL; |
388 | | |
389 | 0 | fai->hash = kh_init(s); |
390 | 0 | if (!fai->hash) goto fail; |
391 | | |
392 | 0 | buf = (char*)calloc(0x10000, 1); |
393 | 0 | if (!buf) goto fail; |
394 | | |
395 | 0 | while ((l = hgetln(buf, 0x10000, fp)) > 0) { |
396 | 0 | uint32_t line_len, line_blen, n; |
397 | 0 | uint64_t len; |
398 | 0 | uint64_t seq_offset; |
399 | 0 | uint64_t qual_offset = 0; |
400 | |
|
401 | 0 | for (p = buf; *p && !isspace_c(*p); ++p); |
402 | |
|
403 | 0 | if (p - buf < l) { |
404 | 0 | *p = 0; ++p; |
405 | 0 | } |
406 | |
|
407 | 0 | if (format == FAI_FASTA) { |
408 | 0 | n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32, &len, &seq_offset, &line_blen, &line_len); |
409 | |
|
410 | 0 | if (n != 4) { |
411 | 0 | hts_log_error("Could not understand FASTA index %s line %zd", fname, lnum); |
412 | 0 | goto fail; |
413 | 0 | } |
414 | 0 | } else { |
415 | 0 | n = sscanf(p, "%"SCNu64"%"SCNu64"%"SCNu32"%"SCNu32"%"SCNu64, &len, &seq_offset, &line_blen, &line_len, &qual_offset); |
416 | |
|
417 | 0 | if (n != 5) { |
418 | 0 | if (n == 4) { |
419 | 0 | hts_log_error("Possibly this is a FASTA index, try using faidx. Problem in %s line %zd", fname, lnum); |
420 | 0 | } else { |
421 | 0 | hts_log_error("Could not understand FASTQ index %s line %zd", fname, lnum); |
422 | 0 | } |
423 | |
|
424 | 0 | goto fail; |
425 | 0 | } |
426 | 0 | } |
427 | | |
428 | 0 | if (fai_insert_index(fai, buf, len, line_len, line_blen, seq_offset, qual_offset) != 0) { |
429 | 0 | goto fail; |
430 | 0 | } |
431 | | |
432 | 0 | if (buf[l - 1] == '\n') ++lnum; |
433 | 0 | } |
434 | | |
435 | 0 | if (l < 0) { |
436 | 0 | hts_log_error("Error while reading %s: %s", fname, strerror(errno)); |
437 | 0 | goto fail; |
438 | 0 | } |
439 | 0 | free(buf); |
440 | 0 | return fai; |
441 | | |
442 | 0 | fail: |
443 | 0 | free(buf); |
444 | 0 | fai_destroy(fai); |
445 | 0 | return NULL; |
446 | 0 | } |
447 | | |
448 | | void fai_destroy(faidx_t *fai) |
449 | 1.95k | { |
450 | 1.95k | int i; |
451 | 1.95k | if (!fai) return; |
452 | 4.52k | for (i = 0; i < fai->n; ++i) free(fai->name[i]); |
453 | 889 | free(fai->name); |
454 | 889 | kh_destroy(s, fai->hash); |
455 | 889 | if (fai->bgzf) bgzf_close(fai->bgzf); |
456 | 889 | free(fai); |
457 | 889 | } |
458 | | |
459 | | |
460 | | static int fai_build3_core(const char *fn, const char *fnfai, const char *fngzi) |
461 | 1.24k | { |
462 | 1.24k | kstring_t fai_kstr = { 0, 0, NULL }; |
463 | 1.24k | kstring_t gzi_kstr = { 0, 0, NULL }; |
464 | 1.24k | BGZF *bgzf = NULL; |
465 | 1.24k | hFILE *fp = NULL; |
466 | 1.24k | faidx_t *fai = NULL; |
467 | 1.24k | int save_errno, res; |
468 | 1.24k | char *file_type; |
469 | | |
470 | 1.24k | bgzf = bgzf_open(fn, "r"); |
471 | | |
472 | 1.24k | if ( !bgzf ) { |
473 | 357 | hts_log_error("Failed to open the file %s : %s", fn, strerror(errno)); |
474 | 357 | goto fail; |
475 | 357 | } |
476 | | |
477 | 889 | if ( bgzf->is_compressed ) { |
478 | 12 | if (bgzf_index_build_init(bgzf) != 0) { |
479 | 0 | hts_log_error("Failed to allocate bgzf index"); |
480 | 0 | goto fail; |
481 | 0 | } |
482 | 12 | } |
483 | | |
484 | 889 | fai = fai_build_core(bgzf); |
485 | | |
486 | 889 | if ( !fai ) { |
487 | 713 | if (bgzf->is_compressed && bgzf->is_gzip) { |
488 | 12 | hts_log_error("Cannot index files compressed with gzip, please use bgzip"); |
489 | 12 | } |
490 | 713 | goto fail; |
491 | 713 | } |
492 | | |
493 | 176 | if (fai->format == FAI_FASTA) { |
494 | 163 | file_type = "FASTA"; |
495 | 163 | } else { |
496 | 13 | file_type = "FASTQ"; |
497 | 13 | } |
498 | | |
499 | 176 | if (!fnfai) { |
500 | 176 | if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; |
501 | 176 | fnfai = fai_kstr.s; |
502 | 176 | } |
503 | | |
504 | 176 | if (!fngzi) { |
505 | 176 | if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; |
506 | 176 | fngzi = gzi_kstr.s; |
507 | 176 | } |
508 | | |
509 | 176 | if ( bgzf->is_compressed ) { |
510 | 0 | if (bgzf_index_dump(bgzf, fngzi, NULL) < 0) { |
511 | 0 | hts_log_error("Failed to make bgzf index %s", fngzi); |
512 | 0 | goto fail; |
513 | 0 | } |
514 | 0 | } |
515 | | |
516 | 176 | res = bgzf_close(bgzf); |
517 | 176 | bgzf = NULL; |
518 | | |
519 | 176 | if (res < 0) { |
520 | 0 | hts_log_error("Error on closing %s : %s", fn, strerror(errno)); |
521 | 0 | goto fail; |
522 | 0 | } |
523 | | |
524 | 176 | fp = hopen(fnfai, "wb"); |
525 | | |
526 | 176 | if ( !fp ) { |
527 | 176 | hts_log_error("Failed to open %s index %s : %s", file_type, fnfai, strerror(errno)); |
528 | 176 | goto fail; |
529 | 176 | } |
530 | | |
531 | 0 | if (fai_save(fai, fp) != 0) { |
532 | 0 | hts_log_error("Failed to write %s index %s : %s", file_type, fnfai, strerror(errno)); |
533 | 0 | goto fail; |
534 | 0 | } |
535 | | |
536 | 0 | if (hclose(fp) != 0) { |
537 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); |
538 | 0 | goto fail; |
539 | 0 | } |
540 | | |
541 | 0 | free(fai_kstr.s); |
542 | 0 | free(gzi_kstr.s); |
543 | 0 | fai_destroy(fai); |
544 | 0 | return 0; |
545 | | |
546 | 1.24k | fail: |
547 | 1.24k | save_errno = errno; |
548 | 1.24k | free(fai_kstr.s); |
549 | 1.24k | free(gzi_kstr.s); |
550 | 1.24k | bgzf_close(bgzf); |
551 | 1.24k | fai_destroy(fai); |
552 | 1.24k | errno = save_errno; |
553 | 1.24k | return -1; |
554 | 0 | } |
555 | | |
556 | | |
557 | 1.24k | int fai_build3(const char *fn, const char *fnfai, const char *fngzi) { |
558 | 1.24k | return fai_build3_core(fn, fnfai, fngzi); |
559 | 1.24k | } |
560 | | |
561 | | |
562 | 1.24k | int fai_build(const char *fn) { |
563 | 1.24k | return fai_build3(fn, NULL, NULL); |
564 | 1.24k | } |
565 | | |
566 | | |
567 | | static faidx_t *fai_load3_core(const char *fn, const char *fnfai, const char *fngzi, |
568 | | int flags, int format) |
569 | 0 | { |
570 | 0 | kstring_t fai_kstr = { 0, 0, NULL }; |
571 | 0 | kstring_t gzi_kstr = { 0, 0, NULL }; |
572 | 0 | hFILE *fp = NULL; |
573 | 0 | faidx_t *fai = NULL; |
574 | 0 | int res, gzi_index_needed = 0; |
575 | 0 | char *file_type; |
576 | |
|
577 | 0 | if (format == FAI_FASTA) { |
578 | 0 | file_type = "FASTA"; |
579 | 0 | } else { |
580 | 0 | file_type = "FASTQ"; |
581 | 0 | } |
582 | |
|
583 | 0 | if (fn == NULL) |
584 | 0 | return NULL; |
585 | | |
586 | 0 | if (fnfai == NULL) { |
587 | 0 | if (ksprintf(&fai_kstr, "%s.fai", fn) < 0) goto fail; |
588 | 0 | fnfai = fai_kstr.s; |
589 | 0 | } |
590 | 0 | if (fngzi == NULL) { |
591 | 0 | if (ksprintf(&gzi_kstr, "%s.gzi", fn) < 0) goto fail; |
592 | 0 | fngzi = gzi_kstr.s; |
593 | 0 | } |
594 | | |
595 | 0 | fp = hopen(fnfai, "rb"); |
596 | |
|
597 | 0 | if (fp) { |
598 | | // index file present, check if a compressed index is needed |
599 | 0 | hFILE *gz = NULL; |
600 | 0 | BGZF *bgzf = bgzf_open(fn, "rb"); |
601 | |
|
602 | 0 | if (bgzf == 0) { |
603 | 0 | hts_log_error("Failed to open %s file %s", file_type, fn); |
604 | 0 | goto fail; |
605 | 0 | } |
606 | | |
607 | 0 | if (bgzf_compression(bgzf) == 2) { // BGZF compression |
608 | 0 | if ((gz = hopen(fngzi, "rb")) == 0) { |
609 | |
|
610 | 0 | if (!(flags & FAI_CREATE) || errno != ENOENT) { |
611 | 0 | hts_log_error("Failed to open %s index %s: %s", file_type, fngzi, strerror(errno)); |
612 | 0 | bgzf_close(bgzf); |
613 | 0 | goto fail; |
614 | 0 | } |
615 | | |
616 | 0 | gzi_index_needed = 1; |
617 | 0 | res = hclose(fp); // closed as going to be re-indexed |
618 | |
|
619 | 0 | if (res < 0) { |
620 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); |
621 | 0 | goto fail; |
622 | 0 | } |
623 | 0 | } else { |
624 | 0 | res = hclose(gz); |
625 | |
|
626 | 0 | if (res < 0) { |
627 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fngzi, strerror(errno)); |
628 | 0 | goto fail; |
629 | 0 | } |
630 | 0 | } |
631 | 0 | } |
632 | | |
633 | 0 | bgzf_close(bgzf); |
634 | 0 | } |
635 | | |
636 | 0 | if (fp == 0 || gzi_index_needed) { |
637 | 0 | if (!(flags & FAI_CREATE) || errno != ENOENT) { |
638 | 0 | hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); |
639 | 0 | goto fail; |
640 | 0 | } |
641 | | |
642 | 0 | hts_log_info("Build %s index", file_type); |
643 | |
|
644 | 0 | if (fai_build3_core(fn, fnfai, fngzi) < 0) { |
645 | 0 | goto fail; |
646 | 0 | } |
647 | | |
648 | 0 | fp = hopen(fnfai, "rb"); |
649 | 0 | if (fp == 0) { |
650 | 0 | hts_log_error("Failed to open %s index %s: %s", file_type, fnfai, strerror(errno)); |
651 | 0 | goto fail; |
652 | 0 | } |
653 | 0 | } |
654 | | |
655 | 0 | fai = fai_read(fp, fnfai, format); |
656 | 0 | if (fai == NULL) { |
657 | 0 | hts_log_error("Failed to read %s index %s", file_type, fnfai); |
658 | 0 | goto fail; |
659 | 0 | } |
660 | | |
661 | 0 | res = hclose(fp); |
662 | 0 | fp = NULL; |
663 | 0 | if (res < 0) { |
664 | 0 | hts_log_error("Failed on closing %s index %s : %s", file_type, fnfai, strerror(errno)); |
665 | 0 | goto fail; |
666 | 0 | } |
667 | | |
668 | 0 | fai->bgzf = bgzf_open(fn, "rb"); |
669 | 0 | if (fai->bgzf == 0) { |
670 | 0 | hts_log_error("Failed to open %s file %s", file_type, fn); |
671 | 0 | goto fail; |
672 | 0 | } |
673 | | |
674 | 0 | if ( fai->bgzf->is_compressed==1 ) { |
675 | 0 | if ( bgzf_index_load(fai->bgzf, fngzi, NULL) < 0 ) { |
676 | 0 | hts_log_error("Failed to load .gzi index: %s", fngzi); |
677 | 0 | goto fail; |
678 | 0 | } |
679 | 0 | } |
680 | 0 | free(fai_kstr.s); |
681 | 0 | free(gzi_kstr.s); |
682 | 0 | return fai; |
683 | | |
684 | 0 | fail: |
685 | 0 | if (fai) fai_destroy(fai); |
686 | 0 | if (fp) hclose_abruptly(fp); |
687 | 0 | free(fai_kstr.s); |
688 | 0 | free(gzi_kstr.s); |
689 | 0 | return NULL; |
690 | 0 | } |
691 | | |
692 | | |
693 | | faidx_t *fai_load3(const char *fn, const char *fnfai, const char *fngzi, |
694 | 0 | int flags) { |
695 | 0 | return fai_load3_core(fn, fnfai, fngzi, flags, FAI_FASTA); |
696 | 0 | } |
697 | | |
698 | | |
699 | | faidx_t *fai_load(const char *fn) |
700 | 0 | { |
701 | 0 | return fai_load3(fn, NULL, NULL, FAI_CREATE); |
702 | 0 | } |
703 | | |
704 | | |
705 | | faidx_t *fai_load3_format(const char *fn, const char *fnfai, const char *fngzi, |
706 | 0 | int flags, enum fai_format_options format) { |
707 | 0 | return fai_load3_core(fn, fnfai, fngzi, flags, format); |
708 | 0 | } |
709 | | |
710 | | |
711 | 0 | faidx_t *fai_load_format(const char *fn, enum fai_format_options format) { |
712 | 0 | return fai_load3_format(fn, NULL, NULL, FAI_CREATE, format); |
713 | 0 | } |
714 | | |
715 | | |
716 | | static char *fai_retrieve(const faidx_t *fai, const faidx1_t *val, |
717 | 0 | uint64_t offset, hts_pos_t beg, hts_pos_t end, hts_pos_t *len) { |
718 | 0 | char *buffer, *s; |
719 | 0 | ssize_t nread, remaining, firstline_len, firstline_blen; |
720 | 0 | int ret; |
721 | |
|
722 | 0 | if ((uint64_t) end - (uint64_t) beg >= SIZE_MAX - 2) { |
723 | 0 | hts_log_error("Range %"PRId64"..%"PRId64" too big", beg, end); |
724 | 0 | *len = -1; |
725 | 0 | return NULL; |
726 | 0 | } |
727 | | |
728 | 0 | if (val->line_blen <= 0) { |
729 | 0 | hts_log_error("Invalid line length in index: %d", val->line_blen); |
730 | 0 | *len = -1; |
731 | 0 | return NULL; |
732 | 0 | } |
733 | | |
734 | 0 | ret = bgzf_useek(fai->bgzf, |
735 | 0 | offset |
736 | 0 | + beg / val->line_blen * val->line_len |
737 | 0 | + beg % val->line_blen, SEEK_SET); |
738 | |
|
739 | 0 | if (ret < 0) { |
740 | 0 | *len = -1; |
741 | 0 | hts_log_error("Failed to retrieve block. (Seeking in a compressed, .gzi unindexed, file?)"); |
742 | 0 | return NULL; |
743 | 0 | } |
744 | | |
745 | | // Over-allocate so there is extra space for one end-of-line sequence |
746 | 0 | buffer = (char*)malloc((size_t) end - beg + val->line_len - val->line_blen + 1); |
747 | 0 | if (!buffer) { |
748 | 0 | *len = -1; |
749 | 0 | return NULL; |
750 | 0 | } |
751 | | |
752 | 0 | remaining = *len = end - beg; |
753 | 0 | firstline_blen = val->line_blen - beg % val->line_blen; |
754 | | |
755 | | // Special case when the entire interval requested is within a single FASTA/Q line |
756 | 0 | if (remaining <= firstline_blen) { |
757 | 0 | nread = bgzf_read_small(fai->bgzf, buffer, remaining); |
758 | 0 | if (nread < remaining) goto error; |
759 | 0 | buffer[nread] = '\0'; |
760 | 0 | return buffer; |
761 | 0 | } |
762 | | |
763 | 0 | s = buffer; |
764 | 0 | firstline_len = val->line_len - beg % val->line_blen; |
765 | | |
766 | | // Read the (partial) first line and its line terminator, but increment s past the |
767 | | // line contents only, so the terminator characters will be overwritten by the next line. |
768 | 0 | nread = bgzf_read_small(fai->bgzf, s, firstline_len); |
769 | 0 | if (nread < firstline_len) goto error; |
770 | 0 | s += firstline_blen; |
771 | 0 | remaining -= firstline_blen; |
772 | | |
773 | | // Similarly read complete lines and their line terminator characters, but overwrite the latter. |
774 | 0 | while (remaining > val->line_blen) { |
775 | 0 | nread = bgzf_read_small(fai->bgzf, s, val->line_len); |
776 | 0 | if (nread < (ssize_t) val->line_len) goto error; |
777 | 0 | s += val->line_blen; |
778 | 0 | remaining -= val->line_blen; |
779 | 0 | } |
780 | | |
781 | 0 | if (remaining > 0) { |
782 | 0 | nread = bgzf_read_small(fai->bgzf, s, remaining); |
783 | 0 | if (nread < remaining) goto error; |
784 | 0 | s += remaining; |
785 | 0 | } |
786 | | |
787 | 0 | *s = '\0'; |
788 | 0 | return buffer; |
789 | | |
790 | 0 | error: |
791 | 0 | hts_log_error("Failed to retrieve block: %s", |
792 | 0 | (nread == 0)? "unexpected end of file" : "error reading file"); |
793 | 0 | free(buffer); |
794 | 0 | *len = -1; |
795 | 0 | return NULL; |
796 | 0 | } |
797 | | |
798 | | static int fai_get_val(const faidx_t *fai, const char *str, |
799 | 0 | hts_pos_t *len, faidx1_t *val, hts_pos_t *fbeg, hts_pos_t *fend) { |
800 | 0 | khiter_t iter; |
801 | 0 | khash_t(s) *h; |
802 | 0 | int id; |
803 | 0 | hts_pos_t beg, end; |
804 | |
|
805 | 0 | if (!fai_parse_region(fai, str, &id, &beg, &end, 0)) { |
806 | 0 | hts_log_warning("Reference %s not found in FASTA file, returning empty sequence", str); |
807 | 0 | *len = -2; |
808 | 0 | return 1; |
809 | 0 | } |
810 | | |
811 | 0 | h = fai->hash; |
812 | 0 | iter = kh_get(s, h, faidx_iseq(fai, id)); |
813 | 0 | if (iter >= kh_end(h)) { |
814 | | // should have already been caught above |
815 | 0 | abort(); |
816 | 0 | } |
817 | 0 | *val = kh_value(h, iter); |
818 | |
|
819 | 0 | if (beg >= val->len) beg = val->len; |
820 | 0 | if (end >= val->len) end = val->len; |
821 | 0 | if (beg > end) beg = end; |
822 | |
|
823 | 0 | *fbeg = beg; |
824 | 0 | *fend = end; |
825 | |
|
826 | 0 | return 0; |
827 | 0 | } |
828 | | |
829 | | /* |
830 | | * The internal still has line_blen as uint32_t, but our references |
831 | | * can be longer, so for future proofing we use hts_pos_t. We also needed |
832 | | * a signed value so we can return negatives as an error. |
833 | | */ |
834 | | hts_pos_t fai_line_length(const faidx_t *fai, const char *str) |
835 | 0 | { |
836 | 0 | faidx1_t val; |
837 | 0 | int64_t beg, end; |
838 | 0 | hts_pos_t len; |
839 | |
|
840 | 0 | if (fai_get_val(fai, str, &len, &val, &beg, &end)) |
841 | 0 | return -1; |
842 | 0 | else |
843 | 0 | return val.line_blen; |
844 | 0 | } |
845 | | |
846 | | char *fai_fetch64(const faidx_t *fai, const char *str, hts_pos_t *len) |
847 | 0 | { |
848 | 0 | faidx1_t val; |
849 | 0 | int64_t beg, end; |
850 | |
|
851 | 0 | if (fai_get_val(fai, str, len, &val, &beg, &end)) { |
852 | 0 | return NULL; |
853 | 0 | } |
854 | | |
855 | | // now retrieve the sequence |
856 | 0 | return fai_retrieve(fai, &val, val.seq_offset, beg, end, len); |
857 | 0 | } |
858 | | |
859 | | char *fai_fetch(const faidx_t *fai, const char *str, int *len) |
860 | 0 | { |
861 | 0 | hts_pos_t len64; |
862 | 0 | char *ret = fai_fetch64(fai, str, &len64); |
863 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
864 | 0 | return ret; |
865 | 0 | } |
866 | | |
867 | 0 | char *fai_fetchqual64(const faidx_t *fai, const char *str, hts_pos_t *len) { |
868 | 0 | faidx1_t val; |
869 | 0 | int64_t beg, end; |
870 | |
|
871 | 0 | if (fai_get_val(fai, str, len, &val, &beg, &end)) { |
872 | 0 | return NULL; |
873 | 0 | } |
874 | | |
875 | | // now retrieve the sequence |
876 | 0 | return fai_retrieve(fai, &val, val.qual_offset, beg, end, len); |
877 | 0 | } |
878 | | |
879 | 0 | char *fai_fetchqual(const faidx_t *fai, const char *str, int *len) { |
880 | 0 | hts_pos_t len64; |
881 | 0 | char *ret = fai_fetchqual64(fai, str, &len64); |
882 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
883 | 0 | return ret; |
884 | 0 | } |
885 | | |
886 | | int faidx_fetch_nseq(const faidx_t *fai) |
887 | 0 | { |
888 | 0 | return fai->n; |
889 | 0 | } |
890 | | |
891 | | int faidx_nseq(const faidx_t *fai) |
892 | 0 | { |
893 | 0 | return fai->n; |
894 | 0 | } |
895 | | |
896 | | const char *faidx_iseq(const faidx_t *fai, int i) |
897 | 0 | { |
898 | 0 | return fai->name[i]; |
899 | 0 | } |
900 | | |
901 | | hts_pos_t faidx_seq_len64(const faidx_t *fai, const char *seq) |
902 | 0 | { |
903 | 0 | khint_t k = kh_get(s, fai->hash, seq); |
904 | 0 | if ( k == kh_end(fai->hash) ) return -1; |
905 | 0 | return kh_val(fai->hash, k).len; |
906 | 0 | } |
907 | | |
908 | | int faidx_seq_len(const faidx_t *fai, const char *seq) |
909 | 0 | { |
910 | 0 | hts_pos_t len = faidx_seq_len64(fai, seq); |
911 | 0 | return len < INT_MAX ? len : INT_MAX; |
912 | 0 | } |
913 | | |
914 | | static int faidx_adjust_position(const faidx_t *fai, int end_adjust, |
915 | | faidx1_t *val_out, const char *c_name, |
916 | | hts_pos_t *p_beg_i, hts_pos_t *p_end_i, |
917 | 0 | hts_pos_t *len) { |
918 | 0 | khiter_t iter; |
919 | 0 | faidx1_t *val; |
920 | | |
921 | | // Adjust position |
922 | 0 | iter = kh_get(s, fai->hash, c_name); |
923 | |
|
924 | 0 | if (iter == kh_end(fai->hash)) { |
925 | 0 | if (len) |
926 | 0 | *len = -2; |
927 | 0 | hts_log_error("The sequence \"%s\" was not found", c_name); |
928 | 0 | return 1; |
929 | 0 | } |
930 | | |
931 | 0 | val = &kh_value(fai->hash, iter); |
932 | |
|
933 | 0 | if (val_out) |
934 | 0 | *val_out = *val; |
935 | |
|
936 | 0 | if(*p_end_i < *p_beg_i) |
937 | 0 | *p_beg_i = *p_end_i; |
938 | |
|
939 | 0 | if(*p_beg_i < 0) |
940 | 0 | *p_beg_i = 0; |
941 | 0 | else if(val->len <= *p_beg_i) |
942 | 0 | *p_beg_i = val->len; |
943 | |
|
944 | 0 | if(*p_end_i < 0) |
945 | 0 | *p_end_i = 0; |
946 | 0 | else if(val->len <= *p_end_i) |
947 | 0 | *p_end_i = val->len - end_adjust; |
948 | |
|
949 | 0 | return 0; |
950 | 0 | } |
951 | | |
952 | | int fai_adjust_region(const faidx_t *fai, int tid, |
953 | | hts_pos_t *beg, hts_pos_t *end) |
954 | 0 | { |
955 | 0 | hts_pos_t orig_beg, orig_end; |
956 | |
|
957 | 0 | if (!fai || !beg || !end || tid < 0 || tid >= fai->n) |
958 | 0 | return -1; |
959 | | |
960 | 0 | orig_beg = *beg; |
961 | 0 | orig_end = *end; |
962 | 0 | if (faidx_adjust_position(fai, 0, NULL, fai->name[tid], beg, end, NULL) != 0) { |
963 | 0 | hts_log_error("Inconsistent faidx internal state - couldn't find \"%s\"", |
964 | 0 | fai->name[tid]); |
965 | 0 | return -1; |
966 | 0 | } |
967 | | |
968 | 0 | return ((orig_beg != *beg ? 1 : 0) | |
969 | 0 | (orig_end != *end && orig_end < HTS_POS_MAX ? 2 : 0)); |
970 | 0 | } |
971 | | |
972 | | char *faidx_fetch_seq64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) |
973 | 0 | { |
974 | 0 | faidx1_t val; |
975 | | |
976 | | // Adjust position |
977 | 0 | if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { |
978 | 0 | return NULL; |
979 | 0 | } |
980 | | |
981 | | // Now retrieve the sequence |
982 | 0 | return fai_retrieve(fai, &val, val.seq_offset, p_beg_i, p_end_i + 1, len); |
983 | 0 | } |
984 | | |
985 | | char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) |
986 | 0 | { |
987 | 0 | hts_pos_t len64; |
988 | 0 | char *ret = faidx_fetch_seq64(fai, c_name, p_beg_i, p_end_i, &len64); |
989 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
990 | 0 | return ret; |
991 | 0 | } |
992 | | |
993 | | char *faidx_fetch_qual64(const faidx_t *fai, const char *c_name, hts_pos_t p_beg_i, hts_pos_t p_end_i, hts_pos_t *len) |
994 | 0 | { |
995 | 0 | faidx1_t val; |
996 | | |
997 | | // Adjust position |
998 | 0 | if (faidx_adjust_position(fai, 1, &val, c_name, &p_beg_i, &p_end_i, len)) { |
999 | 0 | return NULL; |
1000 | 0 | } |
1001 | | |
1002 | | // Now retrieve the sequence |
1003 | 0 | return fai_retrieve(fai, &val, val.qual_offset, p_beg_i, p_end_i + 1, len); |
1004 | 0 | } |
1005 | | |
1006 | | char *faidx_fetch_qual(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len) |
1007 | 0 | { |
1008 | 0 | hts_pos_t len64; |
1009 | 0 | char *ret = faidx_fetch_qual64(fai, c_name, p_beg_i, p_end_i, &len64); |
1010 | 0 | *len = len64 < INT_MAX ? len64 : INT_MAX; // trunc |
1011 | 0 | return ret; |
1012 | 0 | } |
1013 | | |
1014 | | int faidx_has_seq(const faidx_t *fai, const char *seq) |
1015 | 0 | { |
1016 | 0 | khiter_t iter = kh_get(s, fai->hash, seq); |
1017 | 0 | if (iter == kh_end(fai->hash)) return 0; |
1018 | 0 | return 1; |
1019 | 0 | } |
1020 | | |
1021 | | const char *fai_parse_region(const faidx_t *fai, const char *s, |
1022 | | int *tid, hts_pos_t *beg, hts_pos_t *end, |
1023 | | int flags) |
1024 | 0 | { |
1025 | 0 | return hts_parse_region(s, tid, beg, end, (hts_name2id_f)fai_name2id, (void *)fai, flags); |
1026 | 0 | } |
1027 | | |
1028 | 0 | void fai_set_cache_size(faidx_t *fai, int cache_size) { |
1029 | 0 | bgzf_set_cache_size(fai->bgzf, cache_size); |
1030 | 0 | } |
1031 | | |
1032 | | // Adds a thread pool to the underlying BGZF layer. |
1033 | 0 | int fai_thread_pool(faidx_t *fai, struct hts_tpool *pool, int qsize) { |
1034 | 0 | return bgzf_thread_pool(fai->bgzf, pool, qsize); |
1035 | 0 | } |
1036 | | |
1037 | 0 | char *fai_path(const char *fa) { |
1038 | 0 | char *fai = NULL; |
1039 | 0 | if (!fa) { |
1040 | 0 | hts_log_error("No reference file specified"); |
1041 | 0 | } else { |
1042 | 0 | char *fai_tmp = strstr(fa, HTS_IDX_DELIM); |
1043 | 0 | if (fai_tmp) { |
1044 | 0 | fai_tmp += strlen(HTS_IDX_DELIM); |
1045 | 0 | fai = strdup(fai_tmp); |
1046 | 0 | if (!fai) |
1047 | 0 | hts_log_error("Failed to allocate memory"); |
1048 | 0 | } else { |
1049 | 0 | if (hisremote(fa)) { |
1050 | 0 | fai = hts_idx_locatefn(fa, ".fai"); // get the remote fai file name, if any, but do not download the file |
1051 | 0 | if (!fai) |
1052 | 0 | hts_log_error("Failed to locate index file for remote reference file '%s'", fa); |
1053 | 0 | } else{ |
1054 | 0 | if (hts_idx_check_local(fa, HTS_FMT_FAI, &fai) == 0 && fai) { |
1055 | 0 | if (fai_build3(fa, fai, NULL) == -1) { // create local fai file by indexing local fasta |
1056 | 0 | hts_log_error("Failed to build index file for reference file '%s'", fa); |
1057 | 0 | free(fai); |
1058 | 0 | fai = NULL; |
1059 | 0 | } |
1060 | 0 | } |
1061 | 0 | } |
1062 | 0 | } |
1063 | 0 | } |
1064 | |
|
1065 | 0 | return fai; |
1066 | 0 | } |