/src/htslib/hts_internal.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* hts_internal.h -- internal functions; not part of the public API. |
2 | | |
3 | | Copyright (C) 2015-2016, 2018-2020 Genome Research Ltd. |
4 | | |
5 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
6 | | of this software and associated documentation files (the "Software"), to deal |
7 | | in the Software without restriction, including without limitation the rights |
8 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
9 | | copies of the Software, and to permit persons to whom the Software is |
10 | | furnished to do so, subject to the following conditions: |
11 | | |
12 | | The above copyright notice and this permission notice shall be included in |
13 | | all copies or substantial portions of the Software. |
14 | | |
15 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
21 | | DEALINGS IN THE SOFTWARE. */ |
22 | | |
23 | | #ifndef HTSLIB_HTS_INTERNAL_H |
24 | | #define HTSLIB_HTS_INTERNAL_H |
25 | | |
26 | | #include <stddef.h> |
27 | | #include <ctype.h> |
28 | | #include <time.h> |
29 | | |
30 | | #include "htslib/hts.h" |
31 | | #include "textutils_internal.h" |
32 | | |
33 | 5.44k | #define HTS_MAX_EXT_LEN 9 |
34 | | |
35 | | #ifdef __cplusplus |
36 | | extern "C" { |
37 | | #endif |
38 | | |
39 | | struct hFILE; |
40 | | |
41 | | struct hts_json_token { |
42 | | char type; ///< Token type |
43 | | char *str; ///< Value as a C string (filled in for all token types) |
44 | | // TODO Add other fields to fill in for particular data types, e.g. |
45 | | // int inum; |
46 | | // float fnum; |
47 | | }; |
48 | | |
49 | | struct cram_fd; |
50 | | |
51 | | /* |
52 | | * Check the existence of a local index file using part of the alignment file name. |
53 | | * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai |
54 | | * @param fn - pointer to the file name |
55 | | * @param fnidx - pointer to the index file name placeholder |
56 | | * @return 1 for success, 0 for failure |
57 | | */ |
58 | | int hts_idx_check_local(const char *fn, int fmt, char **fnidx); |
59 | | |
60 | | // Retrieve the name of the index file and also download it, if it is remote |
61 | | char *hts_idx_getfn(const char *fn, const char *ext); |
62 | | |
63 | | // Retrieve the name of the index file, but do not download it, if it is remote |
64 | | char *hts_idx_locatefn(const char *fn, const char *ext); |
65 | | |
66 | | // Used for on-the-fly indexing. See the comments in hts.c. |
67 | | void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset); |
68 | | |
69 | | int hts_idx_fmt(hts_idx_t *idx); |
70 | | |
71 | | // Internal interface to save on-the-fly indexes. The index file handle |
72 | | // is kept open so hts_close() can close it after writing out the EOF |
73 | | // block for its own file. |
74 | | int hts_idx_save_but_not_close(hts_idx_t *idx, const char *fnidx, int fmt); |
75 | | |
76 | | // Construct a unique filename based on fname and open it. |
77 | | struct hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname); |
78 | | |
79 | | // Check that index is capable of storing items in range beg..end |
80 | | int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end); |
81 | | |
82 | | // The CRAM implementation stores the loaded index within the cram_fd rather |
83 | | // than separately as is done elsewhere in htslib. So if p is a pointer to |
84 | | // an hts_idx_t with p->fmt == HTS_FMT_CRAI, then it actually points to an |
85 | | // hts_cram_idx_t and should be cast accordingly. |
86 | | typedef struct hts_cram_idx_t { |
87 | | int fmt; |
88 | | struct cram_fd *cram; |
89 | | } hts_cram_idx_t; |
90 | | |
91 | | // Determine whether the string's contents appear to be UTF-16-encoded text. |
92 | | // Returns 1 if they are, 2 if there is also a BOM, or 0 otherwise. |
93 | | int hts_is_utf16_text(const kstring_t *str); |
94 | | |
95 | | // Entry point to hFILE_multipart backend. |
96 | | struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode); |
97 | | |
98 | | struct hts_path_itr { |
99 | | kstring_t path, entry; |
100 | | void *dirv; // DIR * privately |
101 | | const char *pathdir, *prefix, *suffix; |
102 | | size_t prefix_len, suffix_len, entry_dir_l; |
103 | | }; |
104 | | |
105 | | void hts_path_itr_setup(struct hts_path_itr *itr, const char *path, |
106 | | const char *builtin_path, const char *prefix, size_t prefix_len, |
107 | | const char *suffix, size_t suffix_len); |
108 | | |
109 | | const char *hts_path_itr_next(struct hts_path_itr *itr); |
110 | | |
111 | | typedef void plugin_void_func(void); |
112 | | plugin_void_func *load_plugin(void **pluginp, const char *filename, const char *symbol); |
113 | | void *plugin_sym(void *plugin, const char *name, const char **errmsg); |
114 | | plugin_void_func *plugin_func(void *plugin, const char *name, const char **errmsg); |
115 | | void close_plugin(void *plugin); |
116 | | const char *hts_plugin_path(void); |
117 | | |
118 | | /* |
119 | | * Buffers up arguments to hts_idx_push for later use, once we've written all bar |
120 | | * this block. This is necessary when multiple blocks are in flight (threading). |
121 | | * |
122 | | * Returns 0 on success, |
123 | | * -1 on failure |
124 | | */ |
125 | | int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); |
126 | | |
127 | | static inline int find_file_extension(const char *fn, char ext_out[static HTS_MAX_EXT_LEN]) |
128 | 5.44k | { |
129 | 5.44k | const char *delim = fn ? strstr(fn, HTS_IDX_DELIM) : NULL, *ext; |
130 | 5.44k | if (!fn) return -1; |
131 | 5.44k | if (!delim) delim = fn + strlen(fn); |
132 | 27.2k | for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {} |
133 | 5.44k | if (*ext == '.' && ext > fn && |
134 | 5.44k | ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension |
135 | 0 | (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension |
136 | 0 | { |
137 | 0 | for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {} |
138 | 0 | } |
139 | 5.44k | if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3) |
140 | 5.44k | return -1; |
141 | 0 | memcpy(ext_out, ext + 1, delim - ext - 1); |
142 | 0 | ext_out[delim - ext - 1] = '\0'; |
143 | 0 | return 0; |
144 | 5.44k | } Unexecuted instantiation: hfile.c:find_file_extension hts.c:find_file_extension Line | Count | Source | 128 | 5.44k | { | 129 | 5.44k | const char *delim = fn ? strstr(fn, HTS_IDX_DELIM) : NULL, *ext; | 130 | 5.44k | if (!fn) return -1; | 131 | 5.44k | if (!delim) delim = fn + strlen(fn); | 132 | 27.2k | for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {} | 133 | 5.44k | if (*ext == '.' && ext > fn && | 134 | 5.44k | ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension | 135 | 0 | (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension | 136 | 0 | { | 137 | 0 | for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {} | 138 | 0 | } | 139 | 5.44k | if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3) | 140 | 5.44k | return -1; | 141 | 0 | memcpy(ext_out, ext + 1, delim - ext - 1); | 142 | 0 | ext_out[delim - ext - 1] = '\0'; | 143 | 0 | return 0; | 144 | 5.44k | } |
Unexecuted instantiation: multipart.c:find_file_extension Unexecuted instantiation: sam.c:find_file_extension Unexecuted instantiation: textutils.c:find_file_extension Unexecuted instantiation: vcf.c:find_file_extension Unexecuted instantiation: cram_index.c:find_file_extension Unexecuted instantiation: cram_io.c:find_file_extension Unexecuted instantiation: bgzf.c:find_file_extension Unexecuted instantiation: faidx.c:find_file_extension Unexecuted instantiation: tbx.c:find_file_extension |
145 | | |
/*
 * Sleep for a period given in microseconds.
 *
 * The interval is split into whole seconds and remaining nanoseconds and
 * handed to nanosleep().
 *
 * @param usec  Time to sleep, in microseconds
 * @return The value returned by nanosleep()
 */
static inline int hts_usleep(long long usec)
{
    struct timespec delay;

    delay.tv_sec  = usec / 1000000;             // whole seconds
    delay.tv_nsec = (usec % 1000000) * 1000;    // leftover microseconds, as nanoseconds
    return nanosleep(&delay, NULL);
}
151 | | |
/*!
  @abstract Is SVLEN the reference length for a VCF ALT allele?
  @param alt  ALT allele
  @param size Length of @p alt; -1 if not known
  @return 1 if yes; 0 if not.

  This is used when reading VCF and in tabix to check if SVLEN should be taken
  into account when working out the reference length.  It should if the
  ALT allele is a symbolic one of type CNV, DEL, DUP or INV, plus
  sub-types like <CNV:TR> or <DEL:ME>.

  @p alt does not have to be NUL-terminated, but if not @p size should be
  greater than or equal to zero.  If @p size is less than zero, @p alt must be
  NUL-terminated.  When @p size is given, no byte beyond alt[size - 1] is
  read; in particular a @p size of zero never dereferences @p alt.
 */

static inline int svlen_on_ref_for_vcf_alt(const char *alt, int32_t size)
{
    size_t sz;

    // With an explicit length the buffer may be unterminated (or empty), so
    // rule out anything too short before reading from it.
    if (size >= 0 && size < 5)
        return 0;
    if (alt[0] != '<')                      // Check if ALT is symbolic
        return 0;
    sz = size >= 0 ? (size_t) size : strlen(alt);
    if (sz < 5)                             // Reject if not long enough
        return 0;
    if (alt[4] != '>' && alt[4] != ':')     // Reject if the type name is longer than three characters
        return 0;
    if (memcmp(alt, "<CNV", 4) != 0         // Copy-number variation
        && memcmp(alt, "<DEL", 4) != 0      // Deletion
        && memcmp(alt, "<DUP", 4) != 0      // Duplication
        && memcmp(alt, "<INV", 4) != 0)     // Inversion
        return 0;
    return alt[sz - 1] == '>' ? 1 : 0;      // Check symbolic allele ends correctly
}
185 | | |
186 | | #ifdef __cplusplus |
187 | | } |
188 | | #endif |
189 | | |
190 | | #endif |