/src/htslib/hts_internal.h
Line | Count | Source |
1 | | /* hts_internal.h -- internal functions; not part of the public API. |
2 | | |
3 | | Copyright (C) 2015-2016, 2018-2020 Genome Research Ltd. |
4 | | |
5 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
6 | | of this software and associated documentation files (the "Software"), to deal |
7 | | in the Software without restriction, including without limitation the rights |
8 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
9 | | copies of the Software, and to permit persons to whom the Software is |
10 | | furnished to do so, subject to the following conditions: |
11 | | |
12 | | The above copyright notice and this permission notice shall be included in |
13 | | all copies or substantial portions of the Software. |
14 | | |
15 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
21 | | DEALINGS IN THE SOFTWARE. */ |
22 | | |
23 | | #ifndef HTSLIB_HTS_INTERNAL_H |
24 | | #define HTSLIB_HTS_INTERNAL_H |
25 | | |
26 | | #include <stddef.h> |
27 | | #include <ctype.h> |
28 | | #include <time.h> |
29 | | |
30 | | #include "htslib/hts.h" |
31 | | #include "textutils_internal.h" |
32 | | |
33 | 12.9k | #define HTS_MAX_EXT_LEN 9 |
34 | | |
35 | | #ifdef __cplusplus |
36 | | extern "C" { |
37 | | #endif |
38 | | |
39 | | struct hFILE; |
40 | | |
41 | | struct hts_json_token { |
42 | | char type; ///< Token type |
43 | | char *str; ///< Value as a C string (filled in for all token types) |
44 | | // TODO Add other fields to fill in for particular data types, e.g. |
45 | | // int inum; |
46 | | // float fnum; |
47 | | }; |
48 | | |
49 | | struct cram_fd; |
50 | | |
51 | | /* |
52 | | * Adjust CSI index parameters to support max_len_in bases |
53 | | * |
54 | | * @param max_len_in Maximum position to be indexed |
55 | | * @param min_shift_[in,out] min_shift parameter |
56 | | * @param n_lvls_[in,out] n_lvls parameter |
57 | | * |
58 | | * Adjusts *n_lvls_ (preferred) or *min_shift_ so that the resulting values |
59 | | * can be passed to hts_idx_init(, HTS_FMT_CSI, ...) in order to make an |
60 | | * index that can store positions up to max_len_in bases. |
61 | | */ |
62 | | void hts_adjust_csi_settings(int64_t max_len_in, int *min_shift_, int *n_lvls_); |
63 | | |
64 | | /* |
65 | | * Check the existence of a local index file using part of the alignment file name. |
66 | | * The order is alignment.bam.csi, alignment.csi, alignment.bam.bai, alignment.bai |
67 | | * @param fn - pointer to the file name |
68 | | * @param fnidx - pointer to the index file name placeholder |
69 | | * @return 1 for success, 0 for failure |
70 | | */ |
71 | | int hts_idx_check_local(const char *fn, int fmt, char **fnidx); |
72 | | |
73 | | // Retrieve the name of the index file and also download it, if it is remote |
74 | | char *hts_idx_getfn(const char *fn, const char *ext); |
75 | | |
76 | | // Retrieve the name of the index file, but do not download it, if it is remote |
77 | | char *hts_idx_locatefn(const char *fn, const char *ext); |
78 | | |
79 | | // Used for on-the-fly indexing. See the comments in hts.c. |
80 | | void hts_idx_amend_last(hts_idx_t *idx, uint64_t offset); |
81 | | |
82 | | int hts_idx_fmt(hts_idx_t *idx); |
83 | | |
84 | | // Internal interface to save on-the-fly indexes. The index file handle |
85 | | // is kept open so hts_close() can close it after writing out the EOF |
86 | | // block for its own file. |
87 | | int hts_idx_save_but_not_close(hts_idx_t *idx, const char *fnidx, int fmt); |
88 | | |
89 | | // Construct a unique filename based on fname and open it. |
90 | | struct hFILE *hts_open_tmpfile(const char *fname, const char *mode, kstring_t *tmpname); |
91 | | |
92 | | // Check that index is capable of storing items in range beg..end |
93 | | int hts_idx_check_range(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end); |
94 | | |
95 | | // The CRAM implementation stores the loaded index within the cram_fd rather |
96 | | // than separately as is done elsewhere in htslib. So if p is a pointer to |
97 | | // an hts_idx_t with p->fmt == HTS_FMT_CRAI, then it actually points to an |
98 | | // hts_cram_idx_t and should be cast accordingly. |
99 | | typedef struct hts_cram_idx_t { |
100 | | int fmt; |
101 | | struct cram_fd *cram; |
102 | | } hts_cram_idx_t; |
103 | | |
104 | | // Determine whether the string's contents appear to be UTF-16-encoded text. |
105 | | // Returns 1 if they are, 2 if there is also a BOM, or 0 otherwise. |
106 | | int hts_is_utf16_text(const kstring_t *str); |
107 | | |
108 | | // Entry point to hFILE_multipart backend. |
109 | | struct hFILE *hopen_htsget_redirect(struct hFILE *hfile, const char *mode); |
110 | | |
111 | | struct hts_path_itr { |
112 | | kstring_t path, entry; |
113 | | void *dirv; // DIR * privately |
114 | | const char *pathdir, *prefix, *suffix; |
115 | | size_t prefix_len, suffix_len, entry_dir_l; |
116 | | }; |
117 | | |
118 | | void hts_path_itr_setup(struct hts_path_itr *itr, const char *path, |
119 | | const char *builtin_path, const char *prefix, size_t prefix_len, |
120 | | const char *suffix, size_t suffix_len); |
121 | | |
122 | | const char *hts_path_itr_next(struct hts_path_itr *itr); |
123 | | |
124 | | typedef void plugin_void_func(void); |
125 | | plugin_void_func *load_plugin(void **pluginp, const char *filename, const char *symbol); |
126 | | void *plugin_sym(void *plugin, const char *name, const char **errmsg); |
127 | | plugin_void_func *plugin_func(void *plugin, const char *name, const char **errmsg); |
128 | | void close_plugin(void *plugin); |
129 | | const char *hts_plugin_path(void); |
130 | | |
131 | | /* |
132 | | * Buffers up arguments to hts_idx_push for later use, once we've written all bar |
133 | | * this block. This is necessary when multiple blocks are in flight (threading). |
134 | | * |
135 | | * Returns 0 on success, |
136 | | * -1 on failure |
137 | | */ |
138 | | int bgzf_idx_push(BGZF *fp, hts_idx_t *hidx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); |
139 | | |
140 | | static inline int find_file_extension(const char *fn, char ext_out[static HTS_MAX_EXT_LEN]) |
141 | 12.9k | { |
142 | 12.9k | const char *delim = fn ? strstr(fn, HTS_IDX_DELIM) : NULL, *ext; |
143 | 12.9k | if (!fn) return -1; |
144 | 12.9k | if (!delim) delim = fn + strlen(fn); |
145 | 64.9k | for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {} |
146 | 12.9k | if (*ext == '.' && ext > fn && |
147 | 0 | ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension |
148 | 0 | (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension |
149 | 0 | { |
150 | 0 | for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {} |
151 | 0 | } |
152 | 12.9k | if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3) |
153 | 12.9k | return -1; |
154 | 0 | memcpy(ext_out, ext + 1, delim - ext - 1); |
155 | 0 | ext_out[delim - ext - 1] = '\0'; |
156 | 0 | return 0; |
157 | 12.9k | } Unexecuted instantiation: hfile.c:find_file_extension hts.c:find_file_extension Line | Count | Source | 141 | 12.9k | { | 142 | 12.9k | const char *delim = fn ? strstr(fn, HTS_IDX_DELIM) : NULL, *ext; | 143 | 12.9k | if (!fn) return -1; | 144 | 12.9k | if (!delim) delim = fn + strlen(fn); | 145 | 64.9k | for (ext = delim; ext > fn && *ext != '.' && *ext != '/'; --ext) {} | 146 | 12.9k | if (*ext == '.' && ext > fn && | 147 | 0 | ((delim - ext == 3 && ext[1] == 'g' && ext[2] == 'z') || // permit .sam.gz as a valid file extension | 148 | 0 | (delim - ext == 4 && ext[1] == 'b' && ext[2] == 'g' && ext[3] == 'z'))) // permit .vcf.bgz as a valid file extension | 149 | 0 | { | 150 | 0 | for (ext--; ext > fn && *ext != '.' && *ext != '/'; --ext) {} | 151 | 0 | } | 152 | 12.9k | if (*ext != '.' || delim - ext > HTS_MAX_EXT_LEN || delim - ext < 3) | 153 | 12.9k | return -1; | 154 | 0 | memcpy(ext_out, ext + 1, delim - ext - 1); | 155 | 0 | ext_out[delim - ext - 1] = '\0'; | 156 | 0 | return 0; | 157 | 12.9k | } |
Unexecuted instantiation: multipart.c:find_file_extension Unexecuted instantiation: sam.c:find_file_extension Unexecuted instantiation: textutils.c:find_file_extension Unexecuted instantiation: vcf.c:find_file_extension Unexecuted instantiation: cram_index.c:find_file_extension Unexecuted instantiation: cram_io.c:find_file_extension Unexecuted instantiation: bgzf.c:find_file_extension Unexecuted instantiation: faidx.c:find_file_extension Unexecuted instantiation: tbx.c:find_file_extension |
158 | | |
/*
 * Suspend the calling thread for usec microseconds using nanosleep().
 *
 * @param usec  Time to sleep, in microseconds
 * @return The value returned by nanosleep()
 */
static inline int hts_usleep(long long usec)
{
    struct timespec delay = { 0 };

    delay.tv_sec  = usec / 1000000;          // whole seconds
    delay.tv_nsec = (usec % 1000000) * 1000; // leftover microseconds, as nanoseconds
    return nanosleep(&delay, NULL);
}
164 | | |
/*!
  @abstract  Is SVLEN the reference length for a VCF ALT allele?
  @param alt   ALT allele
  @param size  Length of @p alt; -1 if not known
  @return      1 if yes; 0 if not.

  This is used when reading VCF and in tabix to check if SVLEN should be taken
  into account when working out the reference length.  It should if the
  ALT allele is a symbolic one of type CNV, DEL, DUP or INV, plus
  sub-types like <CNV:TR> or <DEL:ME>.

  @p alt does not have to be NUL-terminated, but if not @p size should be
  greater than or equal to zero.  If @p size is less than zero, @p alt must be
  NUL-terminated.  When @p size is zero, @p alt is not read at all.
 */

static inline int svlen_on_ref_for_vcf_alt(const char *alt, int32_t size)
{
    size_t sz;
    if (size == 0)                      // Empty allele: no byte to inspect
        return 0;
    if (*alt != '<')                    // Check if ALT is symbolic
        return 0;
    // Only call strlen() once we know the allele is symbolic
    sz = size > 0 ? (size_t) size : strlen(alt);
    if (sz < 5)                         // Reject if not long enough
        return 0;
    if (alt[4] != '>' && alt[4] != ':') // Reject if too long
        return 0;
    if (memcmp(alt, "<CNV", 4) != 0     // Copy-number variation
        && memcmp(alt, "<DEL", 4) != 0  // Deletion
        && memcmp(alt, "<DUP", 4) != 0  // Duplication
        && memcmp(alt, "<INV", 4) != 0) // Inversion
        return 0;
    return alt[sz - 1] == '>' ? 1 : 0;  // Check symbolic allele ends correctly
}
Unexecuted instantiation: cram_index.c:svlen_on_ref_for_vcf_alt Unexecuted instantiation: cram_io.c:svlen_on_ref_for_vcf_alt Unexecuted instantiation: bgzf.c:svlen_on_ref_for_vcf_alt Unexecuted instantiation: faidx.c:svlen_on_ref_for_vcf_alt Unexecuted instantiation: tbx.c:svlen_on_ref_for_vcf_alt |
198 | | |
199 | | #ifdef __cplusplus |
200 | | } |
201 | | #endif |
202 | | |
203 | | #endif |