/src/htslib/textutils_internal.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* textutils_internal.h -- non-bioinformatics utility routines for text etc. |
2 | | |
3 | | Copyright (C) 2016,2018-2020 Genome Research Ltd. |
4 | | |
5 | | Author: John Marshall <jm18@sanger.ac.uk> |
6 | | |
7 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | | of this software and associated documentation files (the "Software"), to deal |
9 | | in the Software without restriction, including without limitation the rights |
10 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | copies of the Software, and to permit persons to whom the Software is |
12 | | furnished to do so, subject to the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be included in |
15 | | all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
23 | | DEALINGS IN THE SOFTWARE. */ |
24 | | |
25 | | #ifndef HTSLIB_TEXTUTILS_INTERNAL_H |
26 | | #define HTSLIB_TEXTUTILS_INTERNAL_H |
27 | | |
28 | | /* N.B. These interfaces may be used by plug-ins */ |
29 | | |
30 | | #include <ctype.h> |
31 | | #include <stdlib.h> |
32 | | #include "htslib/kstring.h" |
33 | | |
34 | | #ifdef __cplusplus |
35 | | extern "C" { |
36 | | #endif |
37 | | |
38 | | /// Decode percent-encoded (URL-encoded) text |
39 | | /** On input, _dest_ should be a buffer at least the same size as _s_, |
40 | | and may be equal to _s_ to decode in place. On output, _dest_ will be |
41 | | NUL-terminated and the number of characters written (not including the |
42 | | NUL) is stored in _destlen_. |
43 | | */ |
44 | | int hts_decode_percent(char *dest, size_t *destlen, const char *s); |
45 | | |
46 | | /// Return decoded data length given length of base64-encoded text |
47 | | /** This gives an upper bound, as it overestimates by a byte or two when |
48 | | the encoded text ends with (possibly omitted) `=` padding characters. |
49 | | */ |
50 | | size_t hts_base64_decoded_length(size_t len); |
51 | | |
52 | | /// Decode base64-encoded data |
53 | | /** On input, _dest_ should be a sufficient buffer (see `hts_base64_decoded_length()`), |
54 | | and may be equal to _s_ to decode in place. On output, the number of |
55 | | bytes written is stored in _destlen_. |
56 | | */ |
57 | | int hts_decode_base64(char *dest, size_t *destlen, const char *s); |
58 | | |
59 | | /// Token structure returned by JSON lexing functions |
60 | | /** Structure is defined in hts_internal.h |
61 | | */ |
62 | | |
63 | | typedef struct hts_json_token hts_json_token; |
64 | | |
65 | | /// Allocate an empty JSON token structure, for use with hts_json_* functions |
66 | | /** @return An empty token on success; NULL on failure |
67 | | */ |
68 | | HTSLIB_EXPORT |
69 | | hts_json_token *hts_json_alloc_token(void); |
70 | | |
71 | | /// Free a JSON token |
72 | | HTSLIB_EXPORT |
73 | | void hts_json_free_token(hts_json_token *token); |
74 | | |
75 | | /// Accessor function to get JSON token type |
76 | | /** @param token Pointer to JSON token |
77 | | @return Character indicating the token type |
78 | | |
79 | | Token types correspond to scalar JSON values and selected punctuation |
80 | | as follows: |
81 | | - `s` string |
82 | | - `n` number |
83 | | - `b` boolean literal |
84 | | - `.` null literal |
85 | | - `{`, `}`, `[`, `]` object and array delimiters |
86 | | - `?` lexing error |
87 | | - `!` other errors (e.g. out of memory) |
88 | | - `\0` terminator at end of input |
89 | | */ |
90 | | HTSLIB_EXPORT |
91 | | char hts_json_token_type(hts_json_token *token); |
92 | | |
93 | | /// Accessor function to get JSON token in string form |
94 | | /** @param token Pointer to JSON token |
95 | | @return String representation of the JSON token; NULL if unset |
96 | | |
97 | | If the token was parsed from a string using hts_json_snext(), the return value |
98 | | will point into the string passed as the first parameter to hts_json_snext(). |
99 | | If the token was parsed from a file using hts_json_fnext(), the return value |
100 | | will point at the kstring_t buffer passed as the third parameter to |
101 | | hts_json_fnext(). In that case, the value will only be valid until the |
102 | | next call to hts_json_fnext(). |
103 | | */ |
104 | | HTSLIB_EXPORT |
105 | | char *hts_json_token_str(hts_json_token *token); |
106 | | |
107 | | /// Read one JSON token from a string |
108 | | /** @param str The input C string |
109 | | @param state The input string state |
110 | | @param token On return, filled in with the token read |
111 | | @return The type of the token read |
112 | | |
113 | | On return, `token->str` points into the supplied input string, which |
114 | | is modified by having token-terminating characters overwritten as NULs. |
115 | | The `state` argument records the current position within `str` after each |
116 | | `hts_json_snext()` call, and should be set to 0 before the first call. |
117 | | */ |
118 | | HTSLIB_EXPORT |
119 | | char hts_json_snext(char *str, size_t *state, hts_json_token *token); |
120 | | |
121 | | /// Read and discard a complete JSON value from a string |
122 | | /** @param str The input C string |
123 | | @param state The input string state, as per `hts_json_snext()` |
124 | | @param type If the first token of the value to be discarded has already |
125 | | been read, provide its type; otherwise `'\0'` |
126 | | @return One of `v` (success), `\0` (end of string), and `?` (lexing error) |
127 | | |
128 | | Skips a complete JSON value, which may be a single token or an entire object |
129 | | or array. |
130 | | */ |
131 | | HTSLIB_EXPORT |
132 | | char hts_json_sskip_value(char *str, size_t *state, char type); |
133 | | |
134 | | struct hFILE; |
135 | | |
136 | | /// Read one JSON token from a file |
137 | | /** @param fp The file stream |
138 | | @param token On return, filled in with the token read |
139 | | @param kstr Buffer used to store the token string returned |
140 | | @return The type of the token read |
141 | | |
142 | | The `kstr` buffer is used to store the string value of the token read, |
143 | | so `token->str` is only valid until the next time `hts_json_fnext()` is |
144 | | called with the same `kstr` argument. |
145 | | */ |
146 | | HTSLIB_EXPORT |
147 | | char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr); |
148 | | |
149 | | /// Read and discard a complete JSON value from a file |
150 | | /** @param fp The file stream |
151 | | @param type If the first token of the value to be discarded has already |
152 | | been read, provide its type; otherwise `'\0'` |
153 | | @return One of `v` (success), `\0` (EOF), and `?` (lexing error) |
154 | | |
155 | | Skips a complete JSON value, which may be a single token or an entire object |
156 | | or array. |
157 | | */ |
158 | | HTSLIB_EXPORT |
159 | | char hts_json_fskip_value(struct hFILE *fp, char type); |
160 | | |
161 | | // The <ctype.h> functions operate on ints such as are returned by fgetc(), |
162 | | // i.e., characters represented as unsigned-char-valued ints, or EOF. |
163 | | // To operate on plain chars (and to avoid warnings on some platforms), |
164 | | // technically one must cast to unsigned char everywhere (see CERT STR37-C) |
165 | | // or less painfully use these *_c() functions that operate on plain chars |
166 | | // (but not EOF, which must be considered separately where it is applicable). |
167 | | // TODO We may eventually wish to implement these functions directly without |
168 | | // using their <ctype.h> equivalents, and thus make them immune to locales. |
/* isalnum() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isalnum_c(char c)
{
    const int ch = (unsigned char) c;
    return isalnum(ch);
}
/* isalpha() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isalpha_c(char c)
{
    const int ch = (unsigned char) c;
    return isalpha(ch);
}
/* isdigit() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isdigit_c(char c)
{
    const int ch = (unsigned char) c;
    return isdigit(ch);
}
/* isgraph() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isgraph_c(char c)
{
    const int ch = (unsigned char) c;
    return isgraph(ch);
}
/* islower() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int islower_c(char c)
{
    const int ch = (unsigned char) c;
    return islower(ch);
}
/* isprint() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isprint_c(char c)
{
    const int ch = (unsigned char) c;
    return isprint(ch);
}
/* ispunct() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int ispunct_c(char c)
{
    const int ch = (unsigned char) c;
    return ispunct(ch);
}
/* isspace() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isspace_c(char c)
{
    const int ch = (unsigned char) c;
    return isspace(ch);
}
/* isupper() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isupper_c(char c)
{
    const int ch = (unsigned char) c;
    return isupper(ch);
}
/* isxdigit() for a plain char: the value is converted to unsigned char
   before being handed to the <ctype.h> classifier. */
static inline int isxdigit_c(char c)
{
    const int ch = (unsigned char) c;
    return isxdigit(ch);
}
/* tolower() for a plain char: the argument is converted to unsigned char
   for the <ctype.h> call and the result is narrowed back to char. */
static inline char tolower_c(char c)
{
    const int lowered = tolower((unsigned char) c);
    return (char) lowered;
}
/* toupper() for a plain char: the argument is converted to unsigned char
   for the <ctype.h> call and the result is narrowed back to char. */
static inline char toupper_c(char c)
{
    const int raised = toupper((unsigned char) c);
    return (char) raised;
}
181 | | |
182 | | /// Copy possibly malicious text data to a buffer |
183 | | /** @param buf Destination buffer |
184 | | @param buflen Size of the destination buffer (>= 4; >= 6 when quotes used) |
185 | | @param quote Quote character (or '\0' for no quoting of the output) |
186 | | @param s String to be copied |
187 | | @param len Length of the input string, or SIZE_MAX to copy until '\0' |
188 | | @return The destination buffer, @a buf. |
189 | | |
190 | | Copies the source text string (escaping any unprintable characters) to the |
191 | | destination buffer. The destination buffer will always be NUL-terminated; |
192 | | the text will be truncated (and "..." appended) if necessary to make it fit. |
193 | | */ |
194 | | const char *hts_strprint(char *buf, size_t buflen, char quote, |
195 | | const char *s, size_t len); |
196 | | |
197 | | // Faster replacements for strtol, for use when parsing lots of numbers. |
198 | | // Note that these only handle base 10 and do not skip leading whitespace |
199 | | |
/// Convert a string to a signed integer, with overflow detection
/** @param[in]  in     Input string
    @param[out] end    Returned end pointer
    @param[in]  bits   Bits available for the converted value (1 to 64)
    @param[out] failed Location of overflow flag
    @return String value converted to an int64_t

Converts a signed decimal string to an int64_t.  The string should
consist of an optional '+' or '-' sign followed by one or more of
the digits 0 to 9.  The output value will be limited to fit in the
given number of bits (including the sign bit).  If the value is too big,
the largest possible value (or, for negative input, the most negative
possible value) will be returned and *failed will be set to 1.
*failed is never cleared by this function, so the caller should
initialise it to 0.

The address of the first character following the converted number will
be stored in *end.  If no digits were found (including the case of a
sign that is not followed by a digit), nothing is converted: 0 is
returned and *end is set to @a in, as strtol() would do.

Both end and failed must be non-NULL.
 */
static inline int64_t hts_str2int(const char *in, char **end, int bits,
                                  int *failed) {
    uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1;
    // Number of digits that can be accumulated without any risk of
    // exceeding the limit, plus one (the loop below pre-decrements).
    uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
    const unsigned char *v = (const unsigned char *) in;
    const unsigned char *first_digit;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    unsigned char d;
    int neg = 1;

    switch(*v) {
    case '-':
        neg=-1;
        limit++; // Magnitude of the most negative value is one larger
        /* fall through */
    case '+':
        v++;
        break;
    default:
        break;
    }

    first_digit = v;

    // Fast path: these digits cannot overflow, so no range checks needed
    while (--fast && *v>='0' && *v<='9')
        n = n*10 + *v++ - ascii_zero;

    if (!fast) {
        // Slow path: check each further digit against the limit before
        // accumulating it.
        uint64_t limit_d_10 = limit / 10;
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
        while ((d = *v - ascii_zero) < 10) {
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
                n = n*10 + d;
                v++;
            } else {
                // Overflow: consume the remaining digits and saturate
                do { v++; } while (*v - ascii_zero < 10);
                n = limit;
                *failed = 1;
                break;
            }
        }
    }

    if (v == first_digit) {
        // No digits at all; do not report a bare sign as consumed input
        *end = (char *)in;
        return 0;
    }

    *end = (char *)v;

    // Negate via (n - 1) so that the most negative value does not overflow
    return (n && neg < 0) ? -((int64_t) (n - 1)) - 1 : (int64_t) n;
}
261 | | |
/// Convert a string to an unsigned integer, with overflow detection
/** @param[in]  in     Input string
    @param[out] end    Returned end pointer
    @param[in]  bits   Bits available for the converted value (up to 64)
    @param[out] failed Location of overflow flag
    @return String value converted to a uint64_t

Converts an unsigned decimal string to a uint64_t.  The string should
consist of an optional '+' sign followed by one or more of the digits 0
to 9.  The output value will be limited to fit in the given number of bits.
If the value is too big, the largest possible value will be returned
and *failed will be set to 1.  *failed is never cleared by this function,
so the caller should initialise it to 0.

The address of the first character following the converted number will
be stored in *end.  If no digits were found (including the case of a
'+' that is not followed by a digit), nothing is converted: 0 is
returned and *end is set to @a in, as strtoul() would do.

Both end and failed must be non-NULL.
 */

static inline uint64_t hts_str2uint(const char *in, char **end, int bits,
                                    int *failed) {
    // For bits >= 64 the subtraction wraps round to give UINT64_MAX
    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
    const unsigned char *v = (const unsigned char *) in;
    const unsigned char *first_digit;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    // Number of digits that can be accumulated without any risk of
    // exceeding the limit, plus one (the loop below pre-decrements).
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
    unsigned char d;

    if (*v == '+')
        v++;

    first_digit = v;

    // Fast path: these digits cannot overflow, so no range checks needed
    while (--fast && *v>='0' && *v<='9')
        n = n*10 + *v++ - ascii_zero;

    if (!fast) {
        // Slow path: check each further digit against the limit before
        // accumulating it.
        uint64_t limit_d_10 = limit / 10;
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
        while ((d = *v - ascii_zero) < 10) {
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
                n = n*10 + d;
                v++;
            } else {
                // Overflow: consume the remaining digits and saturate
                do { v++; } while (*v - ascii_zero < 10);
                n = limit;
                *failed = 1;
                break;
            }
        }
    }

    if (v == first_digit) {
        // No digits at all; do not report a bare '+' as consumed input
        *end = (char *)in;
        return 0;
    }

    *end = (char *)v;
    return n;
}
314 | | |
/// Convert a string to a double, with a fast path for short plain decimals
/** @param[in]  in      Input string
    @param[out] end     Returned end pointer
    @param[out] failed  Set to 1 if no characters could be converted;
                        left untouched otherwise
    @return  String value converted to a double

Leading white space (as defined by isspace()) and one optional '+' or '-'
sign are accepted.

Short values of the form [0-9]+[.]?[0-9]* are converted directly without
calling strtod().  Leading zeros are skipped and do not count towards the
length limit.  The limit is governed by a 15-step budget that is also
charged for the decimal point and for each loop exit test, so in practice
the fast path handles at most 13 digits, or 12 digits when a decimal point
is present.  Every such digit string fits exactly in a double, and so does
the power of ten it is divided by, so the result comes from one correctly
rounded division.

Everything else (inf, nan, hexadecimal values, values starting with '.',
values with exponents, and longer digit strings) is passed to strtod()
unchanged, starting again from _in_.  On that path out-of-range values
follow strtod() semantics: +/-HUGE_VAL on overflow, a value no larger in
magnitude than the smallest normalised double on underflow, and errno set
to ERANGE.

The address of the first character following the converted number will
be stored in *end.

Both end and failed must be non-NULL.
*/

static inline double hts_str2dbl(const char *in, char **end, int *failed) {
    // Exact powers of ten, indexed by the number of digits seen after the
    // decimal point.  The digit budget below keeps that index at 12 or
    // less, so the table is larger than strictly required.
    static const double scale[] = {
        1e0, 1e1, 1e2,  1e3,  1e4,  1e5,  1e6,  1e7,
        1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15
    };

    const unsigned char *v = (const unsigned char *) in;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    uint64_t n = 0;        // Digits accumulated so far, ignoring the point
    int max_len = 15;      // Remaining budget for the fast path
    int frac_digits = 0;   // Number of digits consumed after the point
    int neg = 0;
    double d;

    while (isspace(*v))
        v++;

    if (*v == '-') {
        neg = 1;
        v++;
    } else if (*v == '+') {
        v++;
    }

    // The fast path needs a leading decimal digit.  A "0x" / "0X" prefix
    // is hexadecimal and has to be left to strtod().
    const int leading_digit = (*v >= '0' && *v <= '9');
    const int hex_prefix = (*v == '0' && (v[1] == 'x' || v[1] == 'X'));

    if (leading_digit && !hex_prefix) {
        // Leading zeros carry no value and are not charged to the budget.
        while (*v == '0')
            ++v;

        // Integer part.  The budget is decremented before each test, so
        // it is charged once more when the loop stops on a non-digit.
        while (--max_len && *v >= '0' && *v <= '9')
            n = n*10 + *v++ - ascii_zero;

        if (max_len && *v == '.') {
            const unsigned char *frac = ++v;
            while (--max_len && *v >= '0' && *v <= '9')
                n = n*10 + *v++ - ascii_zero;
            frac_digits = (int) (v - frac);
        }

        // An exhausted budget or an exponent marker is outside the scope
        // of this quick parser; in both cases fall back to strtod() below.
        if (max_len && *v != 'e' && *v != 'E') {
            *end = (char *) v;
            d = (double) n / scale[frac_digits];
            return neg ? -d : d;
        }
    }

    // General case: let strtod() reparse the whole input, including any
    // white space and sign skipped above.
    d = strtod(in, end);
    if (*end == in)
        *failed = 1;
    return d;
}
Unexecuted instantiation: cram_encode.c:hts_str2dbl Unexecuted instantiation: cram_index.c:hts_str2dbl Unexecuted instantiation: cram_io.c:hts_str2dbl Unexecuted instantiation: hfile_libcurl.c:hts_str2dbl Unexecuted instantiation: hfile_gcs.c:hts_str2dbl Unexecuted instantiation: hfile_s3.c:hts_str2dbl Unexecuted instantiation: hfile_s3_write.c:hts_str2dbl Unexecuted instantiation: bgzf.c:hts_str2dbl Unexecuted instantiation: faidx.c:hts_str2dbl Unexecuted instantiation: tbx.c:hts_str2dbl |
404 | | |
405 | | |
406 | | #ifdef __cplusplus |
407 | | } |
408 | | #endif |
409 | | |
410 | | #endif |