Coverage Report

Created: 2023-01-17 06:24

/src/htslib/textutils_internal.h
Line
Count
Source (jump to first uncovered line)
1
/* textutils_internal.h -- non-bioinformatics utility routines for text etc.
2
3
   Copyright (C) 2016,2018-2020 Genome Research Ltd.
4
5
   Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#ifndef HTSLIB_TEXTUTILS_INTERNAL_H
26
#define HTSLIB_TEXTUTILS_INTERNAL_H
27
28
/* N.B. These interfaces may be used by plug-ins */
29
30
#include <ctype.h>
31
#include <stdlib.h>
32
#include "htslib/kstring.h"
33
34
#ifdef __cplusplus
35
extern "C" {
36
#endif
37
38
/// Decode percent-encoded (URL-encoded) text
39
/** On input, _dest_ should be a buffer at least the same size as _s_,
40
    and may be equal to _s_ to decode in place.  On output, _dest_ will be
41
    NUL-terminated and the number of characters written (not including the
42
    NUL) is stored in _destlen_.
43
*/
44
int hts_decode_percent(char *dest, size_t *destlen, const char *s);
45
46
/// Return decoded data length given length of base64-encoded text
47
/** This gives an upper bound, as it overestimates by a byte or two when
48
    the encoded text ends with (possibly omitted) `=` padding characters.
49
*/
50
size_t hts_base64_decoded_length(size_t len);
51
52
/// Decode base64-encoded data
53
/** On input, _dest_ should be a sufficient buffer (see `hts_base64_decoded_length()`),
54
    and may be equal to _s_ to decode in place.  On output, the number of
55
    bytes written is stored in _destlen_.
56
*/
57
int hts_decode_base64(char *dest, size_t *destlen, const char *s);
58
59
/// Token structure returned by JSON lexing functions
60
/** Structure is defined in hts_internal.h
61
 */
62
63
typedef struct hts_json_token hts_json_token;
64
65
/// Allocate an empty JSON token structure, for use with hts_json_* functions
66
/** @return An empty token on success; NULL on failure
67
 */
68
HTSLIB_EXPORT
69
hts_json_token *hts_json_alloc_token(void);
70
71
/// Free a JSON token
72
HTSLIB_EXPORT
73
void hts_json_free_token(hts_json_token *token);
74
75
/// Accessor function to get JSON token type
76
/** @param  token Pointer to JSON token
77
    @return Character indicating the token type
78
79
Token types correspond to scalar JSON values and selected punctuation
80
as follows:
81
  - `s` string
82
  - `n` number
83
  - `b` boolean literal
84
  - `.` null literal
85
  - `{`, `}`, `[`, `]` object and array delimiters
86
  - `?` lexing error
87
  - `!` other errors (e.g. out of memory)
88
  - `\0` terminator at end of input
89
*/
90
HTSLIB_EXPORT
91
char hts_json_token_type(hts_json_token *token);
92
93
/// Accessor function to get JSON token in string form
94
/** @param  token Pointer to JSON token
95
    @return String representation of the JSON token; NULL if unset
96
97
If the token was parsed from a string using hts_json_snext(), the return value
98
will point into the string passed as the first parameter to hts_json_snext().
99
If the token was parsed from a file using hts_json_fnext(), the return value
100
will point at the kstring_t buffer passed as the third parameter to
101
hts_json_fnext().  In that case, the value will only be valid until the
102
next call to hts_json_fnext().
103
 */
104
HTSLIB_EXPORT
105
char *hts_json_token_str(hts_json_token *token);
106
107
/// Read one JSON token from a string
108
/** @param str    The input C string
109
    @param state  The input string state
110
    @param token  On return, filled in with the token read
111
    @return  The type of the token read
112
113
On return, `token->str` points into the supplied input string, which
114
is modified by having token-terminating characters overwritten as NULs.
115
The `state` argument records the current position within `str` after each
116
`hts_json_snext()` call, and should be set to 0 before the first call.
117
*/
118
HTSLIB_EXPORT
119
char hts_json_snext(char *str, size_t *state, hts_json_token *token);
120
121
/// Read and discard a complete JSON value from a string
122
/** @param str    The input C string
123
    @param state  The input string state, as per `hts_json_snext()`
124
    @param type   If the first token of the value to be discarded has already
125
                  been read, provide its type; otherwise `'\0'`
126
    @return  One of `v` (success), `\0` (end of string), and `?` (lexing error)
127
128
Skips a complete JSON value, which may be a single token or an entire object
129
or array.
130
*/
131
HTSLIB_EXPORT
132
char hts_json_sskip_value(char *str, size_t *state, char type);
133
134
struct hFILE;
135
136
/// Read one JSON token from a file
137
/** @param fp     The file stream
138
    @param token  On return, filled in with the token read
139
    @param kstr   Buffer used to store the token string returned
140
    @return  The type of the token read
141
142
The `kstr` buffer is used to store the string value of the token read,
143
so `token->str` is only valid until the next time `hts_json_fnext()` is
144
called with the same `kstr` argument.
145
*/
146
HTSLIB_EXPORT
147
char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr);
148
149
/// Read and discard a complete JSON value from a file
150
/** @param fp    The file stream
151
    @param type  If the first token of the value to be discarded has already
152
                 been read, provide its type; otherwise `'\0'`
153
    @return  One of `v` (success), `\0` (EOF), and `?` (lexing error)
154
155
Skips a complete JSON value, which may be a single token or an entire object
156
or array.
157
*/
158
HTSLIB_EXPORT
159
char hts_json_fskip_value(struct hFILE *fp, char type);
160
161
// The <ctype.h> functions operate on ints such as are returned by fgetc(),
162
// i.e., characters represented as unsigned-char-valued ints, or EOF.
163
// To operate on plain chars (and to avoid warnings on some platforms),
164
// technically one must cast to unsigned char everywhere (see CERT STR37-C)
165
// or less painfully use these *_c() functions that operate on plain chars
166
// (but not EOF, which must be considered separately where it is applicable).
167
// TODO We may eventually wish to implement these functions directly without
168
// using their <ctype.h> equivalents, and thus make them immune to locales.
169
251k
static inline int isalnum_c(char c) { return isalnum((unsigned char) c); }
Unexecuted instantiation: header.c:isalnum_c
hfile.c:isalnum_c
Line
Count
Source
169
15.7k
static inline int isalnum_c(char c) { return isalnum((unsigned char) c); }
Unexecuted instantiation: hts.c:isalnum_c
Unexecuted instantiation: hts_expr.c:isalnum_c
Unexecuted instantiation: multipart.c:isalnum_c
Unexecuted instantiation: sam.c:isalnum_c
Unexecuted instantiation: textutils.c:isalnum_c
vcf.c:isalnum_c
Line
Count
Source
169
235k
static inline int isalnum_c(char c) { return isalnum((unsigned char) c); }
Unexecuted instantiation: cram_encode.c:isalnum_c
Unexecuted instantiation: cram_index.c:isalnum_c
Unexecuted instantiation: cram_io.c:isalnum_c
Unexecuted instantiation: hfile_libcurl.c:isalnum_c
Unexecuted instantiation: hfile_gcs.c:isalnum_c
Unexecuted instantiation: hfile_s3.c:isalnum_c
Unexecuted instantiation: hfile_s3_write.c:isalnum_c
Unexecuted instantiation: bgzf.c:isalnum_c
Unexecuted instantiation: faidx.c:isalnum_c
Unexecuted instantiation: tbx.c:isalnum_c
170
5.52M
static inline int isalpha_c(char c) { return isalpha((unsigned char) c); }
header.c:isalpha_c
Line
Count
Source
170
5.47M
static inline int isalpha_c(char c) { return isalpha((unsigned char) c); }
Unexecuted instantiation: hfile.c:isalpha_c
Unexecuted instantiation: hts.c:isalpha_c
Unexecuted instantiation: hts_expr.c:isalpha_c
Unexecuted instantiation: multipart.c:isalpha_c
Unexecuted instantiation: sam.c:isalpha_c
Unexecuted instantiation: textutils.c:isalpha_c
vcf.c:isalpha_c
Line
Count
Source
170
53.1k
static inline int isalpha_c(char c) { return isalpha((unsigned char) c); }
Unexecuted instantiation: cram_encode.c:isalpha_c
Unexecuted instantiation: cram_index.c:isalpha_c
Unexecuted instantiation: cram_io.c:isalpha_c
Unexecuted instantiation: hfile_libcurl.c:isalpha_c
Unexecuted instantiation: hfile_gcs.c:isalpha_c
Unexecuted instantiation: hfile_s3.c:isalpha_c
Unexecuted instantiation: hfile_s3_write.c:isalpha_c
Unexecuted instantiation: bgzf.c:isalpha_c
Unexecuted instantiation: faidx.c:isalpha_c
Unexecuted instantiation: tbx.c:isalpha_c
171
66.5M
static inline int isdigit_c(char c) { return isdigit((unsigned char) c); }
Unexecuted instantiation: header.c:isdigit_c
Unexecuted instantiation: hfile.c:isdigit_c
hts.c:isdigit_c
Line
Count
Source
171
375
static inline int isdigit_c(char c) { return isdigit((unsigned char) c); }
Unexecuted instantiation: hts_expr.c:isdigit_c
Unexecuted instantiation: multipart.c:isdigit_c
sam.c:isdigit_c
Line
Count
Source
171
66.5M
static inline int isdigit_c(char c) { return isdigit((unsigned char) c); }
Unexecuted instantiation: textutils.c:isdigit_c
Unexecuted instantiation: vcf.c:isdigit_c
Unexecuted instantiation: cram_encode.c:isdigit_c
Unexecuted instantiation: cram_index.c:isdigit_c
Unexecuted instantiation: cram_io.c:isdigit_c
Unexecuted instantiation: hfile_libcurl.c:isdigit_c
Unexecuted instantiation: hfile_gcs.c:isdigit_c
Unexecuted instantiation: hfile_s3.c:isdigit_c
Unexecuted instantiation: hfile_s3_write.c:isdigit_c
Unexecuted instantiation: bgzf.c:isdigit_c
Unexecuted instantiation: faidx.c:isdigit_c
Unexecuted instantiation: tbx.c:isdigit_c
172
0
static inline int isgraph_c(char c) { return isgraph((unsigned char) c); }
Unexecuted instantiation: header.c:isgraph_c
Unexecuted instantiation: hfile.c:isgraph_c
Unexecuted instantiation: hts.c:isgraph_c
Unexecuted instantiation: hts_expr.c:isgraph_c
Unexecuted instantiation: multipart.c:isgraph_c
Unexecuted instantiation: sam.c:isgraph_c
Unexecuted instantiation: textutils.c:isgraph_c
Unexecuted instantiation: vcf.c:isgraph_c
Unexecuted instantiation: cram_encode.c:isgraph_c
Unexecuted instantiation: cram_index.c:isgraph_c
Unexecuted instantiation: cram_io.c:isgraph_c
Unexecuted instantiation: hfile_libcurl.c:isgraph_c
Unexecuted instantiation: hfile_gcs.c:isgraph_c
Unexecuted instantiation: hfile_s3.c:isgraph_c
Unexecuted instantiation: hfile_s3_write.c:isgraph_c
Unexecuted instantiation: bgzf.c:isgraph_c
Unexecuted instantiation: faidx.c:isgraph_c
Unexecuted instantiation: tbx.c:isgraph_c
173
0
static inline int islower_c(char c) { return islower((unsigned char) c); }
Unexecuted instantiation: header.c:islower_c
Unexecuted instantiation: hfile.c:islower_c
Unexecuted instantiation: hts.c:islower_c
Unexecuted instantiation: hts_expr.c:islower_c
Unexecuted instantiation: multipart.c:islower_c
Unexecuted instantiation: sam.c:islower_c
Unexecuted instantiation: textutils.c:islower_c
Unexecuted instantiation: vcf.c:islower_c
Unexecuted instantiation: cram_encode.c:islower_c
Unexecuted instantiation: cram_index.c:islower_c
Unexecuted instantiation: cram_io.c:islower_c
Unexecuted instantiation: hfile_libcurl.c:islower_c
Unexecuted instantiation: hfile_gcs.c:islower_c
Unexecuted instantiation: hfile_s3.c:islower_c
Unexecuted instantiation: hfile_s3_write.c:islower_c
Unexecuted instantiation: bgzf.c:islower_c
Unexecuted instantiation: faidx.c:islower_c
Unexecuted instantiation: tbx.c:islower_c
174
121k
static inline int isprint_c(char c) { return isprint((unsigned char) c); }
Unexecuted instantiation: header.c:isprint_c
Unexecuted instantiation: hfile.c:isprint_c
Unexecuted instantiation: hts.c:isprint_c
Unexecuted instantiation: hts_expr.c:isprint_c
Unexecuted instantiation: multipart.c:isprint_c
Unexecuted instantiation: sam.c:isprint_c
textutils.c:isprint_c
Line
Count
Source
174
121k
static inline int isprint_c(char c) { return isprint((unsigned char) c); }
Unexecuted instantiation: vcf.c:isprint_c
Unexecuted instantiation: cram_encode.c:isprint_c
Unexecuted instantiation: cram_index.c:isprint_c
Unexecuted instantiation: cram_io.c:isprint_c
Unexecuted instantiation: hfile_libcurl.c:isprint_c
Unexecuted instantiation: hfile_gcs.c:isprint_c
Unexecuted instantiation: hfile_s3.c:isprint_c
Unexecuted instantiation: hfile_s3_write.c:isprint_c
Unexecuted instantiation: bgzf.c:isprint_c
Unexecuted instantiation: faidx.c:isprint_c
Unexecuted instantiation: tbx.c:isprint_c
175
0
static inline int ispunct_c(char c) { return ispunct((unsigned char) c); }
Unexecuted instantiation: header.c:ispunct_c
Unexecuted instantiation: hfile.c:ispunct_c
Unexecuted instantiation: hts.c:ispunct_c
Unexecuted instantiation: hts_expr.c:ispunct_c
Unexecuted instantiation: multipart.c:ispunct_c
Unexecuted instantiation: sam.c:ispunct_c
Unexecuted instantiation: textutils.c:ispunct_c
Unexecuted instantiation: vcf.c:ispunct_c
Unexecuted instantiation: cram_encode.c:ispunct_c
Unexecuted instantiation: cram_index.c:ispunct_c
Unexecuted instantiation: cram_io.c:ispunct_c
Unexecuted instantiation: hfile_libcurl.c:ispunct_c
Unexecuted instantiation: hfile_gcs.c:ispunct_c
Unexecuted instantiation: hfile_s3.c:ispunct_c
Unexecuted instantiation: hfile_s3_write.c:ispunct_c
Unexecuted instantiation: bgzf.c:ispunct_c
Unexecuted instantiation: faidx.c:ispunct_c
Unexecuted instantiation: tbx.c:ispunct_c
176
16.6M
static inline int isspace_c(char c) { return isspace((unsigned char) c); }
Unexecuted instantiation: header.c:isspace_c
Unexecuted instantiation: hfile.c:isspace_c
hts.c:isspace_c
Line
Count
Source
176
123
static inline int isspace_c(char c) { return isspace((unsigned char) c); }
Unexecuted instantiation: hts_expr.c:isspace_c
Unexecuted instantiation: multipart.c:isspace_c
sam.c:isspace_c
Line
Count
Source
176
16.6M
static inline int isspace_c(char c) { return isspace((unsigned char) c); }
Unexecuted instantiation: textutils.c:isspace_c
vcf.c:isspace_c
Line
Count
Source
176
44.9k
static inline int isspace_c(char c) { return isspace((unsigned char) c); }
Unexecuted instantiation: cram_encode.c:isspace_c
Unexecuted instantiation: cram_index.c:isspace_c
Unexecuted instantiation: cram_io.c:isspace_c
Unexecuted instantiation: hfile_libcurl.c:isspace_c
Unexecuted instantiation: hfile_gcs.c:isspace_c
Unexecuted instantiation: hfile_s3.c:isspace_c
Unexecuted instantiation: hfile_s3_write.c:isspace_c
Unexecuted instantiation: bgzf.c:isspace_c
Unexecuted instantiation: faidx.c:isspace_c
Unexecuted instantiation: tbx.c:isspace_c
177
0
static inline int isupper_c(char c) { return isupper((unsigned char) c); }
Unexecuted instantiation: header.c:isupper_c
Unexecuted instantiation: hfile.c:isupper_c
Unexecuted instantiation: hts.c:isupper_c
Unexecuted instantiation: hts_expr.c:isupper_c
Unexecuted instantiation: multipart.c:isupper_c
Unexecuted instantiation: sam.c:isupper_c
Unexecuted instantiation: textutils.c:isupper_c
Unexecuted instantiation: vcf.c:isupper_c
Unexecuted instantiation: cram_encode.c:isupper_c
Unexecuted instantiation: cram_index.c:isupper_c
Unexecuted instantiation: cram_io.c:isupper_c
Unexecuted instantiation: hfile_libcurl.c:isupper_c
Unexecuted instantiation: hfile_gcs.c:isupper_c
Unexecuted instantiation: hfile_s3.c:isupper_c
Unexecuted instantiation: hfile_s3_write.c:isupper_c
Unexecuted instantiation: bgzf.c:isupper_c
Unexecuted instantiation: faidx.c:isupper_c
Unexecuted instantiation: tbx.c:isupper_c
178
0
static inline int isxdigit_c(char c) { return isxdigit((unsigned char) c); }
Unexecuted instantiation: header.c:isxdigit_c
Unexecuted instantiation: hfile.c:isxdigit_c
Unexecuted instantiation: hts.c:isxdigit_c
Unexecuted instantiation: hts_expr.c:isxdigit_c
Unexecuted instantiation: multipart.c:isxdigit_c
Unexecuted instantiation: sam.c:isxdigit_c
Unexecuted instantiation: textutils.c:isxdigit_c
Unexecuted instantiation: vcf.c:isxdigit_c
Unexecuted instantiation: cram_encode.c:isxdigit_c
Unexecuted instantiation: cram_index.c:isxdigit_c
Unexecuted instantiation: cram_io.c:isxdigit_c
Unexecuted instantiation: hfile_libcurl.c:isxdigit_c
Unexecuted instantiation: hfile_gcs.c:isxdigit_c
Unexecuted instantiation: hfile_s3.c:isxdigit_c
Unexecuted instantiation: hfile_s3_write.c:isxdigit_c
Unexecuted instantiation: bgzf.c:isxdigit_c
Unexecuted instantiation: faidx.c:isxdigit_c
Unexecuted instantiation: tbx.c:isxdigit_c
179
10.7k
static inline char tolower_c(char c) { return tolower((unsigned char) c); }
Unexecuted instantiation: header.c:tolower_c
hfile.c:tolower_c
Line
Count
Source
179
10.7k
static inline char tolower_c(char c) { return tolower((unsigned char) c); }
Unexecuted instantiation: hts.c:tolower_c
Unexecuted instantiation: hts_expr.c:tolower_c
Unexecuted instantiation: multipart.c:tolower_c
Unexecuted instantiation: sam.c:tolower_c
Unexecuted instantiation: textutils.c:tolower_c
Unexecuted instantiation: vcf.c:tolower_c
Unexecuted instantiation: cram_encode.c:tolower_c
Unexecuted instantiation: cram_index.c:tolower_c
Unexecuted instantiation: cram_io.c:tolower_c
Unexecuted instantiation: hfile_libcurl.c:tolower_c
Unexecuted instantiation: hfile_gcs.c:tolower_c
Unexecuted instantiation: hfile_s3.c:tolower_c
Unexecuted instantiation: hfile_s3_write.c:tolower_c
Unexecuted instantiation: bgzf.c:tolower_c
Unexecuted instantiation: faidx.c:tolower_c
Unexecuted instantiation: tbx.c:tolower_c
180
0
static inline char toupper_c(char c) { return toupper((unsigned char) c); }
Unexecuted instantiation: header.c:toupper_c
Unexecuted instantiation: hfile.c:toupper_c
Unexecuted instantiation: hts.c:toupper_c
Unexecuted instantiation: hts_expr.c:toupper_c
Unexecuted instantiation: multipart.c:toupper_c
Unexecuted instantiation: sam.c:toupper_c
Unexecuted instantiation: textutils.c:toupper_c
Unexecuted instantiation: vcf.c:toupper_c
Unexecuted instantiation: cram_encode.c:toupper_c
Unexecuted instantiation: cram_index.c:toupper_c
Unexecuted instantiation: cram_io.c:toupper_c
Unexecuted instantiation: hfile_libcurl.c:toupper_c
Unexecuted instantiation: hfile_gcs.c:toupper_c
Unexecuted instantiation: hfile_s3.c:toupper_c
Unexecuted instantiation: hfile_s3_write.c:toupper_c
Unexecuted instantiation: bgzf.c:toupper_c
Unexecuted instantiation: faidx.c:toupper_c
Unexecuted instantiation: tbx.c:toupper_c
181
182
/// Copy possibly malicious text data to a buffer
183
/** @param buf     Destination buffer
184
    @param buflen  Size of the destination buffer (>= 4; >= 6 when quotes used)
185
    @param quote   Quote character (or '\0' for no quoting of the output)
186
    @param s       String to be copied
187
    @param len     Length of the input string, or SIZE_MAX to copy until '\0'
188
    @return The destination buffer, @a buf.
189
190
Copies the source text string (escaping any unprintable characters) to the
191
destination buffer. The destination buffer will always be NUL-terminated;
192
the text will be truncated (and "..." appended) if necessary to make it fit.
193
 */
194
const char *hts_strprint(char *buf, size_t buflen, char quote,
195
                         const char *s, size_t len);
196
197
// Faster replacements for strtol, for use when parsing lots of numbers.
198
// Note that these only handle base 10 and do not skip leading whitespace
199
200
/// Convert a string to a signed integer, with overflow detection
201
/** @param[in]  in     Input string
202
    @param[out] end    Returned end pointer
203
    @param[in]  bits   Bits available for the converted value
204
    @param[out] failed Location of overflow flag
205
    @return String value converted to an int64_t
206
207
Converts a signed decimal string to an int64_t.  The string should
208
consist of an optional '+' or '-' sign followed by one or more of
209
the digits 0 to 9.  The output value will be limited to fit in the
210
given number of bits (including the sign bit).  If the value is too big,
211
the nearest representable value will be returned and *failed will be set to 1.
212
213
The address of the first character following the converted number will
214
be stored in *end.
215
216
Both end and failed must be non-NULL.
217
 */
218
static inline int64_t hts_str2int(const char *in, char **end, int bits,
219
472M
                                    int *failed) {
220
472M
    uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1;
221
472M
    uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
222
472M
    const unsigned char *v = (const unsigned char *) in;
223
472M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
224
472M
    unsigned char d;
225
472M
    int neg = 1;
226
227
472M
    switch(*v) {
228
5.07M
    case '-':
229
5.07M
        neg=-1;
230
5.07M
        limit++; /* fall through */
231
5.65M
    case '+':
232
5.65M
        v++;
233
5.65M
        break;
234
467M
    default:
235
467M
        break;
236
472M
    }
237
238
495M
    while (--fast && *v>='0' && *v<='9')
239
22.1M
        n = n*10 + *v++ - ascii_zero;
240
241
472M
    if (!fast) {
242
649k
        uint64_t limit_d_10 = limit / 10;
243
649k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
244
753k
         while ((d = *v - ascii_zero) < 10) {
245
438k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
246
103k
                n = n*10 + d;
247
103k
                v++;
248
334k
            } else {
249
2.55M
                do { v++; } while (*v - ascii_zero < 10);
250
334k
                n = limit;
251
334k
                *failed = 1;
252
334k
                break;
253
334k
            }
254
438k
        }
255
649k
    }
256
257
472M
    *end = (char *)v;
258
259
472M
    return (n && neg < 0) ? -((int64_t) (n - 1)) - 1 : (int64_t) n;
260
472M
}
Unexecuted instantiation: header.c:hts_str2int
Unexecuted instantiation: hfile.c:hts_str2int
Unexecuted instantiation: hts.c:hts_str2int
Unexecuted instantiation: hts_expr.c:hts_str2int
Unexecuted instantiation: multipart.c:hts_str2int
sam.c:hts_str2int
Line
Count
Source
219
231M
                                    int *failed) {
220
231M
    uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1;
221
231M
    uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
222
231M
    const unsigned char *v = (const unsigned char *) in;
223
231M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
224
231M
    unsigned char d;
225
231M
    int neg = 1;
226
227
231M
    switch(*v) {
228
3.40M
    case '-':
229
3.40M
        neg=-1;
230
3.40M
        limit++; /* fall through */
231
3.98M
    case '+':
232
3.98M
        v++;
233
3.98M
        break;
234
227M
    default:
235
227M
        break;
236
231M
    }
237
238
241M
    while (--fast && *v>='0' && *v<='9')
239
10.0M
        n = n*10 + *v++ - ascii_zero;
240
241
231M
    if (!fast) {
242
614k
        uint64_t limit_d_10 = limit / 10;
243
614k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
244
682k
         while ((d = *v - ascii_zero) < 10) {
245
368k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
246
68.5k
                n = n*10 + d;
247
68.5k
                v++;
248
299k
            } else {
249
594k
                do { v++; } while (*v - ascii_zero < 10);
250
299k
                n = limit;
251
299k
                *failed = 1;
252
299k
                break;
253
299k
            }
254
368k
        }
255
614k
    }
256
257
231M
    *end = (char *)v;
258
259
231M
    return (n && neg < 0) ? -((int64_t) (n - 1)) - 1 : (int64_t) n;
260
231M
}
Unexecuted instantiation: textutils.c:hts_str2int
vcf.c:hts_str2int
Line
Count
Source
219
241M
                                    int *failed) {
220
241M
    uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1;
221
241M
    uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
222
241M
    const unsigned char *v = (const unsigned char *) in;
223
241M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
224
241M
    unsigned char d;
225
241M
    int neg = 1;
226
227
241M
    switch(*v) {
228
1.66M
    case '-':
229
1.66M
        neg=-1;
230
1.66M
        limit++; /* fall through */
231
1.66M
    case '+':
232
1.66M
        v++;
233
1.66M
        break;
234
240M
    default:
235
240M
        break;
236
241M
    }
237
238
253M
    while (--fast && *v>='0' && *v<='9')
239
12.1M
        n = n*10 + *v++ - ascii_zero;
240
241
241M
    if (!fast) {
242
35.3k
        uint64_t limit_d_10 = limit / 10;
243
35.3k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
244
70.7k
         while ((d = *v - ascii_zero) < 10) {
245
70.4k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
246
35.3k
                n = n*10 + d;
247
35.3k
                v++;
248
35.3k
            } else {
249
1.95M
                do { v++; } while (*v - ascii_zero < 10);
250
35.1k
                n = limit;
251
35.1k
                *failed = 1;
252
35.1k
                break;
253
35.1k
            }
254
70.4k
        }
255
35.3k
    }
256
257
241M
    *end = (char *)v;
258
259
241M
    return (n && neg < 0) ? -((int64_t) (n - 1)) - 1 : (int64_t) n;
260
241M
}
Unexecuted instantiation: cram_encode.c:hts_str2int
Unexecuted instantiation: cram_index.c:hts_str2int
Unexecuted instantiation: cram_io.c:hts_str2int
Unexecuted instantiation: hfile_libcurl.c:hts_str2int
Unexecuted instantiation: hfile_gcs.c:hts_str2int
Unexecuted instantiation: hfile_s3.c:hts_str2int
Unexecuted instantiation: hfile_s3_write.c:hts_str2int
Unexecuted instantiation: bgzf.c:hts_str2int
Unexecuted instantiation: faidx.c:hts_str2int
Unexecuted instantiation: tbx.c:hts_str2int
261
262
/// Convert a string to an unsigned integer, with overflow detection
263
/** @param[in]  in     Input string
264
    @param[out] end    Returned end pointer
265
    @param[in]  bits   Bits available for the converted value
266
    @param[out] failed Location of overflow flag
267
    @return String value converted to a uint64_t
268
269
Converts an unsigned decimal string to a uint64_t.  The string should
270
consist of an optional '+' sign followed by one or more of the digits 0
271
to 9.  The output value will be limited to fit in the given number of bits.
272
If the value is too big, the largest possible value will be returned
273
and *failed will be set to 1.
274
275
The address of the first character following the converted number will
276
be stored in *end.
277
278
Both end and failed must be non-NULL.
279
 */
280
281
static inline uint64_t hts_str2uint(const char *in, char **end, int bits,
282
39.7M
                                      int *failed) {
283
39.7M
    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
284
39.7M
    const unsigned char *v = (const unsigned char *) in;
285
39.7M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
286
39.7M
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
287
39.7M
    unsigned char d;
288
289
39.7M
    if (*v == '+')
290
93.9k
        v++;
291
292
42.2M
    while (--fast && *v>='0' && *v<='9')
293
2.42M
        n = n*10 + *v++ - ascii_zero;
294
295
39.7M
    if (!fast) {
296
232k
        uint64_t limit_d_10 = limit / 10;
297
232k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
298
240k
        while ((d = *v - ascii_zero) < 10) {
299
234k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
300
8.46k
                n = n*10 + d;
301
8.46k
                v++;
302
225k
            } else {
303
825k
                do { v++; } while (*v - ascii_zero < 10);
304
225k
                n = limit;
305
225k
                *failed = 1;
306
225k
                break;
307
225k
            }
308
234k
        }
309
232k
    }
310
311
39.7M
    *end = (char *)v;
312
39.7M
    return n;
313
39.7M
}
Unexecuted instantiation: header.c:hts_str2uint
Unexecuted instantiation: hfile.c:hts_str2uint
Unexecuted instantiation: hts.c:hts_str2uint
Unexecuted instantiation: hts_expr.c:hts_str2uint
Unexecuted instantiation: multipart.c:hts_str2uint
sam.c:hts_str2uint
Line
Count
Source
282
39.7M
                                      int *failed) {
283
39.7M
    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
284
39.7M
    const unsigned char *v = (const unsigned char *) in;
285
39.7M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
286
39.7M
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
287
39.7M
    unsigned char d;
288
289
39.7M
    if (*v == '+')
290
93.9k
        v++;
291
292
42.1M
    while (--fast && *v>='0' && *v<='9')
293
2.34M
        n = n*10 + *v++ - ascii_zero;
294
295
39.7M
    if (!fast) {
296
231k
        uint64_t limit_d_10 = limit / 10;
297
231k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
298
237k
        while ((d = *v - ascii_zero) < 10) {
299
231k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
300
5.90k
                n = n*10 + d;
301
5.90k
                v++;
302
225k
            } else {
303
824k
                do { v++; } while (*v - ascii_zero < 10);
304
225k
                n = limit;
305
225k
                *failed = 1;
306
225k
                break;
307
225k
            }
308
231k
        }
309
231k
    }
310
311
39.7M
    *end = (char *)v;
312
39.7M
    return n;
313
39.7M
}
Unexecuted instantiation: textutils.c:hts_str2uint
vcf.c:hts_str2uint
Line
Count
Source
282
21.1k
                                      int *failed) {
283
21.1k
    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
284
21.1k
    const unsigned char *v = (const unsigned char *) in;
285
21.1k
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
286
21.1k
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
287
21.1k
    unsigned char d;
288
289
21.1k
    if (*v == '+')
290
25
        v++;
291
292
104k
    while (--fast && *v>='0' && *v<='9')
293
83.0k
        n = n*10 + *v++ - ascii_zero;
294
295
21.1k
    if (!fast) {
296
711
        uint64_t limit_d_10 = limit / 10;
297
711
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
298
3.26k
        while ((d = *v - ascii_zero) < 10) {
299
2.62k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
300
2.55k
                n = n*10 + d;
301
2.55k
                v++;
302
2.55k
            } else {
303
853
                do { v++; } while (*v - ascii_zero < 10);
304
71
                n = limit;
305
71
                *failed = 1;
306
71
                break;
307
71
            }
308
2.62k
        }
309
711
    }
310
311
21.1k
    *end = (char *)v;
312
21.1k
    return n;
313
21.1k
}
Unexecuted instantiation: cram_encode.c:hts_str2uint
Unexecuted instantiation: cram_index.c:hts_str2uint
Unexecuted instantiation: cram_io.c:hts_str2uint
Unexecuted instantiation: hfile_libcurl.c:hts_str2uint
Unexecuted instantiation: hfile_gcs.c:hts_str2uint
Unexecuted instantiation: hfile_s3.c:hts_str2uint
Unexecuted instantiation: hfile_s3_write.c:hts_str2uint
Unexecuted instantiation: bgzf.c:hts_str2uint
Unexecuted instantiation: faidx.c:hts_str2uint
Unexecuted instantiation: tbx.c:hts_str2uint
314
315
/// Convert a string to a double, with overflow detection
/** @param[in]  in     Input string
    @param[out] end    Returned end pointer
    @param[out] failed Location of conversion-failure flag
    @return String value converted to a double

Converts a floating point value string to a double.  Leading white space
(as classified by isspace()) and an optional sign are skipped.  Short plain
decimal strings of the form [0-9]+[.]?[0-9]* are then converted directly:
leading zeros of the integer part are not counted, after which at most 13
digits (12 when a decimal point is present) are handled by this function.
Everything else (longer numbers, a leading '.', inf, nan, hexadecimal,
values with exponents) is passed on to strtod() for processing.

Range errors can only occur on the strtod() path and are reported the way
strtod() reports them (errno set to ERANGE).

If no characters could be converted, *failed will be set to 1.  The flag is
never cleared here, so callers should initialise it before the call.

The address of the first character following the converted number will
be stored in *end.

Both end and failed must be non-NULL.
 */

static inline double hts_str2dbl(const char *in, char **end, int *failed) {
    // Divisors for the fast path.  The index is the number of characters
    // consumed after the integer part: 0 when there is no decimal point,
    // otherwise 1 (for the '.') plus the number of fraction digits.  That
    // is why the table starts with two 1s.  It is read-only, hence const.
    static const double D[] = {1,1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
                               1e8, 1e9, 1e10,1e11,1e12,1e13,1e14,1e15,
                               1e16,1e17,1e18,1e19,1e20};
    const unsigned char *v = (const unsigned char *) in;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    uint64_t n = 0;          // Digits seen so far, as one integer
    int max_len = 15;        // Loop-test budget; 0 means "too long for us"
    int neg = 0, point = -1; // point: offset of '.' from start, -1 if none
    double d;

    while (isspace(*v))
        v++;

    if (*v == '-') {
        neg = 1;
        v++;
    } else if (*v == '+') {
        v++;
    }

    // Only strings starting with a decimal digit qualify for the fast path.
    switch(*v) {
    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        break;

    case '0':
        if (v[1] != 'x' && v[1] != 'X') break;
        // else fall through - hex number

    default:
        // Non numbers, like NaN, Inf.  strtod() restarts from "in" so it
        // sees the white space and sign itself.
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    // Leading zeros carry no value and must not eat into the digit budget.
    while (*v == '0') ++v;

    const unsigned char *start = v;

    // Integer part.  Note that every loop test, including the one that
    // ends the loop, decrements max_len.
    while (--max_len && *v>='0' && *v<='9')
        n = n*10 + *v++ - ascii_zero;
    if (max_len && *v == '.') {
        point = v - start;
        v++;
        // Fraction digits are accumulated into the same integer.
        while (--max_len && *v>='0' && *v<='9')
            n = n*10 + *v++ - ascii_zero;
    }
    if (point < 0)
        point = v - start;

    // Outside the scope of this quick and dirty parser.
    if (!max_len || *v == 'e' || *v == 'E') {
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    *end = (char *)v;
    // n and the divisor are both small enough to be exact doubles, so a
    // single division gives the result (correctly rounded, assuming
    // IEEE 754 arithmetic).
    d = n / D[v - start - point];

    return neg ? -d : d;
}
Unexecuted instantiation: header.c:hts_str2dbl
Unexecuted instantiation: hfile.c:hts_str2dbl
Unexecuted instantiation: hts.c:hts_str2dbl
Unexecuted instantiation: hts_expr.c:hts_str2dbl
Unexecuted instantiation: multipart.c:hts_str2dbl
Unexecuted instantiation: sam.c:hts_str2dbl
Unexecuted instantiation: textutils.c:hts_str2dbl
vcf.c:hts_str2dbl
Line
Count
Source
338
63.2M
/* Convert a decimal string to a double.
 *
 * in     - input string; leading white space and one optional sign are
 *          skipped
 * end    - receives the address of the first character after the number
 * failed - set to 1 when nothing could be converted; never cleared here
 *
 * Short plain decimals (integer-part leading zeros ignored, then at most 13
 * digits, or 12 with a decimal point) are converted directly.  Anything
 * else - longer numbers, exponents, hexadecimal, inf, nan, a leading '.' -
 * is handed to strtod(), which also does any range-error reporting.
 * Both end and failed must be non-NULL.
 */
static inline double hts_str2dbl(const char *in, char **end, int *failed) {
    // Divisors for the fast path.  The index is the number of characters
    // consumed after the integer part: 0 when there is no decimal point,
    // otherwise 1 (for the '.') plus the number of fraction digits.  That
    // is why the table starts with two 1s.  It is read-only, hence const.
    static const double D[] = {1,1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
                               1e8, 1e9, 1e10,1e11,1e12,1e13,1e14,1e15,
                               1e16,1e17,1e18,1e19,1e20};
    const unsigned char *v = (const unsigned char *) in;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    uint64_t n = 0;          // Digits seen so far, as one integer
    int max_len = 15;        // Loop-test budget; 0 means "too long for us"
    int neg = 0, point = -1; // point: offset of '.' from start, -1 if none
    double d;

    while (isspace(*v))
        v++;

    if (*v == '-') {
        neg = 1;
        v++;
    } else if (*v == '+') {
        v++;
    }

    // Only strings starting with a decimal digit qualify for the fast path.
    switch(*v) {
    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        break;

    case '0':
        if (v[1] != 'x' && v[1] != 'X') break;
        // else fall through - hex number

    default:
        // Non numbers, like NaN, Inf.  strtod() restarts from "in" so it
        // sees the white space and sign itself.
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    // Leading zeros carry no value and must not eat into the digit budget.
    while (*v == '0') ++v;

    const unsigned char *start = v;

    // Integer part.  Note that every loop test, including the one that
    // ends the loop, decrements max_len.
    while (--max_len && *v>='0' && *v<='9')
        n = n*10 + *v++ - ascii_zero;
    if (max_len && *v == '.') {
        point = v - start;
        v++;
        // Fraction digits are accumulated into the same integer.
        while (--max_len && *v>='0' && *v<='9')
            n = n*10 + *v++ - ascii_zero;
    }
    if (point < 0)
        point = v - start;

    // Outside the scope of this quick and dirty parser.
    if (!max_len || *v == 'e' || *v == 'E') {
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    *end = (char *)v;
    // n and the divisor are both small enough to be exact doubles, so a
    // single division gives the result (correctly rounded, assuming
    // IEEE 754 arithmetic).
    d = n / D[v - start - point];

    return neg ? -d : d;
}
Unexecuted instantiation: cram_encode.c:hts_str2dbl
Unexecuted instantiation: cram_index.c:hts_str2dbl
Unexecuted instantiation: cram_io.c:hts_str2dbl
Unexecuted instantiation: hfile_libcurl.c:hts_str2dbl
Unexecuted instantiation: hfile_gcs.c:hts_str2dbl
Unexecuted instantiation: hfile_s3.c:hts_str2dbl
Unexecuted instantiation: hfile_s3_write.c:hts_str2dbl
Unexecuted instantiation: bgzf.c:hts_str2dbl
Unexecuted instantiation: faidx.c:hts_str2dbl
Unexecuted instantiation: tbx.c:hts_str2dbl
404
405
406
#ifdef __cplusplus
407
}
408
#endif
409
410
#endif