Coverage Report

Created: 2025-11-11 06:39

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/textutils_internal.h
Line
Count
Source
1
/* textutils_internal.h -- non-bioinformatics utility routines for text etc.
2
3
   Copyright (C) 2016,2018-2020, 2024 Genome Research Ltd.
4
5
   Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#ifndef HTSLIB_TEXTUTILS_INTERNAL_H
26
#define HTSLIB_TEXTUTILS_INTERNAL_H
27
28
/* N.B. These interfaces may be used by plug-ins */
29
30
#include <ctype.h>
31
#include <stdlib.h>
32
#include "htslib/kstring.h"
33
34
#ifdef __cplusplus
35
extern "C" {
36
#endif
37
38
/// Decode percent-encoded (URL-encoded) text
39
/** On input, _dest_ should be a buffer at least the same size as _s_,
40
    and may be equal to _s_ to decode in place.  On output, _dest_ will be
41
    NUL-terminated and the number of characters written (not including the
42
    NUL) is stored in _destlen_.
43
*/
44
int hts_decode_percent(char *dest, size_t *destlen, const char *s);
45
46
/// Return decoded data length given length of base64-encoded text
47
/** This gives an upper bound, as it overestimates by a byte or two when
48
    the encoded text ends with (possibly omitted) `=` padding characters.
49
*/
50
size_t hts_base64_decoded_length(size_t len);
51
52
/// Decode base64-encoded data
53
/** On input, _dest_ should be a sufficient buffer (see `hts_base64_decoded_length()`),
54
    and may be equal to _s_ to decode in place.  On output, the number of
55
    bytes written is stored in _destlen_.
56
*/
57
int hts_decode_base64(char *dest, size_t *destlen, const char *s);
58
59
/// Token structure returned by JSON lexing functions
60
/** Structure is defined in hts_internal.h
61
 */
62
63
typedef struct hts_json_token hts_json_token;
64
65
/// Allocate an empty JSON token structure, for use with hts_json_* functions
66
/** @return An empty token on success; NULL on failure
67
 */
68
HTSLIB_EXPORT
69
hts_json_token *hts_json_alloc_token(void);
70
71
/// Free a JSON token
72
HTSLIB_EXPORT
73
void hts_json_free_token(hts_json_token *token);
74
75
/// Accessor function to get JSON token type
76
/** @param  token Pointer to JSON token
77
    @return Character indicating the token type
78
79
Token types correspond to scalar JSON values and selected punctuation
80
as follows:
81
  - `s` string
82
  - `n` number
83
  - `b` boolean literal
84
  - `.` null literal
85
  - `{`, `}`, `[`, `]` object and array delimiters
86
  - `?` lexing error
87
  - `!` other errors (e.g. out of memory)
88
  - `\0` terminator at end of input
89
*/
90
HTSLIB_EXPORT
91
char hts_json_token_type(hts_json_token *token);
92
93
/// Accessor function to get JSON token in string form
94
/** @param  token Pointer to JSON token
95
    @return String representation of the JSON token; NULL if unset
96
97
If the token was parsed from a string using hts_json_snext(), the return value
98
will point into the string passed as the first parameter to hts_json_snext().
99
If the token was parsed from a file using hts_json_fnext(), the return value
100
will point at the kstring_t buffer passed as the third parameter to
101
hts_json_fnext().  In that case, the value will only be valid until the
102
next call to hts_json_fnext().
103
 */
104
HTSLIB_EXPORT
105
char *hts_json_token_str(hts_json_token *token);
106
107
/// Read one JSON token from a string
108
/** @param str    The input C string
109
    @param state  The input string state
110
    @param token  On return, filled in with the token read
111
    @return  The type of the token read
112
113
On return, `token->str` points into the supplied input string, which
114
is modified by having token-terminating characters overwritten as NULs.
115
The `state` argument records the current position within `str` after each
116
`hts_json_snext()` call, and should be set to 0 before the first call.
117
*/
118
HTSLIB_EXPORT
119
char hts_json_snext(char *str, size_t *state, hts_json_token *token);
120
121
/// Read and discard a complete JSON value from a string
122
/** @param str    The input C string
123
    @param state  The input string state, as per `hts_json_snext()`
124
    @param type   If the first token of the value to be discarded has already
125
                  been read, provide its type; otherwise `'\0'`
126
    @return  One of `v` (success), `\0` (end of string), and `?` (lexing error)
127
128
Skips a complete JSON value, which may be a single token or an entire object
129
or array.
130
*/
131
HTSLIB_EXPORT
132
char hts_json_sskip_value(char *str, size_t *state, char type);
133
134
struct hFILE;
135
136
/// Read one JSON token from a file
137
/** @param fp     The file stream
138
    @param token  On return, filled in with the token read
139
    @param kstr   Buffer used to store the token string returned
140
    @return  The type of the token read
141
142
The `kstr` buffer is used to store the string value of the token read,
143
so `token->str` is only valid until the next time `hts_json_fnext()` is
144
called with the same `kstr` argument.
145
*/
146
HTSLIB_EXPORT
147
char hts_json_fnext(struct hFILE *fp, hts_json_token *token, kstring_t *kstr);
148
149
/// Read and discard a complete JSON value from a file
150
/** @param fp    The file stream
151
    @param type  If the first token of the value to be discarded has already
152
                 been read, provide its type; otherwise `'\0'`
153
    @return  One of `v` (success), `\0` (EOF), and `?` (lexing error)
154
155
Skips a complete JSON value, which may be a single token or an entire object
156
or array.
157
*/
158
HTSLIB_EXPORT
159
char hts_json_fskip_value(struct hFILE *fp, char type);
160
161
// The <ctype.h> functions operate on ints such as are returned by fgetc(),
162
// i.e., characters represented as unsigned-char-valued ints, or EOF.
163
// To operate on plain chars (and to avoid warnings on some platforms),
164
// technically one must cast to unsigned char everywhere (see CERT STR37-C)
165
// or less painfully use these *_c() functions that operate on plain chars
166
// (but not EOF, which must be considered separately where it is applicable).
167
// TODO We may eventually wish to implement these functions directly without
168
// using their <ctype.h> equivalents, and thus make them immune to locales.
169
2.15M
/// isalnum() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isalnum_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isalnum(uc);
}
Unexecuted instantiation: header.c:isalnum_c
hfile.c:isalnum_c
Line
Count
Source
169
336k
static inline int isalnum_c(char c) { return isalnum((unsigned char) c); }
Unexecuted instantiation: hts.c:isalnum_c
Unexecuted instantiation: hts_expr.c:isalnum_c
Unexecuted instantiation: multipart.c:isalnum_c
Unexecuted instantiation: sam.c:isalnum_c
Unexecuted instantiation: sam_mods.c:isalnum_c
Unexecuted instantiation: textutils.c:isalnum_c
vcf.c:isalnum_c
Line
Count
Source
169
1.81M
static inline int isalnum_c(char c) { return isalnum((unsigned char) c); }
Unexecuted instantiation: cram_encode.c:isalnum_c
Unexecuted instantiation: cram_index.c:isalnum_c
Unexecuted instantiation: cram_io.c:isalnum_c
Unexecuted instantiation: hfile_libcurl.c:isalnum_c
Unexecuted instantiation: hfile_gcs.c:isalnum_c
Unexecuted instantiation: hfile_s3.c:isalnum_c
Unexecuted instantiation: hfile_s3_write.c:isalnum_c
Unexecuted instantiation: bgzf.c:isalnum_c
Unexecuted instantiation: faidx.c:isalnum_c
Unexecuted instantiation: tbx.c:isalnum_c
170
2.32M
/// isalpha() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isalpha_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isalpha(uc);
}
header.c:isalpha_c
Line
Count
Source
170
1.98M
static inline int isalpha_c(char c) { return isalpha((unsigned char) c); }
Unexecuted instantiation: hfile.c:isalpha_c
Unexecuted instantiation: hts.c:isalpha_c
Unexecuted instantiation: hts_expr.c:isalpha_c
Unexecuted instantiation: multipart.c:isalpha_c
Unexecuted instantiation: sam.c:isalpha_c
Unexecuted instantiation: sam_mods.c:isalpha_c
Unexecuted instantiation: textutils.c:isalpha_c
vcf.c:isalpha_c
Line
Count
Source
170
341k
static inline int isalpha_c(char c) { return isalpha((unsigned char) c); }
Unexecuted instantiation: cram_encode.c:isalpha_c
Unexecuted instantiation: cram_index.c:isalpha_c
Unexecuted instantiation: cram_io.c:isalpha_c
Unexecuted instantiation: hfile_libcurl.c:isalpha_c
Unexecuted instantiation: hfile_gcs.c:isalpha_c
Unexecuted instantiation: hfile_s3.c:isalpha_c
Unexecuted instantiation: hfile_s3_write.c:isalpha_c
Unexecuted instantiation: bgzf.c:isalpha_c
Unexecuted instantiation: faidx.c:isalpha_c
Unexecuted instantiation: tbx.c:isalpha_c
171
2.52M
/// isdigit() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isdigit_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isdigit(uc);
}
Unexecuted instantiation: header.c:isdigit_c
Unexecuted instantiation: hfile.c:isdigit_c
hts.c:isdigit_c
Line
Count
Source
171
7.06k
static inline int isdigit_c(char c) { return isdigit((unsigned char) c); }
Unexecuted instantiation: hts_expr.c:isdigit_c
Unexecuted instantiation: multipart.c:isdigit_c
sam.c:isdigit_c
Line
Count
Source
171
2.52M
static inline int isdigit_c(char c) { return isdigit((unsigned char) c); }
Unexecuted instantiation: sam_mods.c:isdigit_c
Unexecuted instantiation: textutils.c:isdigit_c
Unexecuted instantiation: vcf.c:isdigit_c
Unexecuted instantiation: cram_encode.c:isdigit_c
Unexecuted instantiation: cram_index.c:isdigit_c
Unexecuted instantiation: cram_io.c:isdigit_c
Unexecuted instantiation: hfile_libcurl.c:isdigit_c
Unexecuted instantiation: hfile_gcs.c:isdigit_c
hfile_s3.c:isdigit_c
Line
Count
Source
171
897
static inline int isdigit_c(char c) { return isdigit((unsigned char) c); }
Unexecuted instantiation: hfile_s3_write.c:isdigit_c
Unexecuted instantiation: bgzf.c:isdigit_c
Unexecuted instantiation: faidx.c:isdigit_c
Unexecuted instantiation: tbx.c:isdigit_c
172
0
/// isgraph() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isgraph_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isgraph(uc);
}
Unexecuted instantiation: header.c:isgraph_c
Unexecuted instantiation: hfile.c:isgraph_c
Unexecuted instantiation: hts.c:isgraph_c
Unexecuted instantiation: hts_expr.c:isgraph_c
Unexecuted instantiation: multipart.c:isgraph_c
Unexecuted instantiation: sam.c:isgraph_c
Unexecuted instantiation: sam_mods.c:isgraph_c
Unexecuted instantiation: textutils.c:isgraph_c
Unexecuted instantiation: vcf.c:isgraph_c
Unexecuted instantiation: cram_encode.c:isgraph_c
Unexecuted instantiation: cram_index.c:isgraph_c
Unexecuted instantiation: cram_io.c:isgraph_c
Unexecuted instantiation: hfile_libcurl.c:isgraph_c
Unexecuted instantiation: hfile_gcs.c:isgraph_c
Unexecuted instantiation: hfile_s3.c:isgraph_c
Unexecuted instantiation: hfile_s3_write.c:isgraph_c
Unexecuted instantiation: bgzf.c:isgraph_c
Unexecuted instantiation: faidx.c:isgraph_c
Unexecuted instantiation: tbx.c:isgraph_c
173
1.80k
/// islower() for a plain char (never EOF): the value goes through unsigned char first.
static inline int islower_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return islower(uc);
}
Unexecuted instantiation: header.c:islower_c
Unexecuted instantiation: hfile.c:islower_c
Unexecuted instantiation: hts.c:islower_c
Unexecuted instantiation: hts_expr.c:islower_c
Unexecuted instantiation: multipart.c:islower_c
Unexecuted instantiation: sam.c:islower_c
Unexecuted instantiation: sam_mods.c:islower_c
Unexecuted instantiation: textutils.c:islower_c
Unexecuted instantiation: vcf.c:islower_c
Unexecuted instantiation: cram_encode.c:islower_c
Unexecuted instantiation: cram_index.c:islower_c
Unexecuted instantiation: cram_io.c:islower_c
Unexecuted instantiation: hfile_libcurl.c:islower_c
Unexecuted instantiation: hfile_gcs.c:islower_c
hfile_s3.c:islower_c
Line
Count
Source
173
1.80k
static inline int islower_c(char c) { return islower((unsigned char) c); }
Unexecuted instantiation: hfile_s3_write.c:islower_c
Unexecuted instantiation: bgzf.c:islower_c
Unexecuted instantiation: faidx.c:islower_c
Unexecuted instantiation: tbx.c:islower_c
174
1.44M
/// isprint() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isprint_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isprint(uc);
}
Unexecuted instantiation: header.c:isprint_c
Unexecuted instantiation: hfile.c:isprint_c
Unexecuted instantiation: hts.c:isprint_c
Unexecuted instantiation: hts_expr.c:isprint_c
Unexecuted instantiation: multipart.c:isprint_c
Unexecuted instantiation: sam.c:isprint_c
Unexecuted instantiation: sam_mods.c:isprint_c
textutils.c:isprint_c
Line
Count
Source
174
1.44M
static inline int isprint_c(char c) { return isprint((unsigned char) c); }
Unexecuted instantiation: vcf.c:isprint_c
Unexecuted instantiation: cram_encode.c:isprint_c
Unexecuted instantiation: cram_index.c:isprint_c
Unexecuted instantiation: cram_io.c:isprint_c
Unexecuted instantiation: hfile_libcurl.c:isprint_c
Unexecuted instantiation: hfile_gcs.c:isprint_c
Unexecuted instantiation: hfile_s3.c:isprint_c
Unexecuted instantiation: hfile_s3_write.c:isprint_c
Unexecuted instantiation: bgzf.c:isprint_c
Unexecuted instantiation: faidx.c:isprint_c
Unexecuted instantiation: tbx.c:isprint_c
175
0
/// ispunct() for a plain char (never EOF): the value goes through unsigned char first.
static inline int ispunct_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return ispunct(uc);
}
Unexecuted instantiation: header.c:ispunct_c
Unexecuted instantiation: hfile.c:ispunct_c
Unexecuted instantiation: hts.c:ispunct_c
Unexecuted instantiation: hts_expr.c:ispunct_c
Unexecuted instantiation: multipart.c:ispunct_c
Unexecuted instantiation: sam.c:ispunct_c
Unexecuted instantiation: sam_mods.c:ispunct_c
Unexecuted instantiation: textutils.c:ispunct_c
Unexecuted instantiation: vcf.c:ispunct_c
Unexecuted instantiation: cram_encode.c:ispunct_c
Unexecuted instantiation: cram_index.c:ispunct_c
Unexecuted instantiation: cram_io.c:ispunct_c
Unexecuted instantiation: hfile_libcurl.c:ispunct_c
Unexecuted instantiation: hfile_gcs.c:ispunct_c
Unexecuted instantiation: hfile_s3.c:ispunct_c
Unexecuted instantiation: hfile_s3_write.c:ispunct_c
Unexecuted instantiation: bgzf.c:ispunct_c
Unexecuted instantiation: faidx.c:ispunct_c
Unexecuted instantiation: tbx.c:ispunct_c
176
38.5M
/// isspace() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isspace_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isspace(uc);
}
Unexecuted instantiation: header.c:isspace_c
Unexecuted instantiation: hfile.c:isspace_c
hts.c:isspace_c
Line
Count
Source
176
8.57k
static inline int isspace_c(char c) { return isspace((unsigned char) c); }
Unexecuted instantiation: hts_expr.c:isspace_c
Unexecuted instantiation: multipart.c:isspace_c
sam.c:isspace_c
Line
Count
Source
176
37.3M
static inline int isspace_c(char c) { return isspace((unsigned char) c); }
Unexecuted instantiation: sam_mods.c:isspace_c
Unexecuted instantiation: textutils.c:isspace_c
vcf.c:isspace_c
Line
Count
Source
176
1.13M
static inline int isspace_c(char c) { return isspace((unsigned char) c); }
Unexecuted instantiation: cram_encode.c:isspace_c
Unexecuted instantiation: cram_index.c:isspace_c
Unexecuted instantiation: cram_io.c:isspace_c
Unexecuted instantiation: hfile_libcurl.c:isspace_c
Unexecuted instantiation: hfile_gcs.c:isspace_c
Unexecuted instantiation: hfile_s3.c:isspace_c
Unexecuted instantiation: hfile_s3_write.c:isspace_c
Unexecuted instantiation: bgzf.c:isspace_c
Unexecuted instantiation: faidx.c:isspace_c
Unexecuted instantiation: tbx.c:isspace_c
177
0
/// isupper() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isupper_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isupper(uc);
}
Unexecuted instantiation: header.c:isupper_c
Unexecuted instantiation: hfile.c:isupper_c
Unexecuted instantiation: hts.c:isupper_c
Unexecuted instantiation: hts_expr.c:isupper_c
Unexecuted instantiation: multipart.c:isupper_c
Unexecuted instantiation: sam.c:isupper_c
Unexecuted instantiation: sam_mods.c:isupper_c
Unexecuted instantiation: textutils.c:isupper_c
Unexecuted instantiation: vcf.c:isupper_c
Unexecuted instantiation: cram_encode.c:isupper_c
Unexecuted instantiation: cram_index.c:isupper_c
Unexecuted instantiation: cram_io.c:isupper_c
Unexecuted instantiation: hfile_libcurl.c:isupper_c
Unexecuted instantiation: hfile_gcs.c:isupper_c
Unexecuted instantiation: hfile_s3.c:isupper_c
Unexecuted instantiation: hfile_s3_write.c:isupper_c
Unexecuted instantiation: bgzf.c:isupper_c
Unexecuted instantiation: faidx.c:isupper_c
Unexecuted instantiation: tbx.c:isupper_c
178
91.5k
/// isxdigit() for a plain char (never EOF): the value goes through unsigned char first.
static inline int isxdigit_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return isxdigit(uc);
}
Unexecuted instantiation: header.c:isxdigit_c
Unexecuted instantiation: hfile.c:isxdigit_c
Unexecuted instantiation: hts.c:isxdigit_c
Unexecuted instantiation: hts_expr.c:isxdigit_c
Unexecuted instantiation: multipart.c:isxdigit_c
Unexecuted instantiation: sam.c:isxdigit_c
Unexecuted instantiation: sam_mods.c:isxdigit_c
Unexecuted instantiation: textutils.c:isxdigit_c
Unexecuted instantiation: vcf.c:isxdigit_c
Unexecuted instantiation: cram_encode.c:isxdigit_c
Unexecuted instantiation: cram_index.c:isxdigit_c
Unexecuted instantiation: cram_io.c:isxdigit_c
Unexecuted instantiation: hfile_libcurl.c:isxdigit_c
Unexecuted instantiation: hfile_gcs.c:isxdigit_c
hfile_s3.c:isxdigit_c
Line
Count
Source
178
91.5k
static inline int isxdigit_c(char c) { return isxdigit((unsigned char) c); }
Unexecuted instantiation: hfile_s3_write.c:isxdigit_c
Unexecuted instantiation: bgzf.c:isxdigit_c
Unexecuted instantiation: faidx.c:isxdigit_c
Unexecuted instantiation: tbx.c:isxdigit_c
179
226k
/// tolower() for a plain char (never EOF): the value goes through unsigned char first.
static inline char tolower_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return tolower(uc);
}
Unexecuted instantiation: header.c:tolower_c
hfile.c:tolower_c
Line
Count
Source
179
226k
static inline char tolower_c(char c) { return tolower((unsigned char) c); }
Unexecuted instantiation: hts.c:tolower_c
Unexecuted instantiation: hts_expr.c:tolower_c
Unexecuted instantiation: multipart.c:tolower_c
Unexecuted instantiation: sam.c:tolower_c
Unexecuted instantiation: sam_mods.c:tolower_c
Unexecuted instantiation: textutils.c:tolower_c
Unexecuted instantiation: vcf.c:tolower_c
Unexecuted instantiation: cram_encode.c:tolower_c
Unexecuted instantiation: cram_index.c:tolower_c
Unexecuted instantiation: cram_io.c:tolower_c
Unexecuted instantiation: hfile_libcurl.c:tolower_c
Unexecuted instantiation: hfile_gcs.c:tolower_c
Unexecuted instantiation: hfile_s3.c:tolower_c
Unexecuted instantiation: hfile_s3_write.c:tolower_c
Unexecuted instantiation: bgzf.c:tolower_c
Unexecuted instantiation: faidx.c:tolower_c
Unexecuted instantiation: tbx.c:tolower_c
180
0
/// toupper() for a plain char (never EOF): the value goes through unsigned char first.
static inline char toupper_c(char c)
{
    const unsigned char uc = (unsigned char) c;
    return toupper(uc);
}
Unexecuted instantiation: header.c:toupper_c
Unexecuted instantiation: hfile.c:toupper_c
Unexecuted instantiation: hts.c:toupper_c
Unexecuted instantiation: hts_expr.c:toupper_c
Unexecuted instantiation: multipart.c:toupper_c
Unexecuted instantiation: sam.c:toupper_c
Unexecuted instantiation: sam_mods.c:toupper_c
Unexecuted instantiation: textutils.c:toupper_c
Unexecuted instantiation: vcf.c:toupper_c
Unexecuted instantiation: cram_encode.c:toupper_c
Unexecuted instantiation: cram_index.c:toupper_c
Unexecuted instantiation: cram_io.c:toupper_c
Unexecuted instantiation: hfile_libcurl.c:toupper_c
Unexecuted instantiation: hfile_gcs.c:toupper_c
Unexecuted instantiation: hfile_s3.c:toupper_c
Unexecuted instantiation: hfile_s3_write.c:toupper_c
Unexecuted instantiation: bgzf.c:toupper_c
Unexecuted instantiation: faidx.c:toupper_c
Unexecuted instantiation: tbx.c:toupper_c
181
182
/// Copy possibly malicious text data to a buffer
183
/** @param buf     Destination buffer
184
    @param buflen  Size of the destination buffer (>= 4; >= 6 when quotes used)
185
    @param quote   Quote character (or '\0' for no quoting of the output)
186
    @param s       String to be copied
187
    @param len     Length of the input string, or SIZE_MAX to copy until '\0'
188
    @return The destination buffer, @a buf.
189
190
Copies the source text string (escaping any unprintable characters) to the
191
destination buffer. The destination buffer will always be NUL-terminated;
192
the text will be truncated (and "..." appended) if necessary to make it fit.
193
 */
194
const char *hts_strprint(char *buf, size_t buflen, char quote,
195
                         const char *s, size_t len);
196
197
// Faster replacements for strtol, for use when parsing lots of numbers.
198
// Note that these only handle base 10 and do not skip leading whitespace
199
200
/// Convert a string to a signed integer, with overflow detection
201
/** @param[in]  in     Input string
202
    @param[out] end    Returned end pointer
203
    @param[in]  bits   Bits available for the converted value
204
    @param[out] failed Location of overflow flag
205
    @return String value converted to an int64_t
206
207
Converts a signed decimal string to an int64_t.  The string should
208
consist of an optional '+' or '-' sign followed by one or more of
209
the digits 0 to 9.  The output value will be limited to fit in the
210
given number of bits (including the sign bit).  If the value is too big,
211
the in-range value nearest to it (the most negative one for negative input) will be returned and *failed will be set to 1; *failed is never cleared.
212
213
The address of the first character following the converted number will
214
be stored in *end.
215
216
Both end and failed must be non-NULL.
217
 */
218
static inline int64_t hts_str2int(const char *in, char **end, int bits,
219
121M
                                    int *failed) {
220
121M
    uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1; // limit = 2^(bits-1) - 1, the largest non-negative result
221
121M
    uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322; fast-1 decimal digits always stay below 2^(bits-1)
222
121M
    const unsigned char *v = (const unsigned char *) in;
223
121M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
224
121M
    unsigned int d;
225
226
121M
    int neg;
227
121M
    switch(*v) { // optional leading sign
228
2.71M
    case '-':
229
2.71M
        limit++; // negative magnitudes may reach 2^(bits-1)
230
2.71M
        neg=1;
231
2.71M
        v++;
232
        // See "dup" comment below
233
10.9M
        while (--fast && *v>='0' && *v<='9') // unchecked fast path: consumes at most fast-1 digits
234
8.24M
            n = n*10 + *v++ - ascii_zero;
235
2.71M
        break;
236
237
12.8k
    case '+':
238
12.8k
        v++;
239
        // fall through
240
241
118M
    default:
242
118M
        neg = 0;
243
        // dup of above.  This is somewhat unstable and mainly for code
244
        // size cheats to prevent instruction cache lines spanning 32-byte
245
        // blocks in the sam_parse_B_vals calling code.  It's been tested
246
        // on gcc7, gcc13, clang10 and clang16 with -O2 and -O3.  While
247
        // not exhaustive, this code duplication gives stable fast results
248
        // while a single copy does not.
249
        // (NB: system was "seq4d", so quite old)
250
125M
        while (--fast && *v>='0' && *v<='9') // unchecked fast path: consumes at most fast-1 digits
251
6.95M
            n = n*10 + *v++ - ascii_zero;
252
118M
        break;
253
121M
    }
254
255
    // NB gcc7 is slow with (unsigned)(*v - ascii_zero) < 10,
256
    // while gcc13 prefers it.
257
121M
    if (*v>='0' && !fast) { // rejects ',' and tab; !fast means the fast-path digit budget ran out
258
66.5k
        uint64_t limit_d_10 = limit / 10; // limit / 10
259
66.5k
        uint64_t limit_m_10 = limit - 10 * limit_d_10; // limit % 10
260
234k
        while ((d = *v - ascii_zero) < 10) { // bytes below '0' wrap to a large unsigned d and end the loop
261
201k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) { // true iff n*10 + d <= limit
262
168k
                n = n*10 + d;
263
168k
                v++;
264
168k
            } else {
265
212k
                do { v++; } while (*v - ascii_zero < 10); // overflow: skip the remaining digits
266
33.4k
                n = limit; // saturate at the limit for this sign
267
33.4k
                *failed = 1; // only ever set here, never cleared
268
33.4k
                break;
269
33.4k
            }
270
201k
        }
271
66.5k
    }
272
273
121M
    *end = (char *)v; // first byte that was not consumed
274
275
121M
    return neg ? (int64_t)-n : (int64_t)n; // magnitude is negated as unsigned, then converted to int64_t
276
121M
}
Unexecuted instantiation: header.c:hts_str2int
Unexecuted instantiation: hfile.c:hts_str2int
Unexecuted instantiation: hts.c:hts_str2int
Unexecuted instantiation: hts_expr.c:hts_str2int
Unexecuted instantiation: multipart.c:hts_str2int
sam.c:hts_str2int
Line
Count
Source
219
15.1M
                                    int *failed) {
220
15.1M
    uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1;
221
15.1M
    uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
222
15.1M
    const unsigned char *v = (const unsigned char *) in;
223
15.1M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
224
15.1M
    unsigned int d;
225
226
15.1M
    int neg;
227
15.1M
    switch(*v) {
228
1.74M
    case '-':
229
1.74M
        limit++;
230
1.74M
        neg=1;
231
1.74M
        v++;
232
        // See "dup" comment below
233
7.64M
        while (--fast && *v>='0' && *v<='9')
234
5.89M
            n = n*10 + *v++ - ascii_zero;
235
1.74M
        break;
236
237
2.22k
    case '+':
238
2.22k
        v++;
239
        // fall through
240
241
13.4M
    default:
242
13.4M
        neg = 0;
243
        // dup of above.  This is somewhat unstable and mainly for code
244
        // size cheats to prevent instruction cache lines spanning 32-byte
245
        // blocks in the sam_parse_B_vals calling code.  It's been tested
246
        // on gcc7, gcc13, clang10 and clang16 with -O2 and -O3.  While
247
        // not exhaustive, this code duplication gives stable fast results
248
        // while a single copy does not.
249
        // (NB: system was "seq4d", so quite old)
250
16.6M
        while (--fast && *v>='0' && *v<='9')
251
3.24M
            n = n*10 + *v++ - ascii_zero;
252
13.4M
        break;
253
15.1M
    }
254
255
    // NB gcc7 is slow with (unsigned)(*v - ascii_zero) < 10,
256
    // while gcc13 prefers it.
257
15.1M
    if (*v>='0' && !fast) { // rejects ',' and tab
258
47.3k
        uint64_t limit_d_10 = limit / 10;
259
47.3k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
260
201k
        while ((d = *v - ascii_zero) < 10) {
261
170k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
262
154k
                n = n*10 + d;
263
154k
                v++;
264
154k
            } else {
265
51.9k
                do { v++; } while (*v - ascii_zero < 10);
266
16.2k
                n = limit;
267
16.2k
                *failed = 1;
268
16.2k
                break;
269
16.2k
            }
270
170k
        }
271
47.3k
    }
272
273
15.1M
    *end = (char *)v;
274
275
15.1M
    return neg ? (int64_t)-n : (int64_t)n;
276
15.1M
}
Unexecuted instantiation: sam_mods.c:hts_str2int
Unexecuted instantiation: textutils.c:hts_str2int
vcf.c:hts_str2int
Line
Count
Source
219
106M
                                    int *failed) {
220
106M
    uint64_t n = 0, limit = (1ULL << (bits - 1)) - 1;
221
106M
    uint32_t fast = (bits - 1) * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
222
106M
    const unsigned char *v = (const unsigned char *) in;
223
106M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
224
106M
    unsigned int d;
225
226
106M
    int neg;
227
106M
    switch(*v) {
228
967k
    case '-':
229
967k
        limit++;
230
967k
        neg=1;
231
967k
        v++;
232
        // See "dup" comment below
233
3.31M
        while (--fast && *v>='0' && *v<='9')
234
2.34M
            n = n*10 + *v++ - ascii_zero;
235
967k
        break;
236
237
10.5k
    case '+':
238
10.5k
        v++;
239
        // fall through
240
241
105M
    default:
242
105M
        neg = 0;
243
        // dup of above.  This is somewhat unstable and mainly for code
244
        // size cheats to prevent instruction cache lines spanning 32-byte
245
        // blocks in the sam_parse_B_vals calling code.  It's been tested
246
        // on gcc7, gcc13, clang10 and clang16 with -O2 and -O3.  While
247
        // not exhaustive, this code duplication gives stable fast results
248
        // while a single copy does not.
249
        // (NB: system was "seq4d", so quite old)
250
109M
        while (--fast && *v>='0' && *v<='9')
251
3.70M
            n = n*10 + *v++ - ascii_zero;
252
105M
        break;
253
106M
    }
254
255
    // NB gcc7 is slow with (unsigned)(*v - ascii_zero) < 10,
256
    // while gcc13 prefers it.
257
106M
    if (*v>='0' && !fast) { // rejects ',' and tab
258
19.2k
        uint64_t limit_d_10 = limit / 10;
259
19.2k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
260
33.0k
        while ((d = *v - ascii_zero) < 10) {
261
31.0k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
262
13.8k
                n = n*10 + d;
263
13.8k
                v++;
264
17.2k
            } else {
265
160k
                do { v++; } while (*v - ascii_zero < 10);
266
17.2k
                n = limit;
267
17.2k
                *failed = 1;
268
17.2k
                break;
269
17.2k
            }
270
31.0k
        }
271
19.2k
    }
272
273
106M
    *end = (char *)v;
274
275
106M
    return neg ? (int64_t)-n : (int64_t)n;
276
106M
}
Unexecuted instantiation: cram_encode.c:hts_str2int
Unexecuted instantiation: cram_index.c:hts_str2int
Unexecuted instantiation: cram_io.c:hts_str2int
Unexecuted instantiation: hfile_libcurl.c:hts_str2int
Unexecuted instantiation: hfile_gcs.c:hts_str2int
Unexecuted instantiation: hfile_s3.c:hts_str2int
Unexecuted instantiation: hfile_s3_write.c:hts_str2int
Unexecuted instantiation: bgzf.c:hts_str2int
Unexecuted instantiation: faidx.c:hts_str2int
Unexecuted instantiation: tbx.c:hts_str2int
277
278
/// Convert a string to an unsigned integer, with overflow detection
/** @param[in]  in     Input string
    @param[out] end    Returned end pointer
    @param[in]  bits   Bits available for the converted value
    @param[out] failed Location of overflow flag
    @return String value converted to a uint64_t

Converts an unsigned decimal string to a uint64_t.  The string should
consist of an optional '+' sign followed by one or more of the digits 0
to 9.  The output value will be limited to fit in the given number of bits.
If the value is too big, the largest possible value will be returned
and *failed will be set to 1.  *failed is only ever set, never cleared,
so it should be zero on entry if it is to be tested afterwards.

_bits_ is clamped to the range 0 to 64: a uint64_t cannot hold more than
64 bits, and negative values are treated as 0.

The address of the first character following the converted number will
be stored in *end.  No check is made that any digits were present; if
there were none, 0 is returned and *end points just past the optional
'+' sign.

Both end and failed must be non-NULL.
 */

static inline uint64_t hts_str2uint(const char *in, char **end, int bits,
                                    int *failed) {
    // Keep the shift below well defined, and stop the unchecked fast path
    // from being granted more digits than a uint64_t can hold.
    if (bits < 0)
        bits = 0;
    else if (bits > 64)
        bits = 64;

    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
    const unsigned char *v = (const unsigned char *) in;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    // One more than the number of decimal digits that always fit in _bits_
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
    unsigned int d;

    if (*v == '+')
        v++;

    // Fast path: this many digits cannot exceed limit, so no range checks
    while (--fast && *v>='0' && *v<='9')
        n = n*10 + *v++ - ascii_zero;

    // Slow path: the digit budget ran out and there are still digits left,
    // so every further digit has to be checked against limit.
    if ((unsigned)(*v - ascii_zero) < 10 && !fast) {
        uint64_t limit_d_10 = limit / 10;
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
        while ((d = *v - ascii_zero) < 10) {
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
                n = n*10 + d;
                v++;
            } else {
                // Too big: skip the remaining digits and saturate
                do { v++; } while (*v - ascii_zero < 10);
                n = limit;
                *failed = 1;
                break;
            }
        }
    }

    *end = (char *)v;
    return n;
}
Unexecuted instantiation: header.c:hts_str2uint
Unexecuted instantiation: hfile.c:hts_str2uint
Unexecuted instantiation: hts.c:hts_str2uint
Unexecuted instantiation: hts_expr.c:hts_str2uint
Unexecuted instantiation: multipart.c:hts_str2uint
sam.c:hts_str2uint
Line
Count
Source
298
7.91M
                                    int *failed) {
299
7.91M
    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
300
7.91M
    const unsigned char *v = (const unsigned char *) in;
301
7.91M
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
302
7.91M
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
303
7.91M
    unsigned int d;
304
305
7.91M
    if (*v == '+')
306
1.21k
        v++;
307
308
11.1M
    while (--fast && *v>='0' && *v<='9')
309
3.18M
        n = n*10 + *v++ - ascii_zero;
310
311
7.91M
    if ((unsigned)(*v - ascii_zero) < 10 && !fast) {
312
214k
        uint64_t limit_d_10 = limit / 10;
313
214k
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
314
480k
        while ((d = *v - ascii_zero) < 10) {
315
376k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
316
265k
                n = n*10 + d;
317
265k
                v++;
318
265k
            } else {
319
166k
                do { v++; } while (*v - ascii_zero < 10);
320
110k
                n = limit;
321
110k
                *failed = 1;
322
110k
                break;
323
110k
            }
324
376k
        }
325
214k
    }
326
327
7.91M
    *end = (char *)v;
328
7.91M
    return n;
329
7.91M
}
Unexecuted instantiation: sam_mods.c:hts_str2uint
Unexecuted instantiation: textutils.c:hts_str2uint
vcf.c:hts_str2uint
Line
Count
Source
298
44.7k
                                    int *failed) {
299
44.7k
    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
300
44.7k
    const unsigned char *v = (const unsigned char *) in;
301
44.7k
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
302
44.7k
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
303
44.7k
    unsigned int d;
304
305
44.7k
    if (*v == '+')
306
61
        v++;
307
308
182k
    while (--fast && *v>='0' && *v<='9')
309
138k
        n = n*10 + *v++ - ascii_zero;
310
311
44.7k
    if ((unsigned)(*v - ascii_zero) < 10 && !fast) {
312
904
        uint64_t limit_d_10 = limit / 10;
313
904
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
314
15.7k
        while ((d = *v - ascii_zero) < 10) {
315
15.1k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
316
14.8k
                n = n*10 + d;
317
14.8k
                v++;
318
14.8k
            } else {
319
15.5k
                do { v++; } while (*v - ascii_zero < 10);
320
233
                n = limit;
321
233
                *failed = 1;
322
233
                break;
323
233
            }
324
15.1k
        }
325
904
    }
326
327
44.7k
    *end = (char *)v;
328
44.7k
    return n;
329
44.7k
}
cram_encode.c:hts_str2uint
Line
Count
Source
298
2.07k
                                    int *failed) {
299
2.07k
    uint64_t n = 0, limit = (bits < 64 ? (1ULL << bits) : 0) - 1;
300
2.07k
    const unsigned char *v = (const unsigned char *) in;
301
2.07k
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
302
2.07k
    uint32_t fast = bits * 1000 / 3322 + 1; // log(10)/log(2) ~= 3.322
303
2.07k
    unsigned int d;
304
305
2.07k
    if (*v == '+')
306
0
        v++;
307
308
10.2k
    while (--fast && *v>='0' && *v<='9')
309
8.22k
        n = n*10 + *v++ - ascii_zero;
310
311
2.07k
    if ((unsigned)(*v - ascii_zero) < 10 && !fast) {
312
431
        uint64_t limit_d_10 = limit / 10;
313
431
        uint64_t limit_m_10 = limit - 10 * limit_d_10;
314
1.33k
        while ((d = *v - ascii_zero) < 10) {
315
1.20k
            if (n < limit_d_10 || (n == limit_d_10 && d <= limit_m_10)) {
316
903
                n = n*10 + d;
317
903
                v++;
318
903
            } else {
319
3.01k
                do { v++; } while (*v - ascii_zero < 10);
320
298
                n = limit;
321
298
                *failed = 1;
322
298
                break;
323
298
            }
324
1.20k
        }
325
431
    }
326
327
2.07k
    *end = (char *)v;
328
2.07k
    return n;
329
2.07k
}
Unexecuted instantiation: cram_index.c:hts_str2uint
Unexecuted instantiation: cram_io.c:hts_str2uint
Unexecuted instantiation: hfile_libcurl.c:hts_str2uint
Unexecuted instantiation: hfile_gcs.c:hts_str2uint
Unexecuted instantiation: hfile_s3.c:hts_str2uint
Unexecuted instantiation: hfile_s3_write.c:hts_str2uint
Unexecuted instantiation: bgzf.c:hts_str2uint
Unexecuted instantiation: faidx.c:hts_str2uint
Unexecuted instantiation: tbx.c:hts_str2uint
330
331
/// Convert a string to a double, with overflow detection
/** @param[in]  in     Input string
    @param[out] end    Returned end pointer
    @param[out] failed Location of overflow flag
    @return String value converted to a double

Converts a floating point value string to a double.  Leading white space
is skipped.  Strings of the form [+-]?[0-9]*[.]?[0-9]* that begin with a
digit and contain at most 13 digits (12 if a decimal point is present),
not counting leading zeros of the integer part, are converted directly.
Everything else (inf, nan, hex, values with exponents, values starting
with '.', longer digit strings) is passed on to strtod() for processing.

If the value is too big, the largest possible value will be returned;
if it is too small to be represented in a double zero will be returned.
In both cases errno will be set to ERANGE (as reported by strtod()).

If no characters could be converted, *failed will be set to 1.
*failed is never cleared by this function.

The address of the first character following the converted number will
be stored in *end.

Both end and failed must be non-NULL.
 */

static inline double hts_str2dbl(const char *in, char **end, int *failed) {
    uint64_t n = 0;
    int max_len = 15; // Countdown; the fast path is abandoned when it hits 0
    const unsigned char *v = (const unsigned char *) in;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    int neg = 0, point = -1;
    double d;
    // Divisors indexed by (characters after the integer part); index 0 is
    // "no decimal point" and index 1 is "point but no fractional digits".
    static const double D[] = {1,1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
                               1e8, 1e9, 1e10,1e11,1e12,1e13,1e14,1e15,
                               1e16,1e17,1e18,1e19,1e20};

    while (isspace(*v))
        v++;

    if (*v == '-') {
        neg = 1;
        v++;
    } else if (*v == '+') {
        v++;
    }

    switch(*v) {
    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        break;

    case '0':
        if (v[1] != 'x' && v[1] != 'X') break;
        // else fall through - hex number

    default:
        // Non numbers, like NaN, Inf
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    // Leading zeros carry no value and do not count against max_len
    while (*v == '0') ++v;

    const unsigned char *start = v;

    // Integer part
    while (--max_len && *v>='0' && *v<='9')
        n = n*10 + *v++ - ascii_zero;
    if (max_len && *v == '.') {
        point = v - start;
        v++;
        // Fractional part, accumulated into the same integer
        while (--max_len && *v>='0' && *v<='9')
            n = n*10 + *v++ - ascii_zero;
    }
    if (point < 0)
        point = v - start;

    // Outside the scope of this quick and dirty parser.
    if (!max_len || *v == 'e' || *v == 'E') {
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    *end = (char *)v;
    // Scale the accumulated digits back down by the fractional length
    d = n / D[v - start - point];

    return neg ? -d : d;
}
Unexecuted instantiation: header.c:hts_str2dbl
Unexecuted instantiation: hfile.c:hts_str2dbl
Unexecuted instantiation: hts.c:hts_str2dbl
Unexecuted instantiation: hts_expr.c:hts_str2dbl
Unexecuted instantiation: multipart.c:hts_str2dbl
Unexecuted instantiation: sam.c:hts_str2dbl
Unexecuted instantiation: sam_mods.c:hts_str2dbl
Unexecuted instantiation: textutils.c:hts_str2dbl
vcf.c:hts_str2dbl
Line
Count
Source
354
78.0M
/* Convert a floating point value string to a double.
 *
 * Leading white space and an optional sign are skipped.  Short plain
 * decimal strings that begin with a digit are converted directly; anything
 * else (inf, nan, hex, exponents, a leading '.', or digit strings too long
 * for the max_len countdown) is handed to strtod().
 *
 * in      Input string
 * end     Receives the address of the first character after the number
 * failed  Set to 1 if strtod() could not convert any characters;
 *         never cleared here
 * Returns the converted value.
 */
static inline double hts_str2dbl(const char *in, char **end, int *failed) {
    uint64_t n = 0;
    int max_len = 15; // Countdown; the fast path is abandoned when it hits 0
    const unsigned char *v = (const unsigned char *) in;
    const unsigned int ascii_zero = '0'; // Prevents conversion to signed
    int neg = 0, point = -1;
    double d;
    // Divisors indexed by (characters after the integer part); index 0 is
    // "no decimal point" and index 1 is "point but no fractional digits".
    static const double D[] = {1,1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
                               1e8, 1e9, 1e10,1e11,1e12,1e13,1e14,1e15,
                               1e16,1e17,1e18,1e19,1e20};

    while (isspace(*v))
        v++;

    if (*v == '-') {
        neg = 1;
        v++;
    } else if (*v == '+') {
        v++;
    }

    switch(*v) {
    case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        break;

    case '0':
        if (v[1] != 'x' && v[1] != 'X') break;
        // else fall through - hex number

    default:
        // Non numbers, like NaN, Inf
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    // Leading zeros carry no value and do not count against max_len
    while (*v == '0') ++v;

    const unsigned char *start = v;

    // Integer part
    while (--max_len && *v>='0' && *v<='9')
        n = n*10 + *v++ - ascii_zero;
    if (max_len && *v == '.') {
        point = v - start;
        v++;
        // Fractional part, accumulated into the same integer
        while (--max_len && *v>='0' && *v<='9')
            n = n*10 + *v++ - ascii_zero;
    }
    if (point < 0)
        point = v - start;

    // Outside the scope of this quick and dirty parser.
    if (!max_len || *v == 'e' || *v == 'E') {
        d = strtod(in, end);
        if (*end == in)
            *failed = 1;
        return d;
    }

    *end = (char *)v;
    // Scale the accumulated digits back down by the fractional length
    d = n / D[v - start - point];

    return neg ? -d : d;
}
Unexecuted instantiation: cram_encode.c:hts_str2dbl
Unexecuted instantiation: cram_index.c:hts_str2dbl
Unexecuted instantiation: cram_io.c:hts_str2dbl
Unexecuted instantiation: hfile_libcurl.c:hts_str2dbl
Unexecuted instantiation: hfile_gcs.c:hts_str2dbl
Unexecuted instantiation: hfile_s3.c:hts_str2dbl
Unexecuted instantiation: hfile_s3_write.c:hts_str2dbl
Unexecuted instantiation: bgzf.c:hts_str2dbl
Unexecuted instantiation: faidx.c:hts_str2dbl
Unexecuted instantiation: tbx.c:hts_str2dbl
420
421
422
#ifdef __cplusplus
423
}
424
#endif
425
426
#endif