Coverage Report

Created: 2026-01-17 06:40

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/kstring.c
Line
Count
Source
1
/* The MIT License
2
3
   Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4
   Copyright (C) 2013-2018, 2020-2021, 2023, 2025 Genome Research Ltd.
5
6
   Permission is hereby granted, free of charge, to any person obtaining
7
   a copy of this software and associated documentation files (the
8
   "Software"), to deal in the Software without restriction, including
9
   without limitation the rights to use, copy, modify, merge, publish,
10
   distribute, sublicense, and/or sell copies of the Software, and to
11
   permit persons to whom the Software is furnished to do so, subject to
12
   the following conditions:
13
14
   The above copyright notice and this permission notice shall be
15
   included in all copies or substantial portions of the Software.
16
17
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
   SOFTWARE.
25
*/
26
27
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
28
#include <config.h>
29
30
#include <stdarg.h>
31
#include <stdio.h>
32
#include <ctype.h>
33
#include <string.h>
34
#include <stdint.h>
35
#include <math.h>
36
#include "htslib/kstring.h"
37
38
623k
int kputd(double d, kstring_t *s) {
39
623k
  int len = 0;
40
623k
  char buf[21], *cp = buf+20, *ep;
41
623k
  if (d == 0) {
42
73.7k
    if (signbit(d)) {
43
5.15k
      kputsn("-0",2,s);
44
5.15k
      return 2;
45
68.6k
    } else {
46
68.6k
      kputsn("0",1,s);
47
68.6k
      return 1;
48
68.6k
    }
49
73.7k
  }
50
51
549k
  if (d < 0) {
52
145k
    kputc('-',s);
53
145k
    len = 1;
54
145k
    d=-d;
55
145k
  }
56
549k
  if (!(d >= 0.0001 && d <= 999999)) {
57
144k
    if (ks_resize(s, s->l + 50) < 0)
58
0
      return EOF;
59
    // We let stdio handle the exponent cases
60
144k
    int s2 = snprintf(s->s + s->l, s->m - s->l, "%g", d);
61
144k
    len += s2;
62
144k
    s->l += s2;
63
144k
    return len;
64
144k
  }
65
66
  // Correction for rounding - rather ugly
67
  // Optimised for small numbers.
68
69
405k
  uint32_t i;
70
405k
  if (d<0.001)         i = rint(d*1000000000), cp -= 1;
71
405k
  else if (d < 0.01)   i = rint(d*100000000),  cp -= 2;
72
394k
  else if (d < 0.1)    i = rint(d*10000000),   cp -= 3;
73
374k
  else if (d < 1)      i = rint(d*1000000),    cp -= 4;
74
357k
  else if (d < 10)     i = rint(d*100000),     cp -= 5;
75
253k
  else if (d < 100)    i = rint(d*10000),      cp -= 6;
76
208k
  else if (d < 1000)   i = rint(d*1000),       cp -= 7;
77
77.2k
  else if (d < 10000)  i = rint(d*100),        cp -= 8;
78
67.3k
  else if (d < 100000) i = rint(d*10),         cp -= 9;
79
49.0k
  else                 i = rint(d),            cp -= 10;
80
81
  // integer i is always 6 digits, so print it 2 at a time.
82
405k
  static const char kputuw_dig2r[] =
83
405k
    "00010203040506070809"
84
405k
    "10111213141516171819"
85
405k
    "20212223242526272829"
86
405k
    "30313233343536373839"
87
405k
    "40414243444546474849"
88
405k
    "50515253545556575859"
89
405k
    "60616263646566676869"
90
405k
    "70717273747576777879"
91
405k
    "80818283848586878889"
92
405k
    "90919293949596979899";
93
94
405k
  memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100;
95
405k
  memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100;
96
405k
  memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2);
97
98
  // Except when it rounds up (d=0.009999999 is i=1000000)
99
405k
  if (i >= 100)
100
11.2k
    *--cp = '0' + (i/100);
101
102
103
405k
  int p = buf+20-cp;
104
405k
  if (p <= 10) { /* d < 1 */
105
    // 0.00123 is 123, so add leading zeros and 0.
106
48.0k
    ep = cp+5; // 6 precision
107
78.5k
    while (p < 10) { // aka d < 1
108
30.4k
      *--cp = '0';
109
30.4k
      p++;
110
30.4k
    }
111
48.0k
    *--cp = '.';
112
48.0k
    *--cp = '0';
113
357k
  } else {
114
    // 123.001 is 123001 with p==13, so move 123 down and add "."
115
    // Equiv to memmove(cp-1, cp, p-10); cp--;
116
357k
    char *xp = --cp;
117
357k
    ep = cp+6;
118
1.37M
    while (p > 10) {
119
1.01M
      xp[0] = xp[1];
120
1.01M
      xp++;
121
1.01M
      p--;
122
1.01M
    }
123
357k
    xp[0] = '.';
124
357k
  }
125
126
  // Cull trailing zeros
127
1.76M
  while (*ep == '0' && ep > cp)
128
1.35M
    ep--;
129
130
  // End can be 1 out due to the mostly-6 but occasionally 7 (i==1) case.
131
  // Also code with "123." which should be "123"
132
405k
  if (*ep && *ep != '.')
133
52.0k
    ep++;
134
405k
  *ep = 0;
135
136
405k
  int sl = ep-cp;
137
405k
  len += sl;
138
405k
  kputsn(cp, sl, s);
139
405k
  return len;
140
549k
}
141
142
int kvsprintf(kstring_t *s, const char *fmt, va_list ap)
143
100k
{
144
100k
  va_list args;
145
100k
  int l;
146
100k
  va_copy(args, ap);
147
148
100k
  if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) {
149
0
    double d = va_arg(args, double);
150
0
    l = kputd(d, s);
151
0
    va_end(args);
152
0
    return l;
153
0
  }
154
155
100k
  if (!s->s) {
156
63.0k
    const size_t sz = 64;
157
63.0k
    s->s = malloc(sz);
158
63.0k
    if (!s->s)
159
0
      return -1;
160
63.0k
    s->m = sz;
161
63.0k
    s->l = 0;
162
63.0k
  }
163
164
100k
  l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'.
165
100k
  va_end(args);
166
100k
  if (l + 1 > s->m - s->l) {
167
37.7k
    if (ks_resize(s, s->l + l + 2) < 0)
168
0
      return -1;
169
37.7k
    va_copy(args, ap);
170
37.7k
    l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args);
171
37.7k
    va_end(args);
172
37.7k
  }
173
100k
  s->l += l;
174
100k
  return l;
175
100k
}
176
177
int ksprintf(kstring_t *s, const char *fmt, ...)
178
100k
{
179
100k
  va_list ap;
180
100k
  int l;
181
100k
  va_start(ap, fmt);
182
100k
  l = kvsprintf(s, fmt, ap);
183
100k
  va_end(ap);
184
100k
  return l;
185
100k
}
186
187
char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux)
188
1.61M
{
189
1.61M
  const unsigned char *p, *start, *sep = (unsigned char *) sep_in;
190
1.61M
  if (sep) { // set up the table
191
50.4k
    if (str == 0 && aux->finished) return 0; // no need to set up if we have finished
192
50.4k
    aux->finished = 0;
193
50.4k
    if (sep[0] && sep[1]) {
194
0
      aux->sep = -1;
195
0
      aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0;
196
0
      for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f);
197
50.4k
    } else aux->sep = sep[0];
198
50.4k
  }
199
1.61M
  if (aux->finished) return 0;
200
1.58M
  else if (str) start = (unsigned char *) str, aux->finished = 0;
201
1.53M
  else start = (unsigned char *) aux->p + 1;
202
1.58M
  if (aux->sep < 0) {
203
0
    for (p = start; *p; ++p)
204
0
      if (aux->tab[*p>>6]>>(*p&0x3f)&1) break;
205
1.58M
  } else {
206
    // Using strchr is fast for next token, but slower for
207
    // last token due to extra pass from strlen.  Overall
208
    // on a VCF parse this func was 146% faster with // strchr.
209
    // Equiv to:
210
    // for (p = start; *p; ++p) if (*p == aux->sep) break;
211
212
    // NB: We could use strchrnul() here from glibc if detected,
213
    // which is ~40% faster again, but it's not so portable.
214
    // i.e.   p = (uint8_t *)strchrnul((char *)start, aux->sep);
215
1.58M
    uint8_t *p2 = (uint8_t *)strchr((char *)start, aux->sep);
216
1.58M
    p = p2 ? p2 : start + strlen((char *)start);
217
1.58M
  }
218
1.58M
  aux->p = (const char *) p; // end of token
219
1.58M
  if (*p == 0) aux->finished = 1; // no more tokens
220
1.58M
  return (char*)start;
221
1.61M
}
222
223
// s MUST BE a null terminated string; l = strlen(s)
224
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
225
0
{
226
0
  int i, n, max, last_char, last_start, *offsets, l;
227
0
  n = 0; max = *_max; offsets = *_offsets;
228
0
  l = strlen(s);
229
230
0
#define __ksplit_aux do {           \
231
0
    if (_offsets) {           \
232
0
      s[i] = 0;         \
233
0
      if (n == max) {         \
234
0
        int *tmp;       \
235
0
        max = max? max<<1 : 2;     \
236
0
        if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) {  \
237
0
          offsets = tmp;      \
238
0
        } else {       \
239
0
          free(offsets);      \
240
0
          *_offsets = NULL;   \
241
0
          return 0;     \
242
0
        }          \
243
0
      }           \
244
0
      offsets[n++] = last_start;      \
245
0
    } else ++n;           \
246
0
  } while (0)
247
248
0
  for (i = 0, last_char = last_start = 0; i <= l; ++i) {
249
0
    if (delimiter == 0) {
250
0
      if (isspace((int)((unsigned char) s[i])) || s[i] == 0) {
251
0
        if (isgraph(last_char))
252
0
                    __ksplit_aux; // the end of a field
253
0
      } else {
254
0
        if (isspace(last_char) || last_char == 0)
255
0
                    last_start = i;
256
0
      }
257
0
    } else {
258
0
      if (s[i] == delimiter || s[i] == 0) {
259
0
        if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
260
0
      } else {
261
0
        if (last_char == delimiter || last_char == 0) last_start = i;
262
0
      }
263
0
    }
264
0
    last_char = (int)((unsigned char)s[i]);
265
0
  }
266
0
  *_max = max; *_offsets = offsets;
267
0
  return n;
268
0
}
269
270
int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp)
271
0
{
272
0
  size_t l0 = s->l;
273
274
0
  while (s->l == l0 || s->s[s->l-1] != '\n') {
275
0
    if (s->m - s->l < 200) {
276
0
      if (ks_resize(s, s->m + 200) < 0)
277
0
        return EOF;
278
0
    }
279
0
    if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break;
280
0
    s->l += strlen(s->s + s->l);
281
0
  }
282
283
0
  if (s->l == l0) return EOF;
284
285
0
  if (s->l > l0 && s->s[s->l-1] == '\n') {
286
0
    s->l--;
287
0
    if (s->l > l0 && s->s[s->l-1] == '\r') s->l--;
288
0
  }
289
0
  s->s[s->l] = '\0';
290
0
  return 0;
291
0
}
292
293
int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp)
294
1.10M
{
295
1.10M
  size_t l0 = s->l;
296
297
2.22M
  while (s->l == l0 || s->s[s->l-1] != '\n') {
298
1.12M
    if (s->m - s->l < 200) {
299
      // We return EOF for both EOF and error and the caller
300
      // needs to check for errors in fp, and we haven't
301
      // even got there yet.
302
      //
303
      // The only way of propagating memory errors is to
304
      // deliberately call something that we know triggers
305
      // and error so fp is also set.  This works for
306
      // hgets, but not for gets where reading <= 0 bytes
307
      // isn't an error.
308
543k
      if (ks_resize(s, s->m + 200) < 0) {
309
0
        fgets_fn(s->s + s->l, 0, fp);
310
0
        return EOF;
311
0
      }
312
543k
    }
313
1.12M
    ssize_t len = fgets_fn(s->s + s->l, s->m - s->l, fp);
314
1.12M
    if (len <= 0) break;
315
1.11M
    s->l += len;
316
1.11M
  }
317
318
1.10M
  if (s->l == l0) return EOF;
319
320
1.10M
  if (s->l > l0 && s->s[s->l-1] == '\n') {
321
1.10M
    s->l--;
322
1.10M
    if (s->l > l0 && s->s[s->l-1] == '\r') s->l--;
323
1.10M
  }
324
1.10M
  s->s[s->l] = '\0';
325
1.10M
  return 0;
326
1.10M
}
327
328
/**********************
329
 * Boyer-Moore search *
330
 **********************/
331
332
typedef unsigned char ubyte_t;
333
334
// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
335
static int *ksBM_prep(const ubyte_t *pat, int m)
336
183
{
337
183
  int i, *suff, *prep, *bmGs, *bmBc;
338
183
  prep = (int*)calloc(m + 256, sizeof(int));
339
183
    if (!prep) return NULL;
340
183
  bmGs = prep; bmBc = prep + m;
341
183
  { // preBmBc()
342
47.0k
    for (i = 0; i < 256; ++i) bmBc[i] = m;
343
3.64k
    for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
344
183
  }
345
183
  suff = (int*)calloc(m, sizeof(int));
346
183
    if (!suff) { free(prep); return NULL; }
347
183
  { // suffixes()
348
183
    int f = 0, g;
349
183
    suff[m - 1] = m;
350
183
    g = m - 1;
351
3.64k
    for (i = m - 2; i >= 0; --i) {
352
3.46k
      if (i > g && suff[i + m - 1 - f] < i - g)
353
0
        suff[i] = suff[i + m - 1 - f];
354
3.46k
      else {
355
3.46k
        if (i < g) g = i;
356
3.46k
        f = i;
357
3.52k
        while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
358
3.46k
        suff[i] = f - g;
359
3.46k
      }
360
3.46k
    }
361
183
  }
362
183
  { // preBmGs()
363
183
    int j = 0;
364
3.83k
    for (i = 0; i < m; ++i) bmGs[i] = m;
365
3.83k
    for (i = m - 1; i >= 0; --i)
366
3.64k
      if (suff[i] == i + 1)
367
183
        for (; j < m - 1 - i; ++j)
368
0
          if (bmGs[j] == m)
369
0
            bmGs[j] = m - 1 - i;
370
3.64k
    for (i = 0; i <= m - 2; ++i)
371
3.46k
      bmGs[m - 1 - suff[i]] = m - 1 - i;
372
183
  }
373
183
  free(suff);
374
183
  return prep;
375
183
}
376
377
void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
378
183
{
379
183
  int i, j, *prep = 0, *bmGs, *bmBc;
380
183
  const ubyte_t *str, *pat;
381
183
  str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;
382
183
  if (_prep && *_prep) {
383
0
    prep = *_prep;
384
183
  } else {
385
183
    prep = ksBM_prep(pat, m);
386
183
    if (!prep) return NULL;
387
183
    if (_prep)
388
0
      *_prep = prep;
389
183
  }
390
183
  bmGs = prep; bmBc = prep + m;
391
183
  j = 0;
392
29.2M
  while (j <= n - m) {
393
29.2M
    for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
394
29.2M
    if (i >= 0) {
395
29.2M
      int max = bmBc[str[i+j]] - m + 1 + i;
396
29.2M
      if (max < bmGs[i]) max = bmGs[i];
397
29.2M
      j += max;
398
29.2M
    } else {
399
6
      if (_prep == 0) free(prep);
400
6
      return (void*)(str + j);
401
6
    }
402
29.2M
  }
403
177
  if (_prep == 0) free(prep);
404
177
  return 0;
405
183
}
406
407
char *kstrstr(const char *str, const char *pat, int **_prep)
408
0
{
409
0
  return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep);
410
0
}
411
412
char *kstrnstr(const char *str, const char *pat, int n, int **_prep)
413
0
{
414
0
  return (char*)kmemmem(str, n, pat, strlen(pat), _prep);
415
0
}
416
417
/***********************
418
 * The main() function *
419
 ***********************/
420
421
#ifdef KSTRING_MAIN
422
#include <stdio.h>
423
int main()
424
{
425
  kstring_t *s;
426
  int *fields, n, i;
427
  ks_tokaux_t aux;
428
  char *p;
429
  s = (kstring_t*)calloc(1, sizeof(kstring_t));
430
  // test ksprintf()
431
  ksprintf(s, " abcdefg:    %d ", 100);
432
  printf("'%s'\n", s->s);
433
  // test ksplit()
434
  fields = ksplit(s, 0, &n);
435
  for (i = 0; i < n; ++i)
436
    printf("field[%d] = '%s'\n", i, s->s + fields[i]);
437
  // test kstrtok()
438
  s->l = 0;
439
  for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) {
440
    kputsn(p, aux.p - p, s);
441
    kputc('\n', s);
442
  }
443
  printf("%s", s->s);
444
  // free
445
  free(s->s); free(s); free(fields);
446
447
  {
448
    static char *str = "abcdefgcdgcagtcakcdcd";
449
    static char *pat = "cd";
450
    char *ret, *s = str;
451
    int *prep = 0;
452
    while ((ret = kstrstr(s, pat, &prep)) != 0) {
453
      printf("match: %s\n", ret);
454
      s = ret + prep[0];
455
    }
456
    free(prep);
457
  }
458
  return 0;
459
}
460
#endif