Coverage Report

Created: 2023-01-17 06:24

/src/htslib/kstring.c
Line
Count
Source (jump to first uncovered line)
1
/* The MIT License
2
3
   Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4
   Copyright (C) 2013-2018, 2020-2021 Genome Research Ltd.
5
6
   Permission is hereby granted, free of charge, to any person obtaining
7
   a copy of this software and associated documentation files (the
8
   "Software"), to deal in the Software without restriction, including
9
   without limitation the rights to use, copy, modify, merge, publish,
10
   distribute, sublicense, and/or sell copies of the Software, and to
11
   permit persons to whom the Software is furnished to do so, subject to
12
   the following conditions:
13
14
   The above copyright notice and this permission notice shall be
15
   included in all copies or substantial portions of the Software.
16
17
   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
   SOFTWARE.
25
*/
26
27
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
28
#include <config.h>
29
30
#include <stdarg.h>
31
#include <stdio.h>
32
#include <ctype.h>
33
#include <string.h>
34
#include <stdint.h>
35
#include <math.h>
36
#include "htslib/kstring.h"
37
38
829k
int kputd(double d, kstring_t *s) {
39
829k
  int len = 0;
40
829k
  char buf[21], *cp = buf+20, *ep;
41
829k
  if (d == 0) {
42
388k
    if (signbit(d)) {
43
14.6k
      kputsn("-0",2,s);
44
14.6k
      return 2;
45
374k
    } else {
46
374k
      kputsn("0",1,s);
47
374k
      return 1;
48
374k
    }
49
388k
  }
50
51
440k
  if (d < 0) {
52
105k
    kputc('-',s);
53
105k
    len = 1;
54
105k
    d=-d;
55
105k
  }
56
440k
  if (!(d >= 0.0001 && d <= 999999)) {
57
226k
    if (ks_resize(s, s->l + 50) < 0)
58
0
      return EOF;
59
    // We let stdio handle the exponent cases
60
226k
    int s2 = sprintf(s->s + s->l, "%g", d);
61
226k
    len += s2;
62
226k
    s->l += s2;
63
226k
    return len;
64
226k
  }
65
66
213k
  uint64_t i = d*10000000000LL;
67
  // Correction for rounding - rather ugly
68
69
  // Optimised for small numbers.
70
  // Better still would be __builtin_clz on hi/lo 32 and get the
71
  // starting point very rapidly.
72
213k
  if (d<.0001)
73
0
    i+=0;
74
213k
  else if (d<0.001)
75
131
    i+=5;
76
213k
  else if (d < 0.01)
77
154
    i+=50;
78
213k
  else if (d < 0.1)
79
350
    i+=500;
80
212k
  else if (d < 1)
81
99
    i+=5000;
82
212k
  else if (d < 10)
83
40.2k
    i+=50000;
84
172k
  else if (d < 100)
85
21.6k
    i+=500000;
86
150k
  else if (d < 1000)
87
72.4k
    i+=5000000;
88
78.3k
  else if (d < 10000)
89
730
    i+=50000000;
90
77.5k
  else if (d < 100000)
91
21.4k
    i+=500000000;
92
56.1k
  else
93
56.1k
    i+=5000000000LL;
94
95
2.88M
  do {
96
2.88M
    *--cp = '0' + i%10;
97
2.88M
    i /= 10;
98
2.88M
  } while (i >= 1);
99
213k
  buf[20] = 0;
100
213k
  int p = buf+20-cp;
101
213k
  if (p <= 10) { // d < 1
102
    //assert(d/1);
103
734
    cp[6] = 0; ep = cp+5;// 6 precision
104
1.64k
    while (p < 10) {
105
910
      *--cp = '0';
106
910
      p++;
107
910
    }
108
734
    *--cp = '.';
109
734
    *--cp = '0';
110
212k
  } else {
111
212k
    char *xp = --cp;
112
960k
    while (p > 10) {
113
747k
      xp[0] = xp[1];
114
747k
      p--;
115
747k
      xp++;
116
747k
    }
117
212k
    xp[0] = '.';
118
212k
    cp[7] = 0; ep=cp+6;
119
212k
    if (cp[6] == '.') cp[6] = 0;
120
212k
  }
121
122
  // Cull trailing zeros
123
745k
  while (*ep == '0' && ep > cp)
124
531k
    ep--;
125
213k
  char *z = ep+1;
126
551k
  while (ep > cp) {
127
495k
    if (*ep == '.') {
128
157k
      if (z[-1] == '.')
129
156k
        z[-1] = 0;
130
778
      else
131
778
        z[0] = 0;
132
157k
      break;
133
157k
    }
134
338k
    ep--;
135
338k
  }
136
137
213k
  int sl = strlen(cp);
138
213k
  len += sl;
139
213k
  kputsn(cp, sl, s);
140
213k
  return len;
141
440k
}
142
143
int kvsprintf(kstring_t *s, const char *fmt, va_list ap)
144
47.2k
{
145
47.2k
  va_list args;
146
47.2k
  int l;
147
47.2k
  va_copy(args, ap);
148
149
47.2k
  if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) {
150
0
    double d = va_arg(args, double);
151
0
    l = kputd(d, s);
152
0
    va_end(args);
153
0
    return l;
154
0
  }
155
156
47.2k
  if (!s->s) {
157
11.5k
    const size_t sz = 64;
158
11.5k
    s->s = malloc(sz);
159
11.5k
    if (!s->s)
160
0
      return -1;
161
11.5k
    s->m = sz;
162
11.5k
    s->l = 0;
163
11.5k
  }
164
165
47.2k
  l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'.
166
47.2k
  va_end(args);
167
47.2k
  if (l + 1 > s->m - s->l) {
168
4.77k
    if (ks_resize(s, s->l + l + 2) < 0)
169
0
      return -1;
170
4.77k
    va_copy(args, ap);
171
4.77k
    l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args);
172
4.77k
    va_end(args);
173
4.77k
  }
174
47.2k
  s->l += l;
175
47.2k
  return l;
176
47.2k
}
177
178
int ksprintf(kstring_t *s, const char *fmt, ...)
179
47.2k
{
180
47.2k
  va_list ap;
181
47.2k
  int l;
182
47.2k
  va_start(ap, fmt);
183
47.2k
  l = kvsprintf(s, fmt, ap);
184
47.2k
  va_end(ap);
185
47.2k
  return l;
186
47.2k
}
187
188
char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux)
189
2.42M
{
190
2.42M
  const unsigned char *p, *start, *sep = (unsigned char *) sep_in;
191
2.42M
  if (sep) { // set up the table
192
47.3k
    if (str == 0 && aux->finished) return 0; // no need to set up if we have finished
193
47.3k
    aux->finished = 0;
194
47.3k
    if (sep[0] && sep[1]) {
195
0
      aux->sep = -1;
196
0
      aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0;
197
0
      for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f);
198
47.3k
    } else aux->sep = sep[0];
199
47.3k
  }
200
2.42M
  if (aux->finished) return 0;
201
2.38M
  else if (str) start = (unsigned char *) str, aux->finished = 0;
202
2.33M
  else start = (unsigned char *) aux->p + 1;
203
2.38M
  if (aux->sep < 0) {
204
0
    for (p = start; *p; ++p)
205
0
      if (aux->tab[*p>>6]>>(*p&0x3f)&1) break;
206
2.38M
  } else {
207
1.03G
    for (p = start; *p; ++p)
208
1.03G
      if (*p == aux->sep) break;
209
2.38M
  }
210
2.38M
  aux->p = (const char *) p; // end of token
211
2.38M
  if (*p == 0) aux->finished = 1; // no more tokens
212
2.38M
  return (char*)start;
213
2.42M
}
214
215
// s MUST BE a null terminated string; l = strlen(s)
216
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
217
0
{
218
0
  int i, n, max, last_char, last_start, *offsets, l;
219
0
  n = 0; max = *_max; offsets = *_offsets;
220
0
  l = strlen(s);
221
222
0
#define __ksplit_aux do {           \
223
0
    if (_offsets) {           \
224
0
      s[i] = 0;         \
225
0
      if (n == max) {         \
226
0
        int *tmp;       \
227
0
        max = max? max<<1 : 2;     \
228
0
        if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) {  \
229
0
          offsets = tmp;      \
230
0
        } else {       \
231
0
          free(offsets);      \
232
0
          *_offsets = NULL;   \
233
0
          return 0;     \
234
0
        }          \
235
0
      }           \
236
0
      offsets[n++] = last_start;      \
237
0
    } else ++n;           \
238
0
  } while (0)
239
240
0
  for (i = 0, last_char = last_start = 0; i <= l; ++i) {
241
0
    if (delimiter == 0) {
242
0
      if (isspace((int)((unsigned char) s[i])) || s[i] == 0) {
243
0
        if (isgraph(last_char))
244
0
                    __ksplit_aux; // the end of a field
245
0
      } else {
246
0
        if (isspace(last_char) || last_char == 0)
247
0
                    last_start = i;
248
0
      }
249
0
    } else {
250
0
      if (s[i] == delimiter || s[i] == 0) {
251
0
        if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
252
0
      } else {
253
0
        if (last_char == delimiter || last_char == 0) last_start = i;
254
0
      }
255
0
    }
256
0
    last_char = (int)((unsigned char)s[i]);
257
0
  }
258
0
  *_max = max; *_offsets = offsets;
259
0
  return n;
260
0
}
261
262
int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp)
263
0
{
264
0
  size_t l0 = s->l;
265
266
0
  while (s->l == l0 || s->s[s->l-1] != '\n') {
267
0
    if (s->m - s->l < 200) {
268
0
      if (ks_resize(s, s->m + 200) < 0)
269
0
        return EOF;
270
0
    }
271
0
    if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break;
272
0
    s->l += strlen(s->s + s->l);
273
0
  }
274
275
0
  if (s->l == l0) return EOF;
276
277
0
  if (s->l > l0 && s->s[s->l-1] == '\n') {
278
0
    s->l--;
279
0
    if (s->l > l0 && s->s[s->l-1] == '\r') s->l--;
280
0
  }
281
0
  s->s[s->l] = '\0';
282
0
  return 0;
283
0
}
284
285
int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp)
286
76.4k
{
287
76.4k
  size_t l0 = s->l;
288
289
156k
  while (s->l == l0 || s->s[s->l-1] != '\n') {
290
81.4k
    if (s->m - s->l < 200) {
291
      // We return EOF for both EOF and error and the caller
292
      // needs to check for errors in fp, and we haven't
293
      // even got there yet.
294
      //
295
      // The only way of propagating memory errors is to
296
      // deliberately call something that we know triggers
297
      // and error so fp is also set.  This works for
298
      // hgets, but not for gets where reading <= 0 bytes
299
      // isn't an error.
300
14.6k
      if (ks_resize(s, s->m + 200) < 0) {
301
0
        fgets_fn(s->s + s->l, 0, fp);
302
0
        return EOF;
303
0
      }
304
14.6k
    }
305
81.4k
    ssize_t len = fgets_fn(s->s + s->l, s->m - s->l, fp);
306
81.4k
    if (len <= 0) break;
307
79.7k
    s->l += len;
308
79.7k
  }
309
310
76.4k
  if (s->l == l0) return EOF;
311
312
75.7k
  if (s->l > l0 && s->s[s->l-1] == '\n') {
313
74.7k
    s->l--;
314
74.7k
    if (s->l > l0 && s->s[s->l-1] == '\r') s->l--;
315
74.7k
  }
316
75.7k
  s->s[s->l] = '\0';
317
75.7k
  return 0;
318
76.4k
}
319
320
/**********************
321
 * Boyer-Moore search *
322
 **********************/
323
324
typedef unsigned char ubyte_t;
325
326
// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
327
static int *ksBM_prep(const ubyte_t *pat, int m)
328
0
{
329
0
  int i, *suff, *prep, *bmGs, *bmBc;
330
0
  prep = (int*)calloc(m + 256, sizeof(int));
331
0
    if (!prep) return NULL;
332
0
  bmGs = prep; bmBc = prep + m;
333
0
  { // preBmBc()
334
0
    for (i = 0; i < 256; ++i) bmBc[i] = m;
335
0
    for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
336
0
  }
337
0
  suff = (int*)calloc(m, sizeof(int));
338
0
    if (!suff) { free(prep); return NULL; }
339
0
  { // suffixes()
340
0
    int f = 0, g;
341
0
    suff[m - 1] = m;
342
0
    g = m - 1;
343
0
    for (i = m - 2; i >= 0; --i) {
344
0
      if (i > g && suff[i + m - 1 - f] < i - g)
345
0
        suff[i] = suff[i + m - 1 - f];
346
0
      else {
347
0
        if (i < g) g = i;
348
0
        f = i;
349
0
        while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
350
0
        suff[i] = f - g;
351
0
      }
352
0
    }
353
0
  }
354
0
  { // preBmGs()
355
0
    int j = 0;
356
0
    for (i = 0; i < m; ++i) bmGs[i] = m;
357
0
    for (i = m - 1; i >= 0; --i)
358
0
      if (suff[i] == i + 1)
359
0
        for (; j < m - 1 - i; ++j)
360
0
          if (bmGs[j] == m)
361
0
            bmGs[j] = m - 1 - i;
362
0
    for (i = 0; i <= m - 2; ++i)
363
0
      bmGs[m - 1 - suff[i]] = m - 1 - i;
364
0
  }
365
0
  free(suff);
366
0
  return prep;
367
0
}
368
369
void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
370
0
{
371
0
  int i, j, *prep = 0, *bmGs, *bmBc;
372
0
  const ubyte_t *str, *pat;
373
0
  str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;
374
0
  prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep;
375
0
    if (!prep) return NULL;
376
0
  if (_prep && *_prep == 0) *_prep = prep;
377
0
  bmGs = prep; bmBc = prep + m;
378
0
  j = 0;
379
0
  while (j <= n - m) {
380
0
    for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
381
0
    if (i >= 0) {
382
0
      int max = bmBc[str[i+j]] - m + 1 + i;
383
0
      if (max < bmGs[i]) max = bmGs[i];
384
0
      j += max;
385
0
    } else return (void*)(str + j);
386
0
  }
387
0
  if (_prep == 0) free(prep);
388
0
  return 0;
389
0
}
390
391
char *kstrstr(const char *str, const char *pat, int **_prep)
392
0
{
393
0
  return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep);
394
0
}
395
396
char *kstrnstr(const char *str, const char *pat, int n, int **_prep)
397
0
{
398
0
  return (char*)kmemmem(str, n, pat, strlen(pat), _prep);
399
0
}
400
401
/***********************
402
 * The main() function *
403
 ***********************/
404
405
#ifdef KSTRING_MAIN
406
#include <stdio.h>
407
int main()
408
{
409
  kstring_t *s;
410
  int *fields, n, i;
411
  ks_tokaux_t aux;
412
  char *p;
413
  s = (kstring_t*)calloc(1, sizeof(kstring_t));
414
  // test ksprintf()
415
  ksprintf(s, " abcdefg:    %d ", 100);
416
  printf("'%s'\n", s->s);
417
  // test ksplit()
418
  fields = ksplit(s, 0, &n);
419
  for (i = 0; i < n; ++i)
420
    printf("field[%d] = '%s'\n", i, s->s + fields[i]);
421
  // test kstrtok()
422
  s->l = 0;
423
  for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) {
424
    kputsn(p, aux.p - p, s);
425
    kputc('\n', s);
426
  }
427
  printf("%s", s->s);
428
  // free
429
  free(s->s); free(s); free(fields);
430
431
  {
432
    static char *str = "abcdefgcdgcagtcakcdcd";
433
    static char *pat = "cd";
434
    char *ret, *s = str;
435
    int *prep = 0;
436
    while ((ret = kstrstr(s, pat, &prep)) != 0) {
437
      printf("match: %s\n", ret);
438
      s = ret + prep[0];
439
    }
440
    free(prep);
441
  }
442
  return 0;
443
}
444
#endif