Line | Count | Source |
1 | | /* The MIT License |
2 | | |
3 | | Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk> |
4 | | Copyright (C) 2013-2018, 2020-2021, 2023, 2025 Genome Research Ltd. |
5 | | |
6 | | Permission is hereby granted, free of charge, to any person obtaining |
7 | | a copy of this software and associated documentation files (the |
8 | | "Software"), to deal in the Software without restriction, including |
9 | | without limitation the rights to use, copy, modify, merge, publish, |
10 | | distribute, sublicense, and/or sell copies of the Software, and to |
11 | | permit persons to whom the Software is furnished to do so, subject to |
12 | | the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be |
15 | | included in all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
18 | | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
19 | | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
20 | | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
21 | | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
22 | | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
23 | | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
24 | | SOFTWARE. |
25 | | */ |
26 | | |
27 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
28 | | #include <config.h> |
29 | | |
30 | | #include <stdarg.h> |
31 | | #include <stdio.h> |
32 | | #include <ctype.h> |
33 | | #include <string.h> |
34 | | #include <stdint.h> |
35 | | #include <math.h> |
36 | | #include "htslib/kstring.h" |
37 | | |
38 | 722k | int kputd(double d, kstring_t *s) { |
39 | 722k | int len = 0; |
40 | 722k | char buf[21], *cp = buf+20, *ep; |
41 | 722k | if (d == 0) { |
42 | 96.5k | if (signbit(d)) { |
43 | 3.83k | kputsn("-0",2,s); |
44 | 3.83k | return 2; |
45 | 92.6k | } else { |
46 | 92.6k | kputsn("0",1,s); |
47 | 92.6k | return 1; |
48 | 92.6k | } |
49 | 96.5k | } |
50 | | |
51 | 625k | if (d < 0) { |
52 | 173k | kputc('-',s); |
53 | 173k | len = 1; |
54 | 173k | d=-d; |
55 | 173k | } |
56 | 625k | if (!(d >= 0.0001 && d <= 999999)) { |
57 | 168k | if (ks_resize(s, s->l + 50) < 0) |
58 | 0 | return EOF; |
59 | | // We let stdio handle the exponent cases |
60 | 168k | int s2 = snprintf(s->s + s->l, s->m - s->l, "%g", d); |
61 | 168k | len += s2; |
62 | 168k | s->l += s2; |
63 | 168k | return len; |
64 | 168k | } |
65 | | |
66 | | // Correction for rounding - rather ugly |
67 | | // Optimised for small numbers. |
68 | | |
69 | 457k | uint32_t i; |
70 | 457k | if (d<0.001) i = rint(d*1000000000), cp -= 1; |
71 | 457k | else if (d < 0.01) i = rint(d*100000000), cp -= 2; |
72 | 450k | else if (d < 0.1) i = rint(d*10000000), cp -= 3; |
73 | 439k | else if (d < 1) i = rint(d*1000000), cp -= 4; |
74 | 428k | else if (d < 10) i = rint(d*100000), cp -= 5; |
75 | 320k | else if (d < 100) i = rint(d*10000), cp -= 6; |
76 | 266k | else if (d < 1000) i = rint(d*1000), cp -= 7; |
77 | 99.2k | else if (d < 10000) i = rint(d*100), cp -= 8; |
78 | 86.0k | else if (d < 100000) i = rint(d*10), cp -= 9; |
79 | 61.5k | else i = rint(d), cp -= 10; |
80 | | |
81 | | // integer i is always 6 digits, so print it 2 at a time. |
82 | 457k | static const char kputuw_dig2r[] = |
83 | 457k | "00010203040506070809" |
84 | 457k | "10111213141516171819" |
85 | 457k | "20212223242526272829" |
86 | 457k | "30313233343536373839" |
87 | 457k | "40414243444546474849" |
88 | 457k | "50515253545556575859" |
89 | 457k | "60616263646566676869" |
90 | 457k | "70717273747576777879" |
91 | 457k | "80818283848586878889" |
92 | 457k | "90919293949596979899"; |
93 | | |
94 | 457k | memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; |
95 | 457k | memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; |
96 | 457k | memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); |
97 | | |
98 | | // Except when it rounds up (d=0.009999999 is i=1000000) |
99 | 457k | if (i >= 100) |
100 | 6.67k | *--cp = '0' + (i/100); |
101 | | |
102 | | |
103 | 457k | int p = buf+20-cp; |
104 | 457k | if (p <= 10) { /* d < 1 */ |
105 | | // 0.00123 is 123, so add leading zeros and 0. |
106 | 28.8k | ep = cp+5; // 6 precision |
107 | 48.2k | while (p < 10) { // aka d < 1 |
108 | 19.4k | *--cp = '0'; |
109 | 19.4k | p++; |
110 | 19.4k | } |
111 | 28.8k | *--cp = '.'; |
112 | 28.8k | *--cp = '0'; |
113 | 428k | } else { |
114 | | // 123.001 is 123001 with p==13, so move 123 down and add "." |
115 | | // Equiv to memmove(cp-1, cp, p-10); cp--; |
116 | 428k | char *xp = --cp; |
117 | 428k | ep = cp+6; |
118 | 1.69M | while (p > 10) { |
119 | 1.26M | xp[0] = xp[1]; |
120 | 1.26M | xp++; |
121 | 1.26M | p--; |
122 | 1.26M | } |
123 | 428k | xp[0] = '.'; |
124 | 428k | } |
125 | | |
126 | | // Cull trailing zeros |
127 | 1.89M | while (*ep == '0' && ep > cp) |
128 | 1.44M | ep--; |
129 | | |
130 | | // End can be 1 out due to the mostly-6 but occasionally 7 (i==1) case. |
131 | | // Also code with "123." which should be "123" |
132 | 457k | if (*ep && *ep != '.') |
133 | 31.7k | ep++; |
134 | 457k | *ep = 0; |
135 | | |
136 | 457k | int sl = ep-cp; |
137 | 457k | len += sl; |
138 | 457k | kputsn(cp, sl, s); |
139 | 457k | return len; |
140 | 625k | } |
141 | | |
142 | | int kvsprintf(kstring_t *s, const char *fmt, va_list ap) |
143 | 159k | { |
144 | 159k | va_list args; |
145 | 159k | int l; |
146 | 159k | va_copy(args, ap); |
147 | | |
148 | 159k | if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) { |
149 | 0 | double d = va_arg(args, double); |
150 | 0 | l = kputd(d, s); |
151 | 0 | va_end(args); |
152 | 0 | return l; |
153 | 0 | } |
154 | | |
155 | 159k | if (!s->s) { |
156 | 97.4k | const size_t sz = 64; |
157 | 97.4k | s->s = malloc(sz); |
158 | 97.4k | if (!s->s) |
159 | 0 | return -1; |
160 | 97.4k | s->m = sz; |
161 | 97.4k | s->l = 0; |
162 | 97.4k | } |
163 | | |
164 | 159k | l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'. |
165 | 159k | va_end(args); |
166 | 159k | if (l + 1 > s->m - s->l) { |
167 | 56.7k | if (ks_resize(s, s->l + l + 2) < 0) |
168 | 0 | return -1; |
169 | 56.7k | va_copy(args, ap); |
170 | 56.7k | l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); |
171 | 56.7k | va_end(args); |
172 | 56.7k | } |
173 | 159k | s->l += l; |
174 | 159k | return l; |
175 | 159k | } |
176 | | |
177 | | int ksprintf(kstring_t *s, const char *fmt, ...) |
178 | 159k | { |
179 | 159k | va_list ap; |
180 | 159k | int l; |
181 | 159k | va_start(ap, fmt); |
182 | 159k | l = kvsprintf(s, fmt, ap); |
183 | 159k | va_end(ap); |
184 | 159k | return l; |
185 | 159k | } |
186 | | |
187 | | char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux) |
188 | 2.15M | { |
189 | 2.15M | const unsigned char *p, *start, *sep = (unsigned char *) sep_in; |
190 | 2.15M | if (sep) { // set up the table |
191 | 67.5k | if (str == 0 && aux->finished) return 0; // no need to set up if we have finished |
192 | 67.5k | aux->finished = 0; |
193 | 67.5k | if (sep[0] && sep[1]) { |
194 | 0 | aux->sep = -1; |
195 | 0 | aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0; |
196 | 0 | for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f); |
197 | 67.5k | } else aux->sep = sep[0]; |
198 | 67.5k | } |
199 | 2.15M | if (aux->finished) return 0; |
200 | 2.11M | else if (str) start = (unsigned char *) str, aux->finished = 0; |
201 | 2.04M | else start = (unsigned char *) aux->p + 1; |
202 | 2.11M | if (aux->sep < 0) { |
203 | 0 | for (p = start; *p; ++p) |
204 | 0 | if (aux->tab[*p>>6]>>(*p&0x3f)&1) break; |
205 | 2.11M | } else { |
206 | | // Using strchr is fast for next token, but slower for |
207 | | // last token due to extra pass from strlen. Overall |
208 | | // on a VCF parse this func was 146% faster with // strchr. |
209 | | // Equiv to: |
210 | | // for (p = start; *p; ++p) if (*p == aux->sep) break; |
211 | | |
212 | | // NB: We could use strchrnul() here from glibc if detected, |
213 | | // which is ~40% faster again, but it's not so portable. |
214 | | // i.e. p = (uint8_t *)strchrnul((char *)start, aux->sep); |
215 | 2.11M | uint8_t *p2 = (uint8_t *)strchr((char *)start, aux->sep); |
216 | 2.11M | p = p2 ? p2 : start + strlen((char *)start); |
217 | 2.11M | } |
218 | 2.11M | aux->p = (const char *) p; // end of token |
219 | 2.11M | if (*p == 0) aux->finished = 1; // no more tokens |
220 | 2.11M | return (char*)start; |
221 | 2.15M | } |
222 | | |
223 | | // s MUST BE a null terminated string; l = strlen(s) |
224 | | int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) |
225 | 0 | { |
226 | 0 | int i, n, max, last_char, last_start, *offsets, l; |
227 | 0 | n = 0; max = *_max; offsets = *_offsets; |
228 | 0 | l = strlen(s); |
229 | |
|
230 | 0 | #define __ksplit_aux do { \ |
231 | 0 | if (_offsets) { \ |
232 | 0 | s[i] = 0; \ |
233 | 0 | if (n == max) { \ |
234 | 0 | int *tmp; \ |
235 | 0 | max = max? max<<1 : 2; \ |
236 | 0 | if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \ |
237 | 0 | offsets = tmp; \ |
238 | 0 | } else { \ |
239 | 0 | free(offsets); \ |
240 | 0 | *_offsets = NULL; \ |
241 | 0 | return 0; \ |
242 | 0 | } \ |
243 | 0 | } \ |
244 | 0 | offsets[n++] = last_start; \ |
245 | 0 | } else ++n; \ |
246 | 0 | } while (0) |
247 | |
|
248 | 0 | for (i = 0, last_char = last_start = 0; i <= l; ++i) { |
249 | 0 | if (delimiter == 0) { |
250 | 0 | if (isspace((int)((unsigned char) s[i])) || s[i] == 0) { |
251 | 0 | if (isgraph(last_char)) |
252 | 0 | __ksplit_aux; // the end of a field |
253 | 0 | } else { |
254 | 0 | if (isspace(last_char) || last_char == 0) |
255 | 0 | last_start = i; |
256 | 0 | } |
257 | 0 | } else { |
258 | 0 | if (s[i] == delimiter || s[i] == 0) { |
259 | 0 | if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field |
260 | 0 | } else { |
261 | 0 | if (last_char == delimiter || last_char == 0) last_start = i; |
262 | 0 | } |
263 | 0 | } |
264 | 0 | last_char = (int)((unsigned char)s[i]); |
265 | 0 | } |
266 | 0 | *_max = max; *_offsets = offsets; |
267 | 0 | return n; |
268 | 0 | } |
269 | | |
270 | | int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp) |
271 | 0 | { |
272 | 0 | size_t l0 = s->l; |
273 | |
|
274 | 0 | while (s->l == l0 || s->s[s->l-1] != '\n') { |
275 | 0 | if (s->m - s->l < 200) { |
276 | 0 | if (ks_resize(s, s->m + 200) < 0) |
277 | 0 | return EOF; |
278 | 0 | } |
279 | 0 | if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break; |
280 | 0 | s->l += strlen(s->s + s->l); |
281 | 0 | } |
282 | | |
283 | 0 | if (s->l == l0) return EOF; |
284 | | |
285 | 0 | if (s->l > l0 && s->s[s->l-1] == '\n') { |
286 | 0 | s->l--; |
287 | 0 | if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; |
288 | 0 | } |
289 | 0 | s->s[s->l] = '\0'; |
290 | 0 | return 0; |
291 | 0 | } |
292 | | |
293 | | int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp) |
294 | 1.35M | { |
295 | 1.35M | size_t l0 = s->l; |
296 | | |
297 | 2.73M | while (s->l == l0 || s->s[s->l-1] != '\n') { |
298 | 1.38M | if (s->m - s->l < 200) { |
299 | | // We return EOF for both EOF and error and the caller |
300 | | // needs to check for errors in fp, and we haven't |
301 | | // even got there yet. |
302 | | // |
303 | | // The only way of propagating memory errors is to |
304 | | // deliberately call something that we know triggers |
305 | | // and error so fp is also set. This works for |
306 | | // hgets, but not for gets where reading <= 0 bytes |
307 | | // isn't an error. |
308 | 624k | if (ks_resize(s, s->m + 200) < 0) { |
309 | 0 | fgets_fn(s->s + s->l, 0, fp); |
310 | 0 | return EOF; |
311 | 0 | } |
312 | 624k | } |
313 | 1.38M | ssize_t len = fgets_fn(s->s + s->l, s->m - s->l, fp); |
314 | 1.38M | if (len <= 0) break; |
315 | 1.37M | s->l += len; |
316 | 1.37M | } |
317 | | |
318 | 1.35M | if (s->l == l0) return EOF; |
319 | | |
320 | 1.35M | if (s->l > l0 && s->s[s->l-1] == '\n') { |
321 | 1.34M | s->l--; |
322 | 1.34M | if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; |
323 | 1.34M | } |
324 | 1.35M | s->s[s->l] = '\0'; |
325 | 1.35M | return 0; |
326 | 1.35M | } |
327 | | |
328 | | typedef unsigned char ubyte_t; |
329 | | |
330 | | /********************** |
331 | | * Karp-Rabin search * |
332 | | **********************/ |
333 | | |
334 | | // Backup solution for when we can't use Boyer-Moore, e.g. due to memory |
335 | | // required. Karp-Rabin only needs to store a couple of hash values |
336 | | // so will always complete. |
337 | | |
338 | | static uint64_t fast_exp(uint64_t x, uint64_t n) |
339 | 0 | { |
340 | 0 | uint64_t y = 1; |
341 | 0 | if (n == 0) |
342 | 0 | return 1; |
343 | 0 | while (n > 1) { |
344 | 0 | if (n & 1) |
345 | 0 | y *= x; |
346 | 0 | x *= x; |
347 | 0 | n >>= 1; |
348 | 0 | } |
349 | 0 | return y * x; |
350 | 0 | } |
351 | | |
352 | | static void * karp_rabin(const void *str_, size_t n, const void *pat_, size_t m) |
353 | 0 | { |
354 | 0 | const ubyte_t *str = (const ubyte_t *) str_; |
355 | 0 | const ubyte_t *pat = (const ubyte_t *) pat_; |
356 | 0 | const uint64_t b = 31; |
357 | 0 | uint64_t hash_pat = 0; |
358 | 0 | uint64_t hash_str = 0; |
359 | 0 | uint64_t b_to_m = fast_exp(b, m); |
360 | 0 | size_t i; |
361 | 0 | ubyte_t mismatch = 0; |
362 | |
|
363 | 0 | if (m > n) |
364 | 0 | return NULL; |
365 | | |
366 | | // Calculate hash of pat and initial part of str. |
367 | 0 | for (i = 0; i < m; i++) { |
368 | 0 | mismatch |= str[i] ^ pat[i]; |
369 | 0 | hash_pat = hash_pat * b + pat[i] + 1U; |
370 | 0 | hash_str = hash_str * b + str[i] + 1U; |
371 | 0 | } |
372 | |
|
373 | 0 | if (!mismatch) // Match found at start (or m == 0) |
374 | 0 | return (void *) str_; |
375 | | |
376 | 0 | for (; i < n; i++) { |
377 | 0 | hash_str = hash_str * b + str[i] + 1U - b_to_m * (str[i - m] + 1U); |
378 | 0 | if (hash_str == hash_pat) { |
379 | 0 | if (memcmp(pat, str + i + 1 - m, m) == 0) |
380 | 0 | return (void *) (str + i + 1 - m); |
381 | 0 | } |
382 | 0 | } |
383 | 0 | return NULL; |
384 | 0 | } |
385 | | |
386 | | /********************** |
387 | | * Boyer-Moore search * |
388 | | **********************/ |
389 | | |
390 | | |
391 | | // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html |
392 | | static int *ksBM_prep(const ubyte_t *pat, int m) |
393 | 394 | { |
394 | 394 | int i, *suff, *prep, *bmGs, *bmBc; |
395 | 394 | if (m < 1) |
396 | 0 | return NULL; |
397 | 394 | prep = (int*)calloc((size_t) m + 256, sizeof(int)); |
398 | 394 | if (!prep) return NULL; |
399 | 394 | bmGs = prep; bmBc = prep + m; |
400 | 394 | { // preBmBc() |
401 | 101k | for (i = 0; i < 256; ++i) bmBc[i] = m; |
402 | 7.87k | for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1; |
403 | 394 | } |
404 | 394 | suff = (int*)calloc(m, sizeof(int)); |
405 | 394 | if (!suff) { free(prep); return NULL; } |
406 | 394 | { // suffixes() |
407 | 394 | int f = 0, g; |
408 | 394 | suff[m - 1] = m; |
409 | 394 | g = m - 1; |
410 | 7.87k | for (i = m - 2; i >= 0; --i) { |
411 | 7.48k | if (i > g && suff[i + m - 1 - f] < i - g) |
412 | 0 | suff[i] = suff[i + m - 1 - f]; |
413 | 7.48k | else { |
414 | 7.48k | if (i < g) g = i; |
415 | 7.48k | f = i; |
416 | 7.61k | while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; |
417 | 7.48k | suff[i] = f - g; |
418 | 7.48k | } |
419 | 7.48k | } |
420 | 394 | } |
421 | 394 | { // preBmGs() |
422 | 394 | int j = 0; |
423 | 8.27k | for (i = 0; i < m; ++i) bmGs[i] = m; |
424 | 8.27k | for (i = m - 1; i >= 0; --i) |
425 | 7.87k | if (suff[i] == i + 1) |
426 | 394 | for (; j < m - 1 - i; ++j) |
427 | 0 | if (bmGs[j] == m) |
428 | 0 | bmGs[j] = m - 1 - i; |
429 | 7.87k | for (i = 0; i <= m - 2; ++i) |
430 | 7.48k | bmGs[m - 1 - suff[i]] = m - 1 - i; |
431 | 394 | } |
432 | 394 | free(suff); |
433 | 394 | return prep; |
434 | 394 | } |
435 | | |
436 | | static void *boyer_moore(const void *str_, size_t n, const void *pat_, int m, |
437 | | int **stored_prep_ptr) |
438 | 466 | { |
439 | 466 | size_t j; |
440 | 466 | int i, *prep = 0, *bmGs, *bmBc; |
441 | 466 | const ubyte_t *str, *pat; |
442 | | |
443 | 466 | if (!str_ || !pat_) |
444 | 0 | return (void *) str_; |
445 | | |
446 | 466 | str = (const ubyte_t*) str_; |
447 | 466 | pat = (const ubyte_t*) pat_; |
448 | | |
449 | 466 | if (m <= 0) // Empty string always matches at the start |
450 | 0 | return (void *) str_; |
451 | 466 | if (n < m) // Input shorter than pattern can't match |
452 | 72 | return NULL; |
453 | 394 | if (m == 1) // Switch to memchr for 1 byte patterns (likely faster) |
454 | 0 | return memchr(str, pat[0], n); |
455 | | |
456 | 394 | if (stored_prep_ptr && *stored_prep_ptr) { |
457 | 0 | prep = *stored_prep_ptr; |
458 | 394 | } else { |
459 | 394 | prep = ksBM_prep(pat, m); |
460 | 394 | if (!prep) return karp_rabin(str_, n, pat_, m); |
461 | 394 | if (stored_prep_ptr) |
462 | 0 | *stored_prep_ptr = prep; |
463 | 394 | } |
464 | 394 | bmGs = prep; // Good-suffix shift array |
465 | 394 | bmBc = prep + m; // Bad character shift array |
466 | 394 | j = 0; |
467 | 15.8M | while (j <= n - m) { |
468 | 15.8M | for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); |
469 | 15.8M | if (i >= 0) { |
470 | 15.8M | int max = bmBc[str[i+j]] - m + 1 + i; |
471 | 15.8M | if (max < bmGs[i]) max = bmGs[i]; |
472 | 15.8M | j += max; |
473 | 15.8M | } else { // Match found |
474 | 20 | if (!stored_prep_ptr) free(prep); |
475 | 20 | return (void*)(str + j); |
476 | 20 | } |
477 | 15.8M | } |
478 | | // No match |
479 | 374 | if (!stored_prep_ptr) free(prep); |
480 | 374 | return NULL; |
481 | 394 | } |
482 | | |
483 | | void *kmemmem(const void *str, int n, const void *pat, int m, int **prep) |
484 | 466 | { |
485 | 466 | return boyer_moore(str, n >= 0 ? n : 0, pat, m, prep); |
486 | 466 | } |
487 | | |
488 | | char *kstrstr(const char *str, const char *pat, int **prep) |
489 | 0 | { |
490 | 0 | size_t patlen = strlen(pat); |
491 | 0 | if (patlen <= INT_MAX) |
492 | 0 | return (char*)boyer_moore(str, strlen(str), pat, patlen, prep); |
493 | 0 | else |
494 | 0 | return karp_rabin(str, strlen(str), pat, patlen); |
495 | 0 | } |
496 | | |
497 | | char *kstrnstr(const char *str, const char *pat, int n, int **prep) |
498 | 0 | { |
499 | 0 | if (!pat || *pat == '\0') |
500 | 0 | return (char *) str; |
501 | 0 | if (n <= 0) |
502 | 0 | return NULL; |
503 | 0 | const char *endp = memchr(str, '\0', n); |
504 | 0 | if (endp != NULL && endp - str < n) |
505 | 0 | n = endp - str; |
506 | 0 | size_t patlen = strlen(pat); |
507 | 0 | if (patlen > n) |
508 | 0 | return NULL; |
509 | 0 | return (char*)boyer_moore(str, n, pat, patlen, prep); |
510 | 0 | } |
511 | | |
512 | | /*********************** |
513 | | * The main() function * |
514 | | ***********************/ |
515 | | |
516 | | #ifdef KSTRING_MAIN |
517 | | #include <stdio.h> |
518 | | int main() |
519 | | { |
520 | | kstring_t *s; |
521 | | int *fields, n, i; |
522 | | ks_tokaux_t aux; |
523 | | char *p; |
524 | | s = (kstring_t*)calloc(1, sizeof(kstring_t)); |
525 | | // test ksprintf() |
526 | | ksprintf(s, " abcdefg: %d ", 100); |
527 | | printf("'%s'\n", s->s); |
528 | | // test ksplit() |
529 | | fields = ksplit(s, 0, &n); |
530 | | for (i = 0; i < n; ++i) |
531 | | printf("field[%d] = '%s'\n", i, s->s + fields[i]); |
532 | | // test kstrtok() |
533 | | s->l = 0; |
534 | | for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) { |
535 | | kputsn(p, aux.p - p, s); |
536 | | kputc('\n', s); |
537 | | } |
538 | | printf("%s", s->s); |
539 | | // free |
540 | | free(s->s); free(s); free(fields); |
541 | | |
542 | | { |
543 | | static char *str = "abcdefgcdgcagtcakcdcd"; |
544 | | static char *pat = "cd"; |
545 | | char *ret, *s = str; |
546 | | int *prep = 0; |
547 | | while ((ret = kstrstr(s, pat, &prep)) != 0) { |
548 | | printf("match: %s\n", ret); |
549 | | s = ret + prep[0]; |
550 | | } |
551 | | free(prep); |
552 | | } |
553 | | return 0; |
554 | | } |
555 | | #endif |