Line | Count | Source |
1 | | /* The MIT License |
2 | | |
3 | | Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk> |
4 | | Copyright (C) 2013-2018, 2020-2021, 2023, 2025 Genome Research Ltd. |
5 | | |
6 | | Permission is hereby granted, free of charge, to any person obtaining |
7 | | a copy of this software and associated documentation files (the |
8 | | "Software"), to deal in the Software without restriction, including |
9 | | without limitation the rights to use, copy, modify, merge, publish, |
10 | | distribute, sublicense, and/or sell copies of the Software, and to |
11 | | permit persons to whom the Software is furnished to do so, subject to |
12 | | the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be |
15 | | included in all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
18 | | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
19 | | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
20 | | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
21 | | BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
22 | | ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
23 | | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
24 | | SOFTWARE. |
25 | | */ |
26 | | |
27 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
28 | | #include <config.h> |
29 | | |
30 | | #include <stdarg.h> |
31 | | #include <stdio.h> |
32 | | #include <ctype.h> |
33 | | #include <string.h> |
34 | | #include <stdint.h> |
35 | | #include <math.h> |
36 | | #include "htslib/kstring.h" |
37 | | #include "htslib/hts_alloc.h" |
38 | | |
39 | 577k | int kputd(double d, kstring_t *s) { |
40 | 577k | int len = 0; |
41 | 577k | char buf[21], *cp = buf+20, *ep; |
42 | 577k | if (d == 0) { |
43 | 40.6k | if (signbit(d)) { |
44 | 4.85k | kputsn("-0",2,s); |
45 | 4.85k | return 2; |
46 | 35.7k | } else { |
47 | 35.7k | kputsn("0",1,s); |
48 | 35.7k | return 1; |
49 | 35.7k | } |
50 | 40.6k | } |
51 | | |
52 | 537k | if (d < 0) { |
53 | 154k | kputc('-',s); |
54 | 154k | len = 1; |
55 | 154k | d=-d; |
56 | 154k | } |
57 | 537k | if (!(d >= 0.0001 && d <= 999999)) { |
58 | 140k | if (ks_resize(s, s->l + 50) < 0) |
59 | 0 | return EOF; |
60 | | // We let stdio handle the exponent cases |
61 | 140k | int s2 = snprintf(s->s + s->l, s->m - s->l, "%g", d); |
62 | 140k | len += s2; |
63 | 140k | s->l += s2; |
64 | 140k | return len; |
65 | 140k | } |
66 | | |
67 | | // Correction for rounding - rather ugly |
68 | | // Optimised for small numbers. |
69 | | |
70 | 396k | uint32_t i; |
71 | 396k | if (d<0.001) i = rint(d*1000000000), cp -= 1; |
72 | 396k | else if (d < 0.01) i = rint(d*100000000), cp -= 2; |
73 | 388k | else if (d < 0.1) i = rint(d*10000000), cp -= 3; |
74 | 373k | else if (d < 1) i = rint(d*1000000), cp -= 4; |
75 | 361k | else if (d < 10) i = rint(d*100000), cp -= 5; |
76 | 262k | else if (d < 100) i = rint(d*10000), cp -= 6; |
77 | 214k | else if (d < 1000) i = rint(d*1000), cp -= 7; |
78 | 79.0k | else if (d < 10000) i = rint(d*100), cp -= 8; |
79 | 69.4k | else if (d < 100000) i = rint(d*10), cp -= 9; |
80 | 51.3k | else i = rint(d), cp -= 10; |
81 | | |
82 | | // integer i is always 6 digits, so print it 2 at a time. |
83 | 396k | static const char kputuw_dig2r[] = |
84 | 396k | "00010203040506070809" |
85 | 396k | "10111213141516171819" |
86 | 396k | "20212223242526272829" |
87 | 396k | "30313233343536373839" |
88 | 396k | "40414243444546474849" |
89 | 396k | "50515253545556575859" |
90 | 396k | "60616263646566676869" |
91 | 396k | "70717273747576777879" |
92 | 396k | "80818283848586878889" |
93 | 396k | "90919293949596979899"; |
94 | | |
95 | 396k | memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; |
96 | 396k | memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); i /= 100; |
97 | 396k | memcpy(cp-=2, &kputuw_dig2r[2*(i%100)], 2); |
98 | | |
99 | | // Except when it rounds up (d=0.009999999 is i=1000000) |
100 | 396k | if (i >= 100) |
101 | 8.29k | *--cp = '0' + (i/100); |
102 | | |
103 | | |
104 | 396k | int p = buf+20-cp; |
105 | 396k | if (p <= 10) { /* d < 1 */ |
106 | | // 0.00123 is 123, so add leading zeros and 0. |
107 | 35.8k | ep = cp+5; // 6 precision |
108 | 59.7k | while (p < 10) { // aka d < 1 |
109 | 23.9k | *--cp = '0'; |
110 | 23.9k | p++; |
111 | 23.9k | } |
112 | 35.8k | *--cp = '.'; |
113 | 35.8k | *--cp = '0'; |
114 | 361k | } else { |
115 | | // 123.001 is 123001 with p==13, so move 123 down and add "." |
116 | | // Equiv to memmove(cp-1, cp, p-10); cp--; |
117 | 361k | char *xp = --cp; |
118 | 361k | ep = cp+6; |
119 | 1.39M | while (p > 10) { |
120 | 1.03M | xp[0] = xp[1]; |
121 | 1.03M | xp++; |
122 | 1.03M | p--; |
123 | 1.03M | } |
124 | 361k | xp[0] = '.'; |
125 | 361k | } |
126 | | |
127 | | // Cull trailing zeros |
128 | 1.68M | while (*ep == '0' && ep > cp) |
129 | 1.29M | ep--; |
130 | | |
131 | | // End can be 1 out due to the mostly-6 but occasionally 7 (i==1) case. |
132 | | // Also code with "123." which should be "123" |
133 | 396k | if (*ep && *ep != '.') |
134 | 39.3k | ep++; |
135 | 396k | *ep = 0; |
136 | | |
137 | 396k | int sl = ep-cp; |
138 | 396k | len += sl; |
139 | 396k | kputsn(cp, sl, s); |
140 | 396k | return len; |
141 | 537k | } |
142 | | |
143 | | int kvsprintf(kstring_t *s, const char *fmt, va_list ap) |
144 | 197k | { |
145 | 197k | va_list args; |
146 | 197k | int l; |
147 | 197k | va_copy(args, ap); |
148 | | |
149 | 197k | if (fmt[0] == '%' && fmt[1] == 'g' && fmt[2] == 0) { |
150 | 0 | double d = va_arg(args, double); |
151 | 0 | l = kputd(d, s); |
152 | 0 | va_end(args); |
153 | 0 | return l; |
154 | 0 | } |
155 | | |
156 | 197k | if (!s->s) { |
157 | 121k | const size_t sz = 64; |
158 | 121k | s->s = malloc(sz); |
159 | 121k | if (!s->s) |
160 | 0 | return -1; |
161 | 121k | s->m = sz; |
162 | 121k | s->l = 0; |
163 | 121k | } |
164 | | |
165 | 197k | l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'. |
166 | 197k | va_end(args); |
167 | 197k | if (l + 1 > s->m - s->l) { |
168 | 63.8k | if (ks_resize(s, s->l + l + 2) < 0) |
169 | 0 | return -1; |
170 | 63.8k | va_copy(args, ap); |
171 | 63.8k | l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); |
172 | 63.8k | va_end(args); |
173 | 63.8k | } |
174 | 197k | s->l += l; |
175 | 197k | return l; |
176 | 197k | } |
177 | | |
178 | | int ksprintf(kstring_t *s, const char *fmt, ...) |
179 | 197k | { |
180 | 197k | va_list ap; |
181 | 197k | int l; |
182 | 197k | va_start(ap, fmt); |
183 | 197k | l = kvsprintf(s, fmt, ap); |
184 | 197k | va_end(ap); |
185 | 197k | return l; |
186 | 197k | } |
187 | | |
188 | | char *kstrtok(const char *str, const char *sep_in, ks_tokaux_t *aux) |
189 | 2.05M | { |
190 | 2.05M | const unsigned char *p, *start, *sep = (unsigned char *) sep_in; |
191 | 2.05M | if (sep) { // set up the table |
192 | 75.5k | if (str == 0 && aux->finished) return 0; // no need to set up if we have finished |
193 | 75.5k | aux->finished = 0; |
194 | 75.5k | if (sep[0] && sep[1]) { |
195 | 0 | aux->sep = -1; |
196 | 0 | aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0; |
197 | 0 | for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f); |
198 | 75.5k | } else aux->sep = sep[0]; |
199 | 75.5k | } |
200 | 2.05M | if (aux->finished) return 0; |
201 | 2.01M | else if (str) start = (unsigned char *) str, aux->finished = 0; |
202 | 1.93M | else start = (unsigned char *) aux->p + 1; |
203 | 2.01M | if (aux->sep < 0) { |
204 | 0 | for (p = start; *p; ++p) |
205 | 0 | if (aux->tab[*p>>6]>>(*p&0x3f)&1) break; |
206 | 2.01M | } else { |
207 | | // Using strchr is fast for next token, but slower for |
208 | | // last token due to extra pass from strlen. Overall |
209 | | // on a VCF parse this func was 146% faster with // strchr. |
210 | | // Equiv to: |
211 | | // for (p = start; *p; ++p) if (*p == aux->sep) break; |
212 | | |
213 | | // NB: We could use strchrnul() here from glibc if detected, |
214 | | // which is ~40% faster again, but it's not so portable. |
215 | | // i.e. p = (uint8_t *)strchrnul((char *)start, aux->sep); |
216 | 2.01M | uint8_t *p2 = (uint8_t *)strchr((char *)start, aux->sep); |
217 | 2.01M | p = p2 ? p2 : start + strlen((char *)start); |
218 | 2.01M | } |
219 | 2.01M | aux->p = (const char *) p; // end of token |
220 | 2.01M | if (*p == 0) aux->finished = 1; // no more tokens |
221 | 2.01M | return (char*)start; |
222 | 2.05M | } |
223 | | |
224 | | // s MUST BE a null terminated string; l = strlen(s) |
225 | | int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) |
226 | 0 | { |
227 | 0 | int i, n, max, last_char, last_start, *offsets, l; |
228 | 0 | n = 0; max = *_max; offsets = *_offsets; |
229 | 0 | l = strlen(s); |
230 | |
|
231 | 0 | #define __ksplit_aux do { \ |
232 | 0 | if (_offsets) { \ |
233 | 0 | s[i] = 0; \ |
234 | 0 | if (n == max) { \ |
235 | 0 | int *tmp; \ |
236 | 0 | max = max? max<<1 : 2; \ |
237 | 0 | if ((tmp = hts_realloc_p(offsets, sizeof(int), max))) { \ |
238 | 0 | offsets = tmp; \ |
239 | 0 | } else { \ |
240 | 0 | free(offsets); \ |
241 | 0 | *_offsets = NULL; \ |
242 | 0 | return 0; \ |
243 | 0 | } \ |
244 | 0 | } \ |
245 | 0 | offsets[n++] = last_start; \ |
246 | 0 | } else ++n; \ |
247 | 0 | } while (0) |
248 | |
|
249 | 0 | for (i = 0, last_char = last_start = 0; i <= l; ++i) { |
250 | 0 | if (delimiter == 0) { |
251 | 0 | if (isspace((int)((unsigned char) s[i])) || s[i] == 0) { |
252 | 0 | if (isgraph(last_char)) |
253 | 0 | __ksplit_aux; // the end of a field |
254 | 0 | } else { |
255 | 0 | if (isspace(last_char) || last_char == 0) |
256 | 0 | last_start = i; |
257 | 0 | } |
258 | 0 | } else { |
259 | 0 | if (s[i] == delimiter || s[i] == 0) { |
260 | 0 | if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field |
261 | 0 | } else { |
262 | 0 | if (last_char == delimiter || last_char == 0) last_start = i; |
263 | 0 | } |
264 | 0 | } |
265 | 0 | last_char = (int)((unsigned char)s[i]); |
266 | 0 | } |
267 | 0 | *_max = max; *_offsets = offsets; |
268 | 0 | return n; |
269 | 0 | } |
270 | | |
271 | | int kgetline(kstring_t *s, kgets_func *fgets_fn, void *fp) |
272 | 0 | { |
273 | 0 | size_t l0 = s->l; |
274 | |
|
275 | 0 | while (s->l == l0 || s->s[s->l-1] != '\n') { |
276 | 0 | if (s->m - s->l < 200) { |
277 | 0 | if (ks_resize(s, s->m + 200) < 0) |
278 | 0 | return EOF; |
279 | 0 | } |
280 | 0 | if (fgets_fn(s->s + s->l, s->m - s->l, fp) == NULL) break; |
281 | 0 | s->l += strlen(s->s + s->l); |
282 | 0 | } |
283 | | |
284 | 0 | if (s->l == l0) return EOF; |
285 | | |
286 | 0 | if (s->l > l0 && s->s[s->l-1] == '\n') { |
287 | 0 | s->l--; |
288 | 0 | if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; |
289 | 0 | } |
290 | 0 | s->s[s->l] = '\0'; |
291 | 0 | return 0; |
292 | 0 | } |
293 | | |
294 | | // Wrap around fgets to get the right signature for kgets_func |
295 | | static char * fgets_wrapper(char *buffer, int size, void *stream) |
296 | 0 | { |
297 | 0 | return fgets(buffer, size, (FILE *) stream); |
298 | 0 | } |
299 | | |
300 | | int kfgetline(kstring_t *s, FILE *fp) |
301 | 0 | { |
302 | 0 | if (!s || !fp) |
303 | 0 | return EOF; |
304 | 0 | return kgetline(s, fgets_wrapper, fp); |
305 | 0 | } |
306 | | |
307 | | int kgetline2(kstring_t *s, kgets_func2 *fgets_fn, void *fp) |
308 | 1.23M | { |
309 | 1.23M | size_t l0 = s->l; |
310 | | |
311 | 2.47M | while (s->l == l0 || s->s[s->l-1] != '\n') { |
312 | 1.26M | if (s->m - s->l < 200) { |
313 | | // We return EOF for both EOF and error and the caller |
314 | | // needs to check for errors in fp, and we haven't |
315 | | // even got there yet. |
316 | | // |
317 | | // The only way of propagating memory errors is to |
318 | | // deliberately call something that we know triggers |
319 | | // and error so fp is also set. This works for |
320 | | // hgets, but not for gets where reading <= 0 bytes |
321 | | // isn't an error. |
322 | 542k | if (ks_resize(s, s->m + 200) < 0) { |
323 | 0 | fgets_fn(s->s + s->l, 0, fp); |
324 | 0 | return EOF; |
325 | 0 | } |
326 | 542k | } |
327 | 1.26M | ssize_t len = fgets_fn(s->s + s->l, s->m - s->l, fp); |
328 | 1.26M | if (len <= 0) break; |
329 | 1.24M | s->l += len; |
330 | 1.24M | } |
331 | | |
332 | 1.23M | if (s->l == l0) return EOF; |
333 | | |
334 | 1.22M | if (s->l > l0 && s->s[s->l-1] == '\n') { |
335 | 1.20M | s->l--; |
336 | 1.20M | if (s->l > l0 && s->s[s->l-1] == '\r') s->l--; |
337 | 1.20M | } |
338 | 1.22M | s->s[s->l] = '\0'; |
339 | 1.22M | return 0; |
340 | 1.23M | } |
341 | | |
342 | | typedef unsigned char ubyte_t; |
343 | | |
344 | | /********************** |
345 | | * Karp-Rabin search * |
346 | | **********************/ |
347 | | |
348 | | // Backup solution for when we can't use Boyer-Moore, e.g. due to memory |
349 | | // required. Karp-Rabin only needs to store a couple of hash values |
350 | | // so will always complete. |
351 | | |
352 | | static uint64_t fast_exp(uint64_t x, uint64_t n) |
353 | 0 | { |
354 | 0 | uint64_t y = 1; |
355 | 0 | if (n == 0) |
356 | 0 | return 1; |
357 | 0 | while (n > 1) { |
358 | 0 | if (n & 1) |
359 | 0 | y *= x; |
360 | 0 | x *= x; |
361 | 0 | n >>= 1; |
362 | 0 | } |
363 | 0 | return y * x; |
364 | 0 | } |
365 | | |
366 | | static void * karp_rabin(const void *str_, size_t n, const void *pat_, size_t m) |
367 | 0 | { |
368 | 0 | const ubyte_t *str = (const ubyte_t *) str_; |
369 | 0 | const ubyte_t *pat = (const ubyte_t *) pat_; |
370 | 0 | const uint64_t b = 31; |
371 | 0 | uint64_t hash_pat = 0; |
372 | 0 | uint64_t hash_str = 0; |
373 | 0 | uint64_t b_to_m = fast_exp(b, m); |
374 | 0 | size_t i; |
375 | 0 | ubyte_t mismatch = 0; |
376 | |
|
377 | 0 | if (m > n) |
378 | 0 | return NULL; |
379 | | |
380 | | // Calculate hash of pat and initial part of str. |
381 | 0 | for (i = 0; i < m; i++) { |
382 | 0 | mismatch |= str[i] ^ pat[i]; |
383 | 0 | hash_pat = hash_pat * b + pat[i] + 1U; |
384 | 0 | hash_str = hash_str * b + str[i] + 1U; |
385 | 0 | } |
386 | |
|
387 | 0 | if (!mismatch) // Match found at start (or m == 0) |
388 | 0 | return (void *) str_; |
389 | | |
390 | 0 | for (; i < n; i++) { |
391 | 0 | hash_str = hash_str * b + str[i] + 1U - b_to_m * (str[i - m] + 1U); |
392 | 0 | if (hash_str == hash_pat) { |
393 | 0 | if (memcmp(pat, str + i + 1 - m, m) == 0) |
394 | 0 | return (void *) (str + i + 1 - m); |
395 | 0 | } |
396 | 0 | } |
397 | 0 | return NULL; |
398 | 0 | } |
399 | | |
400 | | /********************** |
401 | | * Boyer-Moore search * |
402 | | **********************/ |
403 | | |
404 | | |
405 | | // reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html |
406 | | static int *ksBM_prep(const ubyte_t *pat, int m) |
407 | 898 | { |
408 | 898 | int i, *suff, *prep, *bmGs, *bmBc; |
409 | 898 | if (m < 1) |
410 | 0 | return NULL; |
411 | 898 | prep = hts_calloc_ps(sizeof(int), m, 256); |
412 | 898 | if (!prep) return NULL; |
413 | 898 | bmGs = prep; bmBc = prep + m; |
414 | 898 | { // preBmBc() |
415 | 230k | for (i = 0; i < 256; ++i) bmBc[i] = m; |
416 | 17.7k | for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1; |
417 | 898 | } |
418 | 898 | suff = (int*)calloc(m, sizeof(int)); |
419 | 898 | if (!suff) { free(prep); return NULL; } |
420 | 898 | { // suffixes() |
421 | 898 | int f = 0, g; |
422 | 898 | suff[m - 1] = m; |
423 | 898 | g = m - 1; |
424 | 17.7k | for (i = m - 2; i >= 0; --i) { |
425 | 16.8k | if (i > g && suff[i + m - 1 - f] < i - g) |
426 | 0 | suff[i] = suff[i + m - 1 - f]; |
427 | 16.8k | else { |
428 | 16.8k | if (i < g) g = i; |
429 | 16.8k | f = i; |
430 | 17.1k | while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; |
431 | 16.8k | suff[i] = f - g; |
432 | 16.8k | } |
433 | 16.8k | } |
434 | 898 | } |
435 | 898 | { // preBmGs() |
436 | 898 | int j = 0; |
437 | 18.6k | for (i = 0; i < m; ++i) bmGs[i] = m; |
438 | 18.6k | for (i = m - 1; i >= 0; --i) |
439 | 17.7k | if (suff[i] == i + 1) |
440 | 898 | for (; j < m - 1 - i; ++j) |
441 | 0 | if (bmGs[j] == m) |
442 | 0 | bmGs[j] = m - 1 - i; |
443 | 17.7k | for (i = 0; i <= m - 2; ++i) |
444 | 16.8k | bmGs[m - 1 - suff[i]] = m - 1 - i; |
445 | 898 | } |
446 | 898 | free(suff); |
447 | 898 | return prep; |
448 | 898 | } |
449 | | |
450 | | static void *boyer_moore(const void *str_, size_t n, const void *pat_, int m, |
451 | | int **stored_prep_ptr) |
452 | 1.32k | { |
453 | 1.32k | size_t j; |
454 | 1.32k | int i, *prep = 0, *bmGs, *bmBc; |
455 | 1.32k | const ubyte_t *str, *pat; |
456 | | |
457 | 1.32k | if (!str_ || !pat_) |
458 | 0 | return (void *) str_; |
459 | | |
460 | 1.32k | str = (const ubyte_t*) str_; |
461 | 1.32k | pat = (const ubyte_t*) pat_; |
462 | | |
463 | 1.32k | if (m <= 0) // Empty string always matches at the start |
464 | 0 | return (void *) str_; |
465 | 1.32k | if (n < m) // Input shorter than pattern can't match |
466 | 422 | return NULL; |
467 | 898 | if (m == 1) // Switch to memchr for 1 byte patterns (likely faster) |
468 | 0 | return memchr(str, pat[0], n); |
469 | | |
470 | 898 | if (stored_prep_ptr && *stored_prep_ptr) { |
471 | 0 | prep = *stored_prep_ptr; |
472 | 898 | } else { |
473 | 898 | prep = ksBM_prep(pat, m); |
474 | 898 | if (!prep) return karp_rabin(str_, n, pat_, m); |
475 | 898 | if (stored_prep_ptr) |
476 | 0 | *stored_prep_ptr = prep; |
477 | 898 | } |
478 | 898 | bmGs = prep; // Good-suffix shift array |
479 | 898 | bmBc = prep + m; // Bad character shift array |
480 | 898 | j = 0; |
481 | 1.58M | while (j <= n - m) { |
482 | 1.58M | for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); |
483 | 1.58M | if (i >= 0) { |
484 | 1.58M | int max = bmBc[str[i+j]] - m + 1 + i; |
485 | 1.58M | if (max < bmGs[i]) max = bmGs[i]; |
486 | 1.58M | j += max; |
487 | 1.58M | } else { // Match found |
488 | 15 | if (!stored_prep_ptr) free(prep); |
489 | 15 | return (void*)(str + j); |
490 | 15 | } |
491 | 1.58M | } |
492 | | // No match |
493 | 883 | if (!stored_prep_ptr) free(prep); |
494 | 883 | return NULL; |
495 | 898 | } |
496 | | |
497 | | void *kmemmem(const void *str, int n, const void *pat, int m, int **prep) |
498 | 1.32k | { |
499 | 1.32k | return boyer_moore(str, n >= 0 ? n : 0, pat, m, prep); |
500 | 1.32k | } |
501 | | |
502 | | char *kstrstr(const char *str, const char *pat, int **prep) |
503 | 0 | { |
504 | 0 | size_t patlen = strlen(pat); |
505 | 0 | if (patlen <= INT_MAX) |
506 | 0 | return (char*)boyer_moore(str, strlen(str), pat, patlen, prep); |
507 | 0 | else |
508 | 0 | return karp_rabin(str, strlen(str), pat, patlen); |
509 | 0 | } |
510 | | |
511 | | char *kstrnstr(const char *str, const char *pat, int n, int **prep) |
512 | 0 | { |
513 | 0 | if (!pat || *pat == '\0') |
514 | 0 | return (char *) str; |
515 | 0 | if (n <= 0) |
516 | 0 | return NULL; |
517 | 0 | const char *endp = memchr(str, '\0', n); |
518 | 0 | if (endp != NULL && endp - str < n) |
519 | 0 | n = endp - str; |
520 | 0 | size_t patlen = strlen(pat); |
521 | 0 | if (patlen > n) |
522 | 0 | return NULL; |
523 | 0 | return (char*)boyer_moore(str, n, pat, patlen, prep); |
524 | 0 | } |
525 | | |
526 | | /*********************** |
527 | | * The main() function * |
528 | | ***********************/ |
529 | | |
530 | | #ifdef KSTRING_MAIN |
531 | | #include <stdio.h> |
532 | | int main() |
533 | | { |
534 | | kstring_t *s; |
535 | | int *fields, n, i; |
536 | | ks_tokaux_t aux; |
537 | | char *p; |
538 | | s = (kstring_t*)calloc(1, sizeof(kstring_t)); |
539 | | // test ksprintf() |
540 | | ksprintf(s, " abcdefg: %d ", 100); |
541 | | printf("'%s'\n", s->s); |
542 | | // test ksplit() |
543 | | fields = ksplit(s, 0, &n); |
544 | | for (i = 0; i < n; ++i) |
545 | | printf("field[%d] = '%s'\n", i, s->s + fields[i]); |
546 | | // test kstrtok() |
547 | | s->l = 0; |
548 | | for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) { |
549 | | kputsn(p, aux.p - p, s); |
550 | | kputc('\n', s); |
551 | | } |
552 | | printf("%s", s->s); |
553 | | // free |
554 | | free(s->s); free(s); free(fields); |
555 | | |
556 | | { |
557 | | static char *str = "abcdefgcdgcagtcakcdcd"; |
558 | | static char *pat = "cd"; |
559 | | char *ret, *s = str; |
560 | | int *prep = 0; |
561 | | while ((ret = kstrstr(s, pat, &prep)) != 0) { |
562 | | printf("match: %s\n", ret); |
563 | | s = ret + prep[0]; |
564 | | } |
565 | | free(prep); |
566 | | } |
567 | | return 0; |
568 | | } |
569 | | #endif |