/src/mupdf/source/fitz/string.c
Line | Count | Source |
1 | | // Copyright (C) 2004-2025 Artifex Software, Inc. |
2 | | // |
3 | | // This file is part of MuPDF. |
4 | | // |
5 | | // MuPDF is free software: you can redistribute it and/or modify it under the |
6 | | // terms of the GNU Affero General Public License as published by the Free |
7 | | // Software Foundation, either version 3 of the License, or (at your option) |
8 | | // any later version. |
9 | | // |
10 | | // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY |
11 | | // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
12 | | // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more |
13 | | // details. |
14 | | // |
15 | | // You should have received a copy of the GNU Affero General Public License |
16 | | // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html> |
17 | | // |
18 | | // Alternative licensing terms are available from the licensor. |
19 | | // For commercial licensing, see <https://www.artifex.com/> or contact |
20 | | // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
21 | | // CA 94129, USA, for further information. |
22 | | |
23 | | #include "mupdf/fitz.h" |
24 | | |
25 | | #include <string.h> |
26 | | #include <errno.h> |
27 | | #include <math.h> |
28 | | #include <float.h> |
29 | | #include <stdlib.h> |
30 | | |
31 | | #ifdef _WIN32 |
32 | | #include <windows.h> /* for MultiByteToWideChar etc. */ |
33 | | #endif |
34 | | |
35 | | #include "utfdata.h" |
36 | | |
/*
	Binary search over a sorted table of fixed-size records.

	t points at n records of ne ints each; the first int of every
	record is its key. Returns the last record whose key is <= c,
	or NULL when the table is empty or c sorts before the first key.
*/
static const int *
fz_ucd_bsearch(int c, const int *t, int n, int ne)
{
	const int *base = t;
	int count = n;

	while (count > 1)
	{
		int half = count / 2;
		const int *mid = base + half * ne;

		if (c < mid[0])
		{
			/* Answer lies strictly before mid. */
			count = half;
		}
		else
		{
			/* mid is a candidate; keep it as the new first record. */
			base = mid;
			count -= half;
		}
	}

	return (count != 0 && c >= base[0]) ? base : NULL;
}
60 | | |
61 | | int |
62 | | fz_tolower(int c) |
63 | 18.8k | { |
64 | 18.8k | const int *p; |
65 | | |
66 | | /* Make ASCII fast. */ |
67 | 18.8k | if (c < 128) |
68 | 18.8k | { |
69 | 18.8k | if (c >= 'A' && c <= 'Z') |
70 | 1.56k | c += 'a' - 'A'; |
71 | 18.8k | return c; |
72 | 18.8k | } |
73 | | |
74 | 0 | p = fz_ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2) / 3, 3); |
75 | 0 | if (p && c >= p[0] && c <= p[1]) |
76 | 0 | return c + p[2]; |
77 | 0 | p = fz_ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1) / 2, 2); |
78 | 0 | if (p && c == p[0]) |
79 | 0 | return c + p[1]; |
80 | 0 | return c; |
81 | 0 | } |
82 | | |
83 | | int |
84 | | fz_toupper(int c) |
85 | 0 | { |
86 | 0 | const int *p; |
87 | 0 | p = fz_ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2) / 3, 3); |
88 | 0 | if (p && c >= p[0] && c <= p[1]) |
89 | 0 | return c + p[2]; |
90 | 0 | p = fz_ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1) / 2, 2); |
91 | 0 | if (p && c == p[0]) |
92 | 0 | return c + p[1]; |
93 | 0 | return c; |
94 | 0 | } |
95 | | |
/*
	Length of s, looking at no more than n bytes.

	Returns the offset of the first NUL within the first n bytes,
	or n when there is none.
*/
size_t
fz_strnlen(const char *s, size_t n)
{
	const char *nul = memchr(s, '\0', n);

	if (nul == NULL)
		return n;
	return (size_t)(nul - s);
}
102 | | |
/*
	Case-insensitive comparison of two UTF-8 strings, examining at
	most n bytes of each.

	Runes are decoded with fz_chartorunen and, when they differ,
	compared again after fz_tolower. Returns zero when the strings
	match over the examined range, otherwise the difference of the
	first pair of runes that differ (after case folding). A NUL rune
	in either string ends the comparison.

	Each string has its own byte budget. Two runes that are equal
	after case folding need not encode to the same number of bytes,
	and two runes that differ certainly need not, so the byte counts
	are never assumed to match and neither string is ever decoded
	beyond its own n bytes.
*/
int
fz_strncasecmp(const char *a, const char *b, size_t n)
{
	size_t left_a = n;
	size_t left_b = n;

	while (left_a > 0 && left_b > 0)
	{
		int ucs_a, ucs_b, n_a, n_b;

		/* With a non-zero limit the decoder never reports more bytes than the limit. */
		n_a = fz_chartorunen(&ucs_a, a, left_a);
		n_b = fz_chartorunen(&ucs_b, b, left_b);

		// one or both of the strings are short
		if (ucs_a == 0 || ucs_b == 0)
			return ucs_a - ucs_b;

		if (ucs_a != ucs_b)
		{
			ucs_a = fz_tolower(ucs_a);
			ucs_b = fz_tolower(ucs_b);
			if (ucs_a != ucs_b)
				return ucs_a - ucs_b;
		}

		a += n_a;
		b += n_b;
		left_a -= (size_t)n_a;
		left_b -= (size_t)n_b;
	}
	return 0;
}
135 | | |
/*
	Case-insensitive comparison of two NUL-terminated UTF-8 strings.

	Both strings are decoded rune by rune and every rune is passed
	through fz_tolower before comparing. Returns zero when the
	strings are equal, otherwise the difference between the first
	pair of lower-cased runes that differ.
*/
int
fz_strcasecmp(const char *a, const char *b)
{
	int rune_a, rune_b;

	do
	{
		a += fz_chartorune(&rune_a, a);
		b += fz_chartorune(&rune_b, b);
		rune_a = fz_tolower(rune_a);
		rune_b = fz_tolower(rune_b);
		if (rune_a != rune_b)
			return rune_a - rune_b;
	}
	while (rune_a != 0); /* equal runes: stop once both reached NUL */

	return 0;
}
155 | | |
/*
	Split off the next token from *stringp.

	Returns the current value of *stringp (NULL when there is
	nothing left). The first byte in the token that matches any
	byte of delim is overwritten with NUL and *stringp is advanced
	past it; if no delimiter is found *stringp becomes NULL.
*/
char *
fz_strsep(char **stringp, const char *delim)
{
	char *token = *stringp;
	char *cut;

	if (token == NULL)
		return NULL;

	cut = strpbrk(token, delim);
	if (cut == NULL)
	{
		*stringp = NULL;
	}
	else
	{
		*cut = '\0';
		*stringp = cut + 1;
	}
	return token;
}
165 | | |
/*
	Copy src into a buffer of siz bytes.

	At most siz-1 bytes are copied and, when siz is non-zero, the
	result is always NUL-terminated. Returns the length of src;
	a return value >= siz means the copy was truncated.
*/
size_t
fz_strlcpy(char *dst, const char *src, size_t siz)
{
	const char *in = src;
	char *out = dst;
	size_t room = siz;

	if (room > 0)
	{
		/* Copy as many bytes as will fit, keeping one for the NUL. */
		while (--room > 0)
		{
			char ch = *in++;
			*out++ = ch;
			if (ch == '\0')
				return (size_t)(in - src) - 1; /* count does not include NUL */
		}

		/* Out of room: terminate what we have. */
		*out = '\0';
	}

	/* Walk the rest of src so the full length can be reported. */
	while (*in != '\0')
		in++;

	return (size_t)(in - src);
}
191 | | |
/*
	Append src to the string in dst, where siz is the full size of
	the dst buffer (not the space remaining).

	At most siz-1 bytes end up in dst and the result is
	NUL-terminated unless dst held no NUL within its first siz bytes
	(in which case dst is left untouched). Returns the length the
	combined string would have had with unlimited room, i.e.
	min(siz, strlen(dst)) + strlen(src); a return value >= siz means
	truncation occurred.
*/
size_t
fz_strlcat(char *dst, const char *src, size_t siz)
{
	char *d = dst;
	const char *s = src;
	size_t n = siz;
	size_t dlen;

	/*
	 * Find the end of dst but don't go past the end of the buffer.
	 * The bound must be tested before the byte is read, otherwise
	 * an unterminated dst causes a read of dst[siz].
	 */
	while (n != 0 && *d != '\0')
	{
		d++;
		n--;
	}
	dlen = d - dst;
	n = siz - dlen;

	/* No room at all (or dst unterminated): just report the length. */
	if (n == 0)
		return dlen + strlen(s);

	while (*s != '\0')
	{
		/* Keep the last free byte for the terminator. */
		if (n != 1)
		{
			*d++ = *s;
			n--;
		}
		s++;
	}
	*d = '\0';

	return dlen + (s - src); /* count does not include NUL */
}
219 | | |
/*
	Write the directory part of path into dir (a buffer of n bytes).

	A NULL or empty path yields ".". Otherwise path is copied
	(truncated to fit) and cut just before the last run of '/'
	characters; "." is produced when there is no '/' at all and "/"
	when only slashes precede the cut.

	The backwards scan begins on the terminating NUL rather than on
	the last character, so a trailing '/' is treated as the final
	separator ("a/b/" gives "a/b").
*/
void
fz_dirname(char *dir, const char *path, size_t n)
{
	size_t pos;

	if (path == NULL || path[0] == '\0')
	{
		fz_strlcpy(dir, ".", n);
		return;
	}

	fz_strlcpy(dir, path, n);
	pos = strlen(dir);

	/* Skip slashes at the scan position (none when starting on the NUL). */
	while (dir[pos] == '/')
	{
		if (pos == 0)
		{
			fz_strlcpy(dir, "/", n);
			return;
		}
		--pos;
	}

	/* Skip back over the last component. */
	while (dir[pos] != '/')
	{
		if (pos == 0)
		{
			fz_strlcpy(dir, ".", n);
			return;
		}
		--pos;
	}

	/* Skip back over the separator run in front of it. */
	while (dir[pos] == '/')
	{
		if (pos == 0)
		{
			fz_strlcpy(dir, "/", n);
			return;
		}
		--pos;
	}

	dir[pos + 1] = 0;
}
239 | | |
/*
	Return a pointer to the file name part of path.

	That is the text after the last '/'. Only when path contains no
	'/' at all is the last '\\' used as the separator instead. With
	neither separator present, path itself is returned.
*/
const char *
fz_basename(const char *path)
{
	const char *sep = strrchr(path, '/');

	if (sep == NULL)
		sep = strrchr(path, '\\');

	return sep != NULL ? sep + 1 : path;
}
250 | | |
#ifdef _WIN32

/*
	Resolve path to an absolute path, written to buf.

	path is interpreted as UTF-8. buf must have room for PATH_MAX
	bytes. Backslashes in the result are converted to forward
	slashes. Returns buf, or NULL if any conversion fails or the
	resolved path does not fit in PATH_MAX wide characters.
*/
char *fz_realpath(const char *path, char *buf)
{
	wchar_t wpath[PATH_MAX];
	wchar_t wbuf[PATH_MAX];
	DWORD len;
	int i;
	if (!MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, PATH_MAX))
		return NULL;
	/*
	 * GetFullPathNameW returns 0 on failure, and the required buffer
	 * size (which exceeds the size passed in) when the buffer is too
	 * small. In the latter case wbuf does not hold a usable path.
	 */
	len = GetFullPathNameW(wpath, PATH_MAX, wbuf, NULL);
	if (len == 0 || len >= PATH_MAX)
		return NULL;
	if (!WideCharToMultiByte(CP_UTF8, 0, wbuf, -1, buf, PATH_MAX, NULL, NULL))
		return NULL;
	for (i=0; buf[i]; ++i)
		if (buf[i] == '\\')
			buf[i] = '/';
	return buf;
}

#else

/*
	Resolve path to an absolute path by calling realpath(), passing
	buf through as the output buffer. Returns whatever realpath()
	returns.
*/
char *fz_realpath(const char *path, char *buf)
{
	return realpath(path, buf);
}

#endif
278 | | |
/* Return 1 if a is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static inline int ishex(int a)
{
	if (a >= '0' && a <= '9')
		return 1;
	if (a >= 'a' && a <= 'f')
		return 1;
	return a >= 'A' && a <= 'F';
}
285 | | |
/* Value (0..15) of the ASCII hexadecimal digit c; 0 for any other input. */
static inline int tohex(int c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return 0xA + (c - 'a');
	if ('A' <= c && c <= 'F')
		return 0xA + (c - 'A');
	return 0;
}
293 | | |
/*
 * Character classes used by the URI encode/decode functions below.
 * Each is a string literal so that classes can be combined by
 * adjacent-literal concatenation and searched with strchr().
 */
/* Characters that fz_encode_uri leaves alone and fz_decode_uri keeps escaped. */
#define URIRESERVED ";/?:@&=+$,"
#define URIALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define URIDIGIT "0123456789"
#define URIMARK "-_.!~*'()"
/* Characters that are never percent-escaped by the encoders. */
#define URIUNESCAPED URIALPHA URIDIGIT URIMARK
/* Upper-case hex digits, indexed by nibble value when emitting %XX escapes. */
#define HEX "0123456789ABCDEF"
300 | | |
/*
	Same as fz_decode_uri_component but in-place.

	Every "%XX" with two hex digits is replaced by the byte it
	encodes; all other bytes (including a '%' not followed by two
	hex digits) are kept. The output is never longer than the input.
	Returns url.
*/
char *
fz_urldecode(char *url)
{
	const char *in = url;
	char *out = url;

	while (*in != '\0')
	{
		int c = (unsigned char) *in++;
		if (c == '%' && ishex(in[0]) && ishex(in[1]))
		{
			int hi = tohex(in[0]);
			int lo = tohex(in[1]);
			in += 2;
			c = hi << 4 | lo;
		}
		*out++ = c;
	}
	*out = 0;
	return url;
}
324 | | |
325 | | char * |
326 | | fz_decode_uri_component(fz_context *ctx, const char *s) |
327 | 0 | { |
328 | 0 | char *uri = fz_malloc(ctx, strlen(s) + 1); |
329 | 0 | char *p = uri; |
330 | 0 | while (*s) |
331 | 0 | { |
332 | 0 | int c = (unsigned char) *s++; |
333 | 0 | if (c == '%' && ishex(s[0]) && ishex(s[1])) |
334 | 0 | { |
335 | 0 | int a = tohex(*s++); |
336 | 0 | int b = tohex(*s++); |
337 | 0 | *p++ = a << 4 | b; |
338 | 0 | } |
339 | 0 | else |
340 | 0 | { |
341 | 0 | *p++ = c; |
342 | 0 | } |
343 | 0 | } |
344 | 0 | *p = 0; |
345 | 0 | return uri; |
346 | 0 | } |
347 | | |
348 | | char * |
349 | | fz_decode_uri(fz_context *ctx, const char *s) |
350 | 0 | { |
351 | 0 | char *uri = fz_malloc(ctx, strlen(s) + 1); |
352 | 0 | char *p = uri; |
353 | 0 | while (*s) |
354 | 0 | { |
355 | 0 | int c = (unsigned char) *s++; |
356 | 0 | if (c == '%' && ishex(s[0]) && ishex(s[1])) |
357 | 0 | { |
358 | 0 | int a = tohex(*s++); |
359 | 0 | int b = tohex(*s++); |
360 | 0 | c = a << 4 | b; |
361 | 0 | if (strchr(URIRESERVED "#", c)) { |
362 | 0 | *p++ = '%'; |
363 | 0 | *p++ = HEX[a]; |
364 | 0 | *p++ = HEX[b]; |
365 | 0 | } else { |
366 | 0 | *p++ = c; |
367 | 0 | } |
368 | 0 | } |
369 | 0 | else |
370 | 0 | { |
371 | 0 | *p++ = c; |
372 | 0 | } |
373 | 0 | } |
374 | 0 | *p = 0; |
375 | 0 | return uri; |
376 | 0 | } |
377 | | |
378 | | static char * |
379 | | fz_encode_uri_imp(fz_context *ctx, const char *s, const char *unescaped) |
380 | 0 | { |
381 | 0 | char *uri = fz_malloc(ctx, strlen(s) * 3 + 1); /* allocate enough for worst case */ |
382 | 0 | char *p = uri; |
383 | 0 | while (*s) |
384 | 0 | { |
385 | 0 | int c = (unsigned char) *s++; |
386 | 0 | if (strchr(unescaped, c)) |
387 | 0 | { |
388 | 0 | *p++ = c; |
389 | 0 | } |
390 | 0 | else |
391 | 0 | { |
392 | 0 | *p++ = '%'; |
393 | 0 | *p++ = HEX[(c >> 4) & 15]; |
394 | 0 | *p++ = HEX[(c) & 15]; |
395 | 0 | } |
396 | 0 | } |
397 | 0 | *p = 0; |
398 | 0 | return uri; |
399 | 0 | } |
400 | | |
/*
	Percent-encode s, leaving only the URIUNESCAPED characters
	(letters, digits and -_.!~*'()) as they are.
	Returns a newly allocated string from fz_encode_uri_imp.
*/
char *
fz_encode_uri_component(fz_context *ctx, const char *s)
{
	return fz_encode_uri_imp(ctx, s, URIUNESCAPED);
}
406 | | |
/*
	Percent-encode s, leaving the URIUNESCAPED characters and '/'
	as they are.
	Returns a newly allocated string from fz_encode_uri_imp.
*/
char *
fz_encode_uri_pathname(fz_context *ctx, const char *s)
{
	return fz_encode_uri_imp(ctx, s, URIUNESCAPED "/");
}
412 | | |
/*
	Percent-encode s, leaving the URIUNESCAPED characters, the
	URIRESERVED characters and '#' as they are.
	Returns a newly allocated string from fz_encode_uri_imp.
*/
char *
fz_encode_uri(fz_context *ctx, const char *s)
{
	return fz_encode_uri_imp(ctx, s, URIUNESCAPED URIRESERVED "#");
}
418 | | |
419 | | void |
420 | | fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page) |
421 | 0 | { |
422 | 0 | const char *s, *p; |
423 | 0 | char num[40]; |
424 | 0 | int i, n; |
425 | 0 | int z = 0; |
426 | |
|
427 | 0 | for (i = 0; page; page /= 10) |
428 | 0 | num[i++] = '0' + page % 10; |
429 | 0 | num[i] = 0; |
430 | |
|
431 | 0 | s = p = strchr(fmt, '%'); |
432 | 0 | if (p) |
433 | 0 | { |
434 | 0 | ++p; |
435 | 0 | while (*p >= '0' && *p <= '9') |
436 | 0 | z = z * 10 + (*p++ - '0'); |
437 | 0 | } |
438 | 0 | if (p && *p == 'd') |
439 | 0 | { |
440 | 0 | ++p; |
441 | 0 | } |
442 | 0 | else |
443 | 0 | { |
444 | 0 | const char *psep = strrchr(fmt, '/'); |
445 | 0 | s = p = strrchr(fmt, '.'); |
446 | | /* Ensure we only match a . in the last path segment. */ |
447 | 0 | if (psep != NULL && p < psep) |
448 | 0 | p = NULL; |
449 | 0 | if (!p) |
450 | 0 | s = p = fmt + strlen(fmt); |
451 | 0 | } |
452 | |
|
453 | 0 | if (z < 1) |
454 | 0 | z = 1; |
455 | 0 | while (i < z && i < (int)sizeof num) |
456 | 0 | num[i++] = '0'; |
457 | 0 | n = s - fmt; |
458 | 0 | if (n + i + strlen(p) >= size) |
459 | 0 | fz_throw(ctx, FZ_ERROR_ARGUMENT, "path name buffer overflow"); |
460 | 0 | memcpy(path, fmt, n); |
461 | 0 | while (i > 0) |
462 | 0 | path[n++] = num[--i]; |
463 | 0 | fz_strlcpy(path + n, p, size - n); |
464 | 0 | } |
465 | | |
/* True when x ends a path element: a slash or the terminating NUL. */
#define SEP(x) ((x)=='/' || (x) == 0)

/*
	Simplify a '/'-separated path in place and return it.

	Empty elements (repeated slashes) and "." elements are dropped,
	and each ".." removes the element before it where possible. A
	rooted path never climbs above "/"; a relative path keeps any
	leading ".." elements that cannot be resolved. An empty result
	becomes ".".

	The result is never longer than the input, except that an empty
	input is rewritten as ".", so the buffer must hold at least two
	bytes.
*/
char *
fz_cleanname(char *name)
{
	char *p, *q, *dotdot;
	int rooted;

	rooted = name[0] == '/';

	/*
	 * invariants:
	 *		p points at beginning of path element we're considering.
	 *		q points just past the last path element we wrote (no slash).
	 *		dotdot points just past the point where .. cannot backtrack
	 *				any further (no slash).
	 */
	p = q = dotdot = name + rooted;
	while (*p)
	{
		if(p[0] == '/') /* null element */
			p++;
		else if (p[0] == '.' && SEP(p[1]))
			p += 1; /* don't count the separator in case it is nul */
		else if (p[0] == '.' && p[1] == '.' && SEP(p[2]))
		{
			p += 2;
			if (q > dotdot) /* can backtrack */
			{
				/* Drop the last written element, back to its slash or to dotdot. */
				while(--q > dotdot && *q != '/')
					;
			}
			else if (!rooted) /* /.. is / but ./../ is .. */
			{
				if (q != name)
					*q++ = '/';
				*q++ = '.';
				*q++ = '.';
				/* Later ".." elements must not remove this one. */
				dotdot = q;
			}
		}
		else /* real path element */
		{
			if (q != name+rooted)
				*q++ = '/';
			/* Copy up to, but not including, the next separator. */
			while ((*q = *p) != '/' && *q != 0)
				p++, q++;
		}
	}

	if (q == name) /* empty string is really "." */
		*q++ = '.';
	*q = '\0';
	return name;
}
521 | | |
522 | | char * |
523 | | fz_cleanname_strdup(fz_context *ctx, const char *name) |
524 | 0 | { |
525 | 0 | size_t len = strlen(name); |
526 | 0 | char *newname = fz_malloc(ctx, fz_maxz(2, len + 1)); |
527 | 0 | memcpy(newname, name, len + 1); |
528 | 0 | newname[len] = '\0'; |
529 | 0 | return fz_cleanname(newname); |
530 | 0 | } |
531 | | |
/* Limits and special values used by the UTF-8 routines below. */
enum
{
	UTFmax = 4, /* maximum bytes per rune */
	Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
	Runeself = 0x80, /* rune and UTF sequences are the same (<) */
	Runeerror = 0xFFFD, /* decoding error in UTF */
	Runemax = 0x10FFFF, /* maximum rune value */
};

/*
 * Bit-level constants for the UTF-8 encoder and decoder.
 *
 * BitN  number of payload bits in the lead byte of an N-byte sequence
 *       (Bitx: payload bits in a continuation byte).
 * TN    lead-byte tag of an N-byte sequence (Tx: continuation-byte
 *       tag); the binary pattern is shown next to each.
 * RuneN largest value that fits in an N-byte sequence.
 * Maskx/Testx select the payload bits / tag bits of a continuation
 *       byte.
 */
enum
{
	Bit1 = 7,
	Bitx = 6,
	Bit2 = 5,
	Bit3 = 4,
	Bit4 = 3,
	Bit5 = 2,

	T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
	Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
	T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
	T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
	T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
	T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */

	Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
	Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
	Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
	Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */

	Maskx = (1<<Bitx)-1, /* 0011 1111 */
	Testx = Maskx ^ 0xFF, /* 1100 0000 */

	/* Value stored in *rune when decoding fails. */
	Bad = Runeerror,
};
567 | | |
/*
	Decode one rune from the NUL-terminated UTF-8 string str.

	Stores the decoded value in *rune and returns the number of
	bytes consumed (1 to 4). On a malformed sequence *rune is set to
	Runeerror (0xFFFD) and 1 is returned, so a caller can resume at
	the next byte.

	The two-byte overlong form C0 80 is accepted and decodes to 0;
	every other overlong encoding is rejected. Decoded values are
	not checked against Runemax.

	Each continuation byte is read only after the preceding bytes
	have been validated; a NUL fails the continuation test, so the
	decoder does not read beyond the terminator.
*/
int
fz_chartorune(int *rune, const char *str)
{
	int c, c1, c2, c3;
	int l;

	/* overlong null character */
	if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
		*rune = 0;
		return 2;
	}

	/*
	 * one character sequence
	 *	00000-0007F => T1
	 */
	c = *(const unsigned char*)str;
	if(c < Tx) {
		*rune = c;
		return 1;
	}

	/*
	 * two character sequence
	 *	0080-07FF => T2 Tx
	 */
	/* XOR with Tx clears the tag of a valid continuation byte; any bit left under Testx means it was not one. */
	c1 = *(const unsigned char*)(str+1) ^ Tx;
	if(c1 & Testx)
		goto bad;
	if(c < T3) {
		/* A lead byte below T2 is itself a continuation byte. */
		if(c < T2)
			goto bad;
		l = ((c << Bitx) | c1) & Rune2;
		/* Reject overlong encodings of values that fit in one byte. */
		if(l <= Rune1)
			goto bad;
		*rune = l;
		return 2;
	}

	/*
	 * three character sequence
	 *	0800-FFFF => T3 Tx Tx
	 */
	c2 = *(const unsigned char*)(str+2) ^ Tx;
	if(c2 & Testx)
		goto bad;
	if(c < T4) {
		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
		if(l <= Rune2)
			goto bad;
		*rune = l;
		return 3;
	}

	/*
	 * four character sequence (21-bit value)
	 *	10000-1FFFFF => T4 Tx Tx Tx
	 */
	c3 = *(const unsigned char*)(str+3) ^ Tx;
	if (c3 & Testx)
		goto bad;
	if (c < T5) {
		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
		if (l <= Rune3)
			goto bad;
		*rune = l;
		return 4;
	}
	/*
	 * Support for 5-byte or longer UTF-8 would go here, but
	 * since we don't have that, we'll just fall through to bad.
	 */

	/*
	 * bad decoding
	 */
bad:
	*rune = Bad;
	return 1;
}
648 | | |
/*
	Decode one rune from the first n bytes of str.

	Works like fz_chartorune but never reads more than n bytes: a
	sequence that would need more bytes than are available is
	reported as malformed. Stores the decoded value in *rune and
	returns the number of bytes consumed (1 to 4). On a malformed or
	cut-off sequence, and also when n is 0, *rune is set to
	Runeerror (0xFFFD) and 1 is returned.

	The two-byte overlong form C0 80 is accepted and decodes to 0.
*/
int
fz_chartorunen(int *rune, const char *str, size_t n)
{
	int c, c1, c2, c3;
	int l;

	if (n < 1)
		goto bad;

	/*
	 * one character sequence
	 *	00000-0007F => T1
	 */
	c = *(const unsigned char*)str;
	if(c < Tx) {
		*rune = c;
		return 1;
	}

	/* Every remaining form needs a second byte. */
	if (n < 2)
		goto bad;

	/* overlong null character */
	if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
		*rune = 0;
		return 2;
	}

	/*
	 * two character sequence
	 *	0080-07FF => T2 Tx
	 */
	/* XOR with Tx clears the tag of a valid continuation byte; any bit left under Testx means it was not one. */
	c1 = *(const unsigned char*)(str+1) ^ Tx;
	if(c1 & Testx)
		goto bad;
	if(c < T3) {
		/* A lead byte below T2 is itself a continuation byte. */
		if(c < T2)
			goto bad;
		l = ((c << Bitx) | c1) & Rune2;
		/* Reject overlong encodings of values that fit in one byte. */
		if(l <= Rune1)
			goto bad;
		*rune = l;
		return 2;
	}

	if (n < 3)
		goto bad;

	/*
	 * three character sequence
	 *	0800-FFFF => T3 Tx Tx
	 */
	c2 = *(const unsigned char*)(str+2) ^ Tx;
	if(c2 & Testx)
		goto bad;
	if(c < T4) {
		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
		if(l <= Rune2)
			goto bad;
		*rune = l;
		return 3;
	}

	if (n < 4)
		goto bad;

	/*
	 * four character sequence (21-bit value)
	 *	10000-1FFFFF => T4 Tx Tx Tx
	 */
	c3 = *(const unsigned char*)(str+3) ^ Tx;
	if (c3 & Testx)
		goto bad;
	if (c < T5) {
		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
		if (l <= Rune3)
			goto bad;
		*rune = l;
		return 4;
	}
	/*
	 * Support for 5-byte or longer UTF-8 would go here, but
	 * since we don't have that, we'll just fall through to bad.
	 */

	/*
	 * bad decoding
	 */
bad:
	*rune = Bad;
	return 1;
}
741 | | |
742 | | int |
743 | | fz_runetochar(char *str, int rune) |
744 | 0 | { |
745 | | /* Runes are signed, so convert to unsigned for range check. */ |
746 | 0 | unsigned int c = (unsigned int)rune; |
747 | | |
748 | | /* overlong null character */ |
749 | 0 | if (c == 0) { |
750 | 0 | ((unsigned char *)str)[0] = 0xc0; |
751 | 0 | ((unsigned char *)str)[1] = 0x80; |
752 | 0 | return 2; |
753 | 0 | } |
754 | | |
755 | | /* |
756 | | * one character sequence |
757 | | * 00000-0007F => 00-7F |
758 | | */ |
759 | 0 | if(c <= Rune1) { |
760 | 0 | str[0] = c; |
761 | 0 | return 1; |
762 | 0 | } |
763 | | |
764 | | /* |
765 | | * two character sequence |
766 | | * 0080-07FF => T2 Tx |
767 | | */ |
768 | 0 | if(c <= Rune2) { |
769 | 0 | str[0] = T2 | (c >> 1*Bitx); |
770 | 0 | str[1] = Tx | (c & Maskx); |
771 | 0 | return 2; |
772 | 0 | } |
773 | | |
774 | | /* |
775 | | * If the Rune is out of range, convert it to the error rune. |
776 | | * Do this test here because the error rune encodes to three bytes. |
777 | | * Doing it earlier would duplicate work, since an out of range |
778 | | * Rune wouldn't have fit in one or two bytes. |
779 | | */ |
780 | 0 | if (c > Runemax) |
781 | 0 | c = Runeerror; |
782 | | |
783 | | /* |
784 | | * three character sequence |
785 | | * 0800-FFFF => T3 Tx Tx |
786 | | */ |
787 | 0 | if (c <= Rune3) { |
788 | 0 | str[0] = T3 | (c >> 2*Bitx); |
789 | 0 | str[1] = Tx | ((c >> 1*Bitx) & Maskx); |
790 | 0 | str[2] = Tx | (c & Maskx); |
791 | 0 | return 3; |
792 | 0 | } |
793 | | |
794 | | /* |
795 | | * four character sequence (21-bit value) |
796 | | * 10000-1FFFFF => T4 Tx Tx Tx |
797 | | */ |
798 | 0 | str[0] = T4 | (c >> 3*Bitx); |
799 | 0 | str[1] = Tx | ((c >> 2*Bitx) & Maskx); |
800 | 0 | str[2] = Tx | ((c >> 1*Bitx) & Maskx); |
801 | 0 | str[3] = Tx | (c & Maskx); |
802 | 0 | return 4; |
803 | 0 | } |
804 | | |
/*
	Number of bytes fz_runetochar produces for rune c.
	The rune is encoded into a scratch buffer and only the length
	is kept.
*/
int
fz_runelen(int c)
{
	char str[10];
	return fz_runetochar(str, c);
}
811 | | |
812 | | int |
813 | | fz_runeidx(const char *s, const char *p) |
814 | 0 | { |
815 | 0 | int rune; |
816 | 0 | int i = 0; |
817 | 0 | while (s < p) { |
818 | 0 | if (*(unsigned char *)s < Runeself) |
819 | 0 | ++s; |
820 | 0 | else |
821 | 0 | s += fz_chartorune(&rune, s); |
822 | 0 | ++i; |
823 | 0 | } |
824 | 0 | return i; |
825 | 0 | } |
826 | | |
827 | | const char * |
828 | | fz_runeptr(const char *s, int i) |
829 | 0 | { |
830 | 0 | int rune; |
831 | 0 | while (i-- > 0) { |
832 | 0 | rune = *(unsigned char*)s; |
833 | 0 | if (rune < Runeself) { |
834 | 0 | if (rune == 0) |
835 | 0 | return NULL; |
836 | 0 | ++s; |
837 | 0 | } else |
838 | 0 | s += fz_chartorune(&rune, s); |
839 | 0 | } |
840 | 0 | return s; |
841 | 0 | } |
842 | | |
843 | | int |
844 | | fz_utflen(const char *s) |
845 | 0 | { |
846 | 0 | int c, n, rune; |
847 | 0 | n = 0; |
848 | 0 | for(;;) { |
849 | 0 | c = *(const unsigned char*)s; |
850 | 0 | if(c < Runeself) { |
851 | 0 | if(c == 0) |
852 | 0 | return n; |
853 | 0 | s++; |
854 | 0 | } else |
855 | 0 | s += fz_chartorune(&rune, s); |
856 | 0 | n++; |
857 | 0 | } |
858 | 0 | } |
859 | | |
/*
	Parse a float from s using fz_strtof.

	Returns 0 for a NULL string. Returns 1 when the conversion
	underflowed to zero (errno == ERANGE with a zero result) or
	produced NaN. All other results are clamped to the range
	-FLT_MAX..FLT_MAX.
*/
float fz_atof(const char *s)
{
	float value;

	if (!s)
		return 0;

	errno = 0;
	value = fz_strtof(s, NULL);

	/* Return 1.0 on underflow, as it's a small known value that won't cause a divide by 0. */
	if (isnan(value))
		return 1;
	if (errno == ERANGE && value == 0)
		return 1;

	value = fz_clamp(value, -FLT_MAX, FLT_MAX);
	return value;
}
875 | | |
/* NULL-tolerant atoi(): returns 0 for a NULL string, otherwise atoi(s). */
int fz_atoi(const char *s)
{
	return s != NULL ? atoi(s) : 0;
}
882 | | |
/* NULL-tolerant atoll(): returns 0 for a NULL string, otherwise atoll(s). */
int64_t fz_atoi64(const char *s)
{
	return s != NULL ? atoll(s) : 0;
}
889 | | |
/*
	Parse a size from s with atoll().
	Returns 0 for a NULL string, for a negative value, and for a
	value that does not survive a round trip through size_t.
*/
size_t fz_atoz(const char *s)
{
	int64_t value;

	if (s == NULL)
		return 0;

	value = atoll(s);
	if (value < 0)
		return 0;
	if ((int64_t)(size_t)value != value)
		return 0;

	return (size_t)value;
}
901 | | |
902 | | int fz_is_page_range(fz_context *ctx, const char *s) |
903 | 0 | { |
904 | | /* TODO: check the actual syntax... */ |
905 | 0 | while (*s) |
906 | 0 | { |
907 | 0 | if ((*s < '0' || *s > '9') && *s != 'N' && *s != '-' && *s != ',') |
908 | 0 | return 0; |
909 | 0 | s++; |
910 | 0 | } |
911 | 0 | return 1; |
912 | 0 | } |
913 | | |
914 | | const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n) |
915 | 0 | { |
916 | 0 | const char *orig = s; |
917 | |
|
918 | 0 | if (!s || !s[0]) |
919 | 0 | return NULL; |
920 | | |
921 | 0 | if (s[0] == ',') |
922 | 0 | s += 1; |
923 | |
|
924 | 0 | if (s[0] == 'N') |
925 | 0 | { |
926 | 0 | *a = n; |
927 | 0 | s += 1; |
928 | 0 | } |
929 | 0 | else |
930 | 0 | *a = strtol(s, (char**)&s, 10); |
931 | |
|
932 | 0 | if (s[0] == '-') |
933 | 0 | { |
934 | 0 | if (s[1] == 'N') |
935 | 0 | { |
936 | 0 | *b = n; |
937 | 0 | s += 2; |
938 | 0 | } |
939 | 0 | else |
940 | 0 | *b = strtol(s+1, (char**)&s, 10); |
941 | 0 | } |
942 | 0 | else |
943 | 0 | *b = *a; |
944 | |
|
945 | 0 | if (*a < 0) *a = n + 1 + *a; |
946 | 0 | if (*b < 0) *b = n + 1 + *b; |
947 | |
|
948 | 0 | *a = fz_clampi(*a, 1, n); |
949 | 0 | *b = fz_clampi(*b, 1, n); |
950 | |
|
951 | 0 | if (s == orig) |
952 | 0 | { |
953 | 0 | fz_warn(ctx, "skipping invalid page range"); |
954 | 0 | return NULL; |
955 | 0 | } |
956 | | |
957 | 0 | return s; |
958 | 0 | } |
959 | | |
/* memmem from musl */

/* Larger of a and b. Each argument may be evaluated twice. */
#define MAX(a,b) ((a)>(b)?(a):(b))

/*
 * Apply op (e.g. |= or &) to the word of bit array a that holds bit
 * number b, with a mask that selects just that bit.
 */
#define BITOP(a,b,op) \
 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
966 | | |
/*
	Find the two-byte needle n in the k-byte haystack h (k >= 2).

	A rolling 16-bit window is compared against the needle. h always
	points one past the window, and a byte is fetched only while k
	says there is one left, so nothing beyond h[k-1] is ever read.
	Returns a pointer to the first match, or NULL.
*/
static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
	uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
	for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++)
		if (hw == nw) return (char *)h-2;
	/* The window over the last two bytes has not been tested yet. */
	return hw == nw ? (char *)h-2 : 0;
}
974 | | |
/*
	Find the three-byte needle n in the k-byte haystack h (k >= 3).

	The rolling window lives in the top 24 bits of a 32-bit word.
	h always points one past the window, and a byte is fetched only
	while k says there is one left, so nothing beyond h[k-1] is ever
	read. Returns a pointer to the first match, or NULL.
*/
static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
	uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | (uint32_t)n[2]<<8;
	uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | (uint32_t)h[2]<<8;
	for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
		if (hw == nw) return (char *)h-3;
	/* The window over the last three bytes has not been tested yet. */
	return hw == nw ? (char *)h-3 : 0;
}
983 | | |
/*
	Find the four-byte needle n in the k-byte haystack h (k >= 4).

	A rolling 32-bit window is compared against the needle. h always
	points one past the window, and a byte is fetched only while k
	says there is one left, so nothing beyond h[k-1] is ever read.
	Returns a pointer to the first match, or NULL.
*/
static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
	uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | (uint32_t)n[2]<<8 | (uint32_t)n[3];
	uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | (uint32_t)h[2]<<8 | (uint32_t)h[3];
	for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
		if (hw == nw) return (char *)h-4;
	/* The window over the last four bytes has not been tested yet. */
	return hw == nw ? (char *)h-4 : 0;
}
992 | | |
/*
	Search the haystack [h, z) for the l-byte needle n.

	The needle is preprocessed into:
	- byteset: one bit per byte value, set for bytes occurring in n;
	- shift[b]: 1 + the last index at which byte b occurs in n
	  (only filled in for bytes whose byteset bit is set);
	- ms and p, taken from the maximal-suffix computations below.
	The search loop then uses these to skip ahead after a mismatch.

	Returns a pointer to the first match, or NULL if there is none.
*/
static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
{
	size_t i, ip, jp, k, p, ms, p0, mem, mem0;
	size_t byteset[32 / sizeof(size_t)] = { 0 };
	size_t shift[256];

	/* Record which bytes occur in the needle and fill shift table */
	for (i=0; i<l; i++)
		BITOP(byteset, n[i], |=), shift[n[i]] = i+1;

	/* Compute maximal suffix */
	/* ip starts at (size_t)-1 so that ip+k wraps around to index k-1. */
	ip = (size_t)-1; jp = 0; k = p = 1;
	while (jp+k<l) {
		if (n[ip+k] == n[jp+k]) {
			if (k == p) {
				jp += p;
				k = 1;
			} else k++;
		} else if (n[ip+k] > n[jp+k]) {
			jp += k;
			k = 1;
			p = jp - ip;
		} else {
			ip = jp++;
			k = p = 1;
		}
	}
	ms = ip;
	p0 = p;

	/* And with the opposite comparison */
	ip = (size_t)-1; jp = 0; k = p = 1;
	while (jp+k<l) {
		if (n[ip+k] == n[jp+k]) {
			if (k == p) {
				jp += p;
				k = 1;
			} else k++;
		} else if (n[ip+k] < n[jp+k]) {
			jp += k;
			k = 1;
			p = jp - ip;
		} else {
			ip = jp++;
			k = p = 1;
		}
	}
	/* Keep the result with the larger ip; the +1 makes (size_t)-1 compare lowest. */
	if (ip+1 > ms+1) ms = ip;
	else p = p0;

	/* Periodic needle? */
	if (memcmp(n, n+p, ms+1)) {
		mem0 = 0;
		p = MAX(ms, l-ms-1) + 1;
	} else mem0 = l-p;
	mem = 0;

	/* Search loop */
	for (;;) {
		/* If remainder of haystack is shorter than needle, done */
		if ((size_t)(z-h) < l) return 0;

		/* Check last byte first; advance by shift on mismatch */
		if (BITOP(byteset, h[l-1], &)) {
			k = l-shift[h[l-1]];
			if (k) {
				if (mem0 && mem && k < p) k = l-p;
				h += k;
				mem = 0;
				continue;
			}
		} else {
			/* This byte does not occur in the needle: skip a whole needle length. */
			h += l;
			mem = 0;
			continue;
		}

		/* Compare right half */
		for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
		if (k < l) {
			h += k-ms;
			mem = 0;
			continue;
		}
		/* Compare left half */
		for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
		if (k <= mem) return (char *)h;
		h += p;
		mem = mem0;
	}
}
1084 | | |
/*
	Find the first occurrence of the l-byte needle n0 in the k-byte
	haystack h0.

	Returns a pointer into the haystack, or NULL when the needle
	does not occur. An empty needle matches at the start of the
	haystack.
*/
void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l)
{
	const unsigned char *needle = n0;
	const unsigned char *hit;
	size_t remaining;

	/* Return immediately on empty needle */
	if (l == 0)
		return (void *)h0;

	/* Return immediately when needle is longer than haystack */
	if (k < l)
		return NULL;

	/* Jump to the first place the needle could possibly start. */
	hit = memchr(h0, needle[0], k);
	if (hit == NULL || l == 1)
		return (void *)hit;

	remaining = k - (hit - (const unsigned char *)h0);
	if (remaining < l)
		return NULL;

	/* Use faster algorithms for short needles */
	switch (l)
	{
	case 2:
		return twobyte_memmem(hit, remaining, needle);
	case 3:
		return threebyte_memmem(hit, remaining, needle);
	case 4:
		return fourbyte_memmem(hit, remaining, needle);
	default:
		return twoway_memmem(hit, hit + remaining, needle, l);
	}
}
1106 | | |
1107 | | char * |
1108 | | fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s) |
1109 | 0 | { |
1110 | 0 | const wchar_t *src = s; |
1111 | 0 | char *d; |
1112 | 0 | char *dst; |
1113 | 0 | int len = 1; |
1114 | |
|
1115 | 0 | while (*src) |
1116 | 0 | { |
1117 | 0 | len += fz_runelen(*src++); |
1118 | 0 | } |
1119 | |
|
1120 | 0 | d = Memento_label(fz_malloc(ctx, len), "utf8_from_wchar"); |
1121 | 0 | dst = d; |
1122 | 0 | src = s; |
1123 | 0 | while (*src) |
1124 | 0 | { |
1125 | 0 | dst += fz_runetochar(dst, *src++); |
1126 | 0 | } |
1127 | 0 | *dst = 0; |
1128 | |
|
1129 | 0 | return d; |
1130 | 0 | } |
1131 | | |
1132 | | wchar_t * |
1133 | | fz_wchar_from_utf8(fz_context *ctx, const char *path) |
1134 | 0 | { |
1135 | 0 | size_t z = 0; |
1136 | 0 | const char *p = path; |
1137 | 0 | wchar_t *wpath, *w; |
1138 | |
|
1139 | 0 | if (!path) |
1140 | 0 | return NULL; |
1141 | | |
1142 | 0 | while (*p) |
1143 | 0 | { |
1144 | 0 | int c; |
1145 | 0 | p += fz_chartorune(&c, p); |
1146 | 0 | z++; |
1147 | 0 | if (c >= 0x10000) |
1148 | 0 | z++; |
1149 | 0 | } |
1150 | |
|
1151 | 0 | w = wpath = fz_malloc(ctx, 2*(z+1)); |
1152 | 0 | while (*path) |
1153 | 0 | { |
1154 | 0 | int c; |
1155 | 0 | path += fz_chartorune(&c, path); |
1156 | 0 | if (c >= 0x10000) |
1157 | 0 | { |
1158 | 0 | c -= 0x10000; |
1159 | 0 | *w++ = 0xd800 + (c>>10); |
1160 | 0 | *w++ = 0xdc00 + (c&1023); |
1161 | 0 | } |
1162 | 0 | else |
1163 | 0 | *w++ = c; |
1164 | 0 | } |
1165 | 0 | *w = 0; |
1166 | |
|
1167 | 0 | return wpath; |
1168 | 0 | } |
1169 | | |
/*
	Locate the first occurrence of the zero-terminated string needle
	inside the zero-terminated string haystack, comparing byte by byte.

	Returns a pointer to the start of the match, haystack itself when
	needle is empty, or NULL when there is no match or when either
	argument is NULL.
*/
const char *
fz_strstr(const char *haystack, const char *needle)
{
	const char *start;

	if (needle == NULL || haystack == NULL)
		return NULL;

	/* Try every starting position in turn. */
	for (start = haystack; ; start++)
	{
		size_t i = 0;

		/* Extend the match for as long as both strings agree. */
		while (needle[i] != 0 && start[i] == needle[i])
			i++;

		/* The whole needle was consumed: match found here. */
		if (needle[i] == 0)
			return start;

		/* The haystack ran out before the needle did; no later
		 * starting position can be long enough either. */
		if (start[i] == 0)
			return NULL;
	}
}
1195 | | |
/*
	Locate the first occurrence of needle inside haystack, comparing the
	two zero-terminated strings one decoded rune at a time (each rune is
	obtained with fz_chartorune).

	Returns a pointer to the start of the match, haystack itself when
	needle is empty, or NULL when there is no match or when either
	argument is NULL.

	NOTE(review): despite the name, runes are compared for exact
	equality; no case folding is applied in this function. Confirm
	whether callers expect case-insensitive matching.
*/
const char *
fz_strstrcase(const char *haystack, const char *needle)
{
	if (haystack == NULL || needle == NULL)
		return NULL;

	/* Try each rune boundary of the haystack as a candidate start. */
	for (;;)
	{
		const char *h = haystack;
		const char *n = needle;
		int firstlen = 0;

		/* Haystack and needle positions are tracked separately. The two
		 * runes being compared can have different encoded lengths, so a
		 * byte count taken from one string must never be used to move
		 * the other. */
		for (;;)
		{
			int c, d;
			int nc, nd;

			nd = fz_chartorune(&d, n);
			if (d == 0)
				return haystack; /* whole needle matched */

			nc = fz_chartorune(&c, h);
			if (c == 0)
				return NULL; /* haystack exhausted first */

			/* Remember how far to step if this candidate fails. */
			if (h == haystack)
				firstlen = nc;

			if (c != d)
				break;

			h += nc;
			n += nd;
		}

		/* Mismatch: restart one haystack rune further on. */
		haystack += firstlen;
	}
}
1226 | | |
/* Locale-independent test for an ASCII decimal digit ('0'..'9'). */
static inline int my_isdigit(int c) {
	return c >= '0' && c <= '9';
}

/*
	Compare two zero-terminated strings so that embedded runs of decimal
	digits are ordered by numeric magnitude rather than purely byte by
	byte (e.g. "file9" orders before "file10").

	Returns zero when the strings are equal, a negative value when l0
	orders before r0, and a positive value when l0 orders after r0.
*/
int
fz_strverscmp(const char *l0, const char *r0)
{
	// This strverscmp implementation is borrowed from musl.
	// Copyright © 2005-2020 Rich Felker, et al.
	// Standard MIT license.
	const unsigned char *l = (const void *)l0;
	const unsigned char *r = (const void *)r0;
	size_t i, dp, j;
	int z = 1;

	/* Find maximal matching prefix and track its maximal digit
	 * suffix and whether those digits are all zeros. */
	for (dp=i=0; l[i]==r[i]; i++) {
		int c = l[i];
		if (!c) return 0;
		if (!my_isdigit(c)) dp=i+1, z=1;
		else if (c!='0') z=0;
	}

	if (my_isdigit(l[dp]) && l[dp]!='0' && my_isdigit(r[dp]) && r[dp]!='0') {
		/* If we're looking at non-degenerate digit sequences that
		 * both start with a nonzero digit, longest digit string is
		 * greater. Requiring a real digit on BOTH sides matters:
		 * merely testing "not '0'" would also take this branch when
		 * one side is a non-digit, making e.g. "a1" order after
		 * "a_" instead of falling through to plain byte order. */
		for (j=i; my_isdigit(l[j]); j++)
			if (!my_isdigit(r[j])) return 1;
		if (my_isdigit(r[j])) return -1;
	} else if (z && dp<i && (my_isdigit(l[i]) || my_isdigit(r[i]))) {
		/* Otherwise, if common prefix of digit sequence is
		 * all zeros, digits order less than non-digits. */
		return (unsigned char)(l[i]-'0') - (unsigned char)(r[i]-'0');
	}

	/* Plain byte-wise difference at the first mismatch. */
	return l[i] - r[i];
}