Coverage Report

Created: 2026-06-30 07:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/mupdf/source/fitz/string.c
Line
Count
Source
1
// Copyright (C) 2004-2025 Artifex Software, Inc.
2
//
3
// This file is part of MuPDF.
4
//
5
// MuPDF is free software: you can redistribute it and/or modify it under the
6
// terms of the GNU Affero General Public License as published by the Free
7
// Software Foundation, either version 3 of the License, or (at your option)
8
// any later version.
9
//
10
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
11
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
13
// details.
14
//
15
// You should have received a copy of the GNU Affero General Public License
16
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
17
//
18
// Alternative licensing terms are available from the licensor.
19
// For commercial licensing, see <https://www.artifex.com/> or contact
20
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
21
// CA 94129, USA, for further information.
22
23
#include "mupdf/fitz.h"
24
25
#include <string.h>
26
#include <errno.h>
27
#include <math.h>
28
#include <float.h>
29
#include <stdlib.h>
30
31
#ifdef _WIN32
32
#include <windows.h> /* for MultiByteToWideChar etc. */
33
#endif
34
35
#include "utfdata.h"
36
37
/*
 * Binary search over a sorted table of 'n' records, each 'ne' ints wide,
 * keyed on the first int of every record.
 *
 * Returns the last record whose key is <= c, or 0 if there is none.
 */
static const int *
fz_ucd_bsearch(int c, const int *t, int n, int ne)
{
  const int *lo = t;
  int count = n;

  while (count > 1)
  {
    int half = count / 2;
    const int *mid = lo + half * ne;

    if (c < mid[0])
    {
      /* Answer lies strictly before 'mid'. */
      count = half;
    }
    else
    {
      /* 'mid' is a candidate; keep it as the new lower bound. */
      lo = mid;
      count -= half;
    }
  }

  return (count && c >= lo[0]) ? lo : 0;
}
60
61
int
62
fz_tolower(int c)
63
18.8k
{
64
18.8k
  const int *p;
65
66
  /* Make ASCII fast. */
67
18.8k
  if (c < 128)
68
18.8k
  {
69
18.8k
    if (c >= 'A' && c <= 'Z')
70
1.56k
      c += 'a' - 'A';
71
18.8k
    return c;
72
18.8k
  }
73
74
0
  p = fz_ucd_bsearch(c, ucd_tolower2, nelem(ucd_tolower2) / 3, 3);
75
0
  if (p && c >= p[0] && c <= p[1])
76
0
    return c + p[2];
77
0
  p = fz_ucd_bsearch(c, ucd_tolower1, nelem(ucd_tolower1) / 2, 2);
78
0
  if (p && c == p[0])
79
0
    return c + p[1];
80
0
  return c;
81
0
}
82
83
int
84
fz_toupper(int c)
85
0
{
86
0
  const int *p;
87
0
  p = fz_ucd_bsearch(c, ucd_toupper2, nelem(ucd_toupper2) / 3, 3);
88
0
  if (p && c >= p[0] && c <= p[1])
89
0
    return c + p[2];
90
0
  p = fz_ucd_bsearch(c, ucd_toupper1, nelem(ucd_toupper1) / 2, 2);
91
0
  if (p && c == p[0])
92
0
    return c + p[1];
93
0
  return c;
94
0
}
95
96
/*
 * Length of 's', looking at no more than 'n' bytes.
 * Returns 'n' if no NUL terminator is found within the first 'n' bytes.
 */
size_t
fz_strnlen(const char *s, size_t n)
{
  const char *nul = memchr(s, 0, n);

  if (nul == NULL)
    return n;
  return (size_t) (nul - s);
}
102
103
/*
 * Case-insensitive comparison of at most 'n' bytes of the UTF-8 strings
 * 'a' and 'b'.
 *
 * Returns 0 if the compared prefixes match when case is ignored, otherwise
 * the difference between the first pair of (lower-cased) code points that
 * differ. A NUL in either string ends the comparison.
 */
int
fz_strncasecmp(const char *a, const char *b, size_t n)
{
  while (n > 0)
  {
    int ucs_a, ucs_b, n_a, n_b;
    n_a = fz_chartorunen(&ucs_a, a, n);
    n_b = fz_chartorunen(&ucs_b, b, n);

    // one or both of the strings are short
    if (ucs_a == 0 || ucs_b == 0)
      return ucs_a - ucs_b;

    if (ucs_a != ucs_b)
    {
      ucs_a = fz_tolower(ucs_a);
      ucs_b = fz_tolower(ucs_b);
    }
    if (ucs_a != ucs_b)
      return ucs_a - ucs_b;

    /* We believe that for all unicode characters X and Y, s.t.
     * fz_tolower(X) == fz_tolower(Y), X and Y must utf8 encode to
     * the same number of bytes.
     *
     * This can only be checked once the two runes are known to match:
     * differing runes (e.g. a 1-byte versus a 2-byte character) are
     * ordinary input and have legitimately different lengths. */
    assert(n_a == n_b);
    assert((size_t)n_a <= n);

    a += n_a;
    b += n_b;
    /* Each length was decoded within 'n', so neither exceeds it. Consume
     * the larger one so that, should the belief above ever be wrong in a
     * build without assertions, neither string is read past its bound. */
    n -= (size_t)(n_a > n_b ? n_a : n_b);
  }
  return 0;
}
135
136
/*
 * Case-insensitive comparison of two NUL-terminated UTF-8 strings.
 *
 * Decodes one code point from each string at a time, lower-cases both and
 * compares. Returns 0 when both strings end together without a mismatch,
 * otherwise the difference between the first differing lower-cased runes.
 */
int
fz_strcasecmp(const char *a, const char *b)
{
  for (;;)
  {
    int rune_a, rune_b;

    a += fz_chartorune(&rune_a, a);
    b += fz_chartorune(&rune_b, b);
    rune_a = fz_tolower(rune_a);
    rune_b = fz_tolower(rune_b);

    if (rune_a != rune_b)
      return rune_a - rune_b;
    if (rune_a == 0)
      return 0;
  }
}
155
156
/*
 * Split off the next token from *stringp.
 *
 * The token ends at the first byte that occurs in 'delim'; that byte is
 * overwritten with NUL and *stringp is advanced past it. If no delimiter
 * is found the rest of the string is the token and *stringp becomes NULL.
 * Returns the token, or NULL if *stringp was already NULL.
 */
char *
fz_strsep(char **stringp, const char *delim)
{
  char *token = *stringp;
  char *sep;

  if (token == NULL)
    return NULL;

  sep = strpbrk(token, delim);
  if (sep == NULL)
  {
    *stringp = NULL;
  }
  else
  {
    *sep = '\0';
    *stringp = sep + 1;
  }
  return token;
}
165
166
/*
 * Copy 'src' into 'dst', which has room for 'siz' bytes.
 *
 * At most siz-1 bytes are copied and, when siz is non-zero, 'dst' is always
 * NUL-terminated. Returns the length of 'src' (not counting its NUL), so a
 * return value >= siz means the copy was truncated.
 */
size_t
fz_strlcpy(char *dst, const char *src, size_t siz)
{
  const char *s = src;
  size_t room = siz;

  if (room != 0)
  {
    /* Copy as many bytes as will fit, leaving one for the NUL. */
    while (--room != 0)
    {
      if ((*dst++ = *s++) == 0)
        return (size_t)(s - src - 1); /* whole string fitted */
    }
    /* Ran out of room: terminate what we have. */
    *dst = '\0';
  }

  /* Walk the rest of src so we can report its full length. */
  while (*s++)
    ;

  return (size_t)(s - src - 1); /* count does not include NUL */
}
191
192
/*
 * Append 'src' to the string in 'dst', where 'siz' is the full size of the
 * 'dst' buffer (not the space remaining).
 *
 * At most siz-1 bytes end up in 'dst' and the result is NUL-terminated
 * unless 'dst' held no NUL within its first 'siz' bytes, in which case it
 * is left untouched. Returns the length the combined string would have had
 * without truncation: min(siz, strlen(dst)) + strlen(src).
 */
size_t
fz_strlcat(char *dst, const char *src, size_t siz)
{
  char *d = dst;
  const char *s = src;
  size_t n = siz;
  size_t dlen;

  /* Find the end of dst and adjust bytes left but don't go past end.
   * The size is tested before the byte is read so that an unterminated
   * dst never causes a read of dst[siz]. */
  while (n-- != 0 && *d != '\0')
    d++;
  dlen = d - dst;
  n = siz - dlen;

  if (n == 0)
    return dlen + strlen(s);
  while (*s != '\0') {
    /* Keep one byte in reserve for the terminating NUL. */
    if (n != 1) {
      *d++ = *s;
      n--;
    }
    s++;
  }
  *d = '\0';

  return dlen + (s - src);  /* count does not include NUL */
}
219
220
/*
 * Write the directory part of 'path' into 'dir' (a buffer of 'n' bytes).
 *
 * Trailing slashes are ignored, so "a/b/" and "a/b" both give "a".
 * A NULL or empty path, or a path with no slash, gives "."; a path whose
 * directory part is the root gives "/". The path is first copied into
 * 'dir' with truncation to 'n' bytes, so the result is computed from the
 * truncated copy. Nothing is written when n is 0.
 */
void
fz_dirname(char *dir, const char *path, size_t n)
{
  size_t i;

  if (n == 0)
    return;

  if (!path || !path[0])
  {
    fz_strlcpy(dir, ".", n);
    return;
  }

  fz_strlcpy(dir, path, n);

  i = strlen(dir);
  if (i == 0)
    return; /* only room for the NUL; nothing to scan */

  /* Start on the last character, not on the terminating NUL; otherwise
   * the trailing-slash loop below could never run. */
  --i;
  /* skip trailing slashes */
  for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
  /* skip the last path element */
  for(; dir[i] != '/'; --i) if (!i) { fz_strlcpy(dir, ".", n); return; }
  /* skip the slashes separating it from the directory part */
  for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
  dir[i+1] = 0;
}
239
240
/*
 * Return a pointer to the final component of 'path'.
 *
 * The component starts after the last '/'; if the path contains no '/',
 * the last '\\' is used instead. With neither separator present the whole
 * path is returned. The result points into 'path'.
 */
const char *
fz_basename(const char *path)
{
  const char *sep = strrchr(path, '/');

  if (sep == NULL)
    sep = strrchr(path, '\\');

  return sep ? sep + 1 : path;
}
250
251
#ifdef _WIN32

/*
 * Windows: expand the UTF-8 'path' to a full path with GetFullPathNameW
 * and store it in 'buf' as UTF-8 with every '\\' replaced by '/'.
 *
 * All conversions are bounded by PATH_MAX, so 'buf' must have room for at
 * least PATH_MAX bytes. Returns 'buf', or NULL if a conversion fails or
 * the full path does not fit in PATH_MAX wide characters.
 */
char *fz_realpath(const char *path, char *buf)
{
  wchar_t wpath[PATH_MAX];
  wchar_t wbuf[PATH_MAX];
  DWORD len;
  int i;
  if (!MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, PATH_MAX))
    return NULL;
  /* GetFullPathNameW returns 0 on error, but when the buffer is too small
   * it returns the required size instead and leaves wbuf unfilled. */
  len = GetFullPathNameW(wpath, PATH_MAX, wbuf, NULL);
  if (len == 0 || len >= PATH_MAX)
    return NULL;
  if (!WideCharToMultiByte(CP_UTF8, 0, wbuf, -1, buf, PATH_MAX, NULL, NULL))
    return NULL;
  for (i=0; buf[i]; ++i)
    if (buf[i] == '\\')
      buf[i] = '/';
  return buf;
}

#else

/* Non-Windows: forwards directly to realpath(). */
char *fz_realpath(const char *path, char *buf)
{
  return realpath(path, buf);
}

#endif
278
279
/* Return non-zero if 'a' is an ASCII hexadecimal digit (0-9, a-f, A-F). */
static inline int ishex(int a)
{
  if (a >= '0' && a <= '9')
    return 1;
  if (a >= 'a' && a <= 'f')
    return 1;
  return a >= 'A' && a <= 'F';
}
285
286
/*
 * Value (0-15) of the ASCII hexadecimal digit 'c'.
 * Any other input yields 0.
 */
static inline int tohex(int c)
{
  if (c >= 'A' && c <= 'F')
    return 0xA + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 0xA + (c - 'a');
  if (c >= '0' && c <= '9')
    return c - '0';
  return 0;
}
293
294
0
#define URIRESERVED ";/?:@&=+$,"
295
0
#define URIALPHA "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
296
#define URIDIGIT "0123456789"
297
0
#define URIMARK "-_.!~*'()"
298
0
#define URIUNESCAPED URIALPHA URIDIGIT URIMARK
299
0
#define HEX "0123456789ABCDEF"
300
301
/* Same as fz_decode_uri_component but in-place */
302
/*
 * Decode %XX escapes in 'url' in place and return 'url'.
 *
 * Every '%' followed by two hexadecimal digits is replaced by the byte it
 * encodes; all other bytes, including a '%' not followed by two hex
 * digits, are kept as they are. The output is never longer than the input.
 */
char *
fz_urldecode(char *url)
{
  const char *in = url;
  char *out = url;

  while (*in)
  {
    int c = (unsigned char) *in++;

    if (c == '%' && ishex(in[0]) && ishex(in[1]))
    {
      int hi = tohex(in[0]);
      int lo = tohex(in[1]);
      in += 2;
      c = hi << 4 | lo;
    }
    *out++ = c;
  }
  *out = 0;
  return url;
}
324
325
char *
326
fz_decode_uri_component(fz_context *ctx, const char *s)
327
0
{
328
0
  char *uri = fz_malloc(ctx, strlen(s) + 1);
329
0
  char *p = uri;
330
0
  while (*s)
331
0
  {
332
0
    int c = (unsigned char) *s++;
333
0
    if (c == '%' && ishex(s[0]) && ishex(s[1]))
334
0
    {
335
0
      int a = tohex(*s++);
336
0
      int b = tohex(*s++);
337
0
      *p++ = a << 4 | b;
338
0
    }
339
0
    else
340
0
    {
341
0
      *p++ = c;
342
0
    }
343
0
  }
344
0
  *p = 0;
345
0
  return uri;
346
0
}
347
348
char *
349
fz_decode_uri(fz_context *ctx, const char *s)
350
0
{
351
0
  char *uri = fz_malloc(ctx, strlen(s) + 1);
352
0
  char *p = uri;
353
0
  while (*s)
354
0
  {
355
0
    int c = (unsigned char) *s++;
356
0
    if (c == '%' && ishex(s[0]) && ishex(s[1]))
357
0
    {
358
0
      int a = tohex(*s++);
359
0
      int b = tohex(*s++);
360
0
      c = a << 4 | b;
361
0
      if (strchr(URIRESERVED "#", c)) {
362
0
        *p++ = '%';
363
0
        *p++ = HEX[a];
364
0
        *p++ = HEX[b];
365
0
      } else {
366
0
        *p++ = c;
367
0
      }
368
0
    }
369
0
    else
370
0
    {
371
0
      *p++ = c;
372
0
    }
373
0
  }
374
0
  *p = 0;
375
0
  return uri;
376
0
}
377
378
static char *
379
fz_encode_uri_imp(fz_context *ctx, const char *s, const char *unescaped)
380
0
{
381
0
  char *uri = fz_malloc(ctx, strlen(s) * 3 + 1); /* allocate enough for worst case */
382
0
  char *p = uri;
383
0
  while (*s)
384
0
  {
385
0
    int c = (unsigned char) *s++;
386
0
    if (strchr(unescaped, c))
387
0
    {
388
0
      *p++ = c;
389
0
    }
390
0
    else
391
0
    {
392
0
      *p++ = '%';
393
0
      *p++ = HEX[(c >> 4) & 15];
394
0
      *p++ = HEX[(c) & 15];
395
0
    }
396
0
  }
397
0
  *p = 0;
398
0
  return uri;
399
0
}
400
401
char *
402
fz_encode_uri_component(fz_context *ctx, const char *s)
403
0
{
404
0
  return fz_encode_uri_imp(ctx, s, URIUNESCAPED);
405
0
}
406
407
char *
408
fz_encode_uri_pathname(fz_context *ctx, const char *s)
409
0
{
410
0
  return fz_encode_uri_imp(ctx, s, URIUNESCAPED "/");
411
0
}
412
413
char *
414
fz_encode_uri(fz_context *ctx, const char *s)
415
0
{
416
0
  return fz_encode_uri_imp(ctx, s, URIUNESCAPED URIRESERVED "#");
417
0
}
418
419
void
420
fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page)
421
0
{
422
0
  const char *s, *p;
423
0
  char num[40];
424
0
  int i, n;
425
0
  int z = 0;
426
427
0
  for (i = 0; page; page /= 10)
428
0
    num[i++] = '0' + page % 10;
429
0
  num[i] = 0;
430
431
0
  s = p = strchr(fmt, '%');
432
0
  if (p)
433
0
  {
434
0
    ++p;
435
0
    while (*p >= '0' && *p <= '9')
436
0
      z = z * 10 + (*p++ - '0');
437
0
  }
438
0
  if (p && *p == 'd')
439
0
  {
440
0
    ++p;
441
0
  }
442
0
  else
443
0
  {
444
0
    const char *psep = strrchr(fmt, '/');
445
0
    s = p = strrchr(fmt, '.');
446
    /* Ensure we only match a . in the last path segment. */
447
0
    if (psep != NULL && p < psep)
448
0
      p = NULL;
449
0
    if (!p)
450
0
      s = p = fmt + strlen(fmt);
451
0
  }
452
453
0
  if (z < 1)
454
0
    z = 1;
455
0
  while (i < z && i < (int)sizeof num)
456
0
    num[i++] = '0';
457
0
  n = s - fmt;
458
0
  if (n + i + strlen(p) >= size)
459
0
    fz_throw(ctx, FZ_ERROR_ARGUMENT, "path name buffer overflow");
460
0
  memcpy(path, fmt, n);
461
0
  while (i > 0)
462
0
    path[n++] = num[--i];
463
0
  fz_strlcpy(path + n, p, size - n);
464
0
}
465
466
0
#define SEP(x) ((x)=='/' || (x) == 0)
467
468
/*
 * Simplify the path in 'name' in place and return 'name'.
 *
 * Repeated slashes and "." elements are dropped, and each ".." element
 * removes the element written before it where there is one. For a path
 * that starts with '/', a ".." that cannot backtrack is discarded; for a
 * relative path it is kept. An empty result becomes ".". The output is
 * never longer than the input, except that an empty input is replaced by
 * ".", so the buffer must have room for at least two bytes.
 */
char *
fz_cleanname(char *name)
{
  char *p, *q, *dotdot;
  int rooted;

  rooted = name[0] == '/';

  /*
   * invariants:
   *    p points at beginning of path element we're considering.
   *    q points just past the last path element we wrote (no slash).
   *    dotdot points just past the point where .. cannot backtrack
   *        any further (no slash).
   */
  p = q = dotdot = name + rooted;
  while (*p)
  {
    if(p[0] == '/') /* null element */
      p++;
    else if (p[0] == '.' && SEP(p[1]))
      p += 1; /* don't count the separator in case it is nul */
    else if (p[0] == '.' && p[1] == '.' && SEP(p[2]))
    {
      p += 2;
      if (q > dotdot) /* can backtrack */
      {
        /* step q back to the slash before the last written element */
        while(--q > dotdot && *q != '/')
          ;
      }
      else if (!rooted) /* /.. is / but ./../ is .. */
      {
        if (q != name)
          *q++ = '/';
        *q++ = '.';
        *q++ = '.';
        /* a later ".." must not remove the one just written */
        dotdot = q;
      }
    }
    else /* real path element */
    {
      if (q != name+rooted)
        *q++ = '/';
      /* copy the element; stops with *q holding the '/' or NUL that ended it */
      while ((*q = *p) != '/' && *q != 0)
        p++, q++;
    }
  }

  if (q == name) /* empty string is really "." */
    *q++ = '.';
  *q = '\0';
  return name;
}
521
522
char *
523
fz_cleanname_strdup(fz_context *ctx, const char *name)
524
0
{
525
0
  size_t len = strlen(name);
526
0
  char *newname = fz_malloc(ctx, fz_maxz(2, len + 1));
527
0
  memcpy(newname, name, len + 1);
528
0
  newname[len] = '\0';
529
0
  return fz_cleanname(newname);
530
0
}
531
532
enum
533
{
534
  UTFmax = 4, /* maximum bytes per rune */
535
  Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
536
  Runeself = 0x80, /* rune and UTF sequences are the same (<) */
537
  Runeerror = 0xFFFD, /* decoding error in UTF */
538
  Runemax = 0x10FFFF, /* maximum rune value */
539
};
540
541
enum
542
{
543
  Bit1 = 7,
544
  Bitx = 6,
545
  Bit2 = 5,
546
  Bit3 = 4,
547
  Bit4 = 3,
548
  Bit5 = 2,
549
550
  T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
551
  Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
552
  T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
553
  T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
554
  T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
555
  T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
556
557
  Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
558
  Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
559
  Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
560
  Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
561
562
  Maskx = (1<<Bitx)-1,  /* 0011 1111 */
563
  Testx = Maskx ^ 0xFF, /* 1100 0000 */
564
565
  Bad = Runeerror,
566
};
567
568
int
569
fz_chartorune(int *rune, const char *str)
570
18.8k
{
571
18.8k
  int c, c1, c2, c3;
572
18.8k
  int l;
573
574
  /* overlong null character */
575
18.8k
  if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
576
0
    *rune = 0;
577
0
    return 2;
578
0
  }
579
580
  /*
581
   * one character sequence
582
   *  00000-0007F => T1
583
   */
584
18.8k
  c = *(const unsigned char*)str;
585
18.8k
  if(c < Tx) {
586
18.8k
    *rune = c;
587
18.8k
    return 1;
588
18.8k
  }
589
590
  /*
591
   * two character sequence
592
   *  0080-07FF => T2 Tx
593
   */
594
0
  c1 = *(const unsigned char*)(str+1) ^ Tx;
595
0
  if(c1 & Testx)
596
0
    goto bad;
597
0
  if(c < T3) {
598
0
    if(c < T2)
599
0
      goto bad;
600
0
    l = ((c << Bitx) | c1) & Rune2;
601
0
    if(l <= Rune1)
602
0
      goto bad;
603
0
    *rune = l;
604
0
    return 2;
605
0
  }
606
607
  /*
608
   * three character sequence
609
   *  0800-FFFF => T3 Tx Tx
610
   */
611
0
  c2 = *(const unsigned char*)(str+2) ^ Tx;
612
0
  if(c2 & Testx)
613
0
    goto bad;
614
0
  if(c < T4) {
615
0
    l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
616
0
    if(l <= Rune2)
617
0
      goto bad;
618
0
    *rune = l;
619
0
    return 3;
620
0
  }
621
622
  /*
623
   * four character sequence (21-bit value)
624
   *  10000-1FFFFF => T4 Tx Tx Tx
625
   */
626
0
  c3 = *(const unsigned char*)(str+3) ^ Tx;
627
0
  if (c3 & Testx)
628
0
    goto bad;
629
0
  if (c < T5) {
630
0
    l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
631
0
    if (l <= Rune3)
632
0
      goto bad;
633
0
    *rune = l;
634
0
    return 4;
635
0
  }
636
  /*
637
   * Support for 5-byte or longer UTF-8 would go here, but
638
   * since we don't have that, we'll just fall through to bad.
639
   */
640
641
  /*
642
   * bad decoding
643
   */
644
0
bad:
645
0
  *rune = Bad;
646
0
  return 1;
647
0
}
648
649
int
650
fz_chartorunen(int *rune, const char *str, size_t n)
651
0
{
652
0
  int c, c1, c2, c3;
653
0
  int l;
654
655
0
  if (n < 1)
656
0
    goto bad;
657
658
  /*
659
   * one character sequence
660
   *  00000-0007F => T1
661
   */
662
0
  c = *(const unsigned char*)str;
663
0
  if(c < Tx) {
664
0
    *rune = c;
665
0
    return 1;
666
0
  }
667
668
0
  if (n < 2)
669
0
    goto bad;
670
671
  /* overlong null character */
672
0
  if((unsigned char)str[0] == 0xc0 && (unsigned char)str[1] == 0x80) {
673
0
    *rune = 0;
674
0
    return 2;
675
0
  }
676
677
  /*
678
   * two character sequence
679
   *  0080-07FF => T2 Tx
680
   */
681
0
  c1 = *(const unsigned char*)(str+1) ^ Tx;
682
0
  if(c1 & Testx)
683
0
    goto bad;
684
0
  if(c < T3) {
685
0
    if(c < T2)
686
0
      goto bad;
687
0
    l = ((c << Bitx) | c1) & Rune2;
688
0
    if(l <= Rune1)
689
0
      goto bad;
690
0
    *rune = l;
691
0
    return 2;
692
0
  }
693
694
0
  if (n < 3)
695
0
    goto bad;
696
697
  /*
698
   * three character sequence
699
   *  0800-FFFF => T3 Tx Tx
700
   */
701
0
  c2 = *(const unsigned char*)(str+2) ^ Tx;
702
0
  if(c2 & Testx)
703
0
    goto bad;
704
0
  if(c < T4) {
705
0
    l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
706
0
    if(l <= Rune2)
707
0
      goto bad;
708
0
    *rune = l;
709
0
    return 3;
710
0
  }
711
712
0
  if (n < 4)
713
0
    goto bad;
714
715
  /*
716
   * four character sequence (21-bit value)
717
   *  10000-1FFFFF => T4 Tx Tx Tx
718
   */
719
0
  c3 = *(const unsigned char*)(str+3) ^ Tx;
720
0
  if (c3 & Testx)
721
0
    goto bad;
722
0
  if (c < T5) {
723
0
    l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
724
0
    if (l <= Rune3)
725
0
      goto bad;
726
0
    *rune = l;
727
0
    return 4;
728
0
  }
729
  /*
730
   * Support for 5-byte or longer UTF-8 would go here, but
731
   * since we don't have that, we'll just fall through to bad.
732
   */
733
734
  /*
735
   * bad decoding
736
   */
737
0
bad:
738
0
  *rune = Bad;
739
0
  return 1;
740
0
}
741
742
int
743
fz_runetochar(char *str, int rune)
744
0
{
745
  /* Runes are signed, so convert to unsigned for range check. */
746
0
  unsigned int c = (unsigned int)rune;
747
748
  /* overlong null character */
749
0
  if (c == 0) {
750
0
    ((unsigned char *)str)[0] = 0xc0;
751
0
    ((unsigned char *)str)[1] = 0x80;
752
0
    return 2;
753
0
  }
754
755
  /*
756
   * one character sequence
757
   *  00000-0007F => 00-7F
758
   */
759
0
  if(c <= Rune1) {
760
0
    str[0] = c;
761
0
    return 1;
762
0
  }
763
764
  /*
765
   * two character sequence
766
   *  0080-07FF => T2 Tx
767
   */
768
0
  if(c <= Rune2) {
769
0
    str[0] = T2 | (c >> 1*Bitx);
770
0
    str[1] = Tx | (c & Maskx);
771
0
    return 2;
772
0
  }
773
774
  /*
775
   * If the Rune is out of range, convert it to the error rune.
776
   * Do this test here because the error rune encodes to three bytes.
777
   * Doing it earlier would duplicate work, since an out of range
778
   * Rune wouldn't have fit in one or two bytes.
779
   */
780
0
  if (c > Runemax)
781
0
    c = Runeerror;
782
783
  /*
784
   * three character sequence
785
   *  0800-FFFF => T3 Tx Tx
786
   */
787
0
  if (c <= Rune3) {
788
0
    str[0] = T3 | (c >> 2*Bitx);
789
0
    str[1] = Tx | ((c >> 1*Bitx) & Maskx);
790
0
    str[2] = Tx | (c & Maskx);
791
0
    return 3;
792
0
  }
793
794
  /*
795
   * four character sequence (21-bit value)
796
   *  10000-1FFFFF => T4 Tx Tx Tx
797
   */
798
0
  str[0] = T4 | (c >> 3*Bitx);
799
0
  str[1] = Tx | ((c >> 2*Bitx) & Maskx);
800
0
  str[2] = Tx | ((c >> 1*Bitx) & Maskx);
801
0
  str[3] = Tx | (c & Maskx);
802
0
  return 4;
803
0
}
804
805
/*
 * Number of bytes fz_runetochar produces for code point 'c'.
 * The encoding itself is written to a scratch buffer and discarded.
 */
int
fz_runelen(int c)
{
  char scratch[10];
  int nbytes = fz_runetochar(scratch, c);
  return nbytes;
}
811
812
int
813
fz_runeidx(const char *s, const char *p)
814
0
{
815
0
  int rune;
816
0
  int i = 0;
817
0
  while (s < p) {
818
0
    if (*(unsigned char *)s < Runeself)
819
0
      ++s;
820
0
    else
821
0
      s += fz_chartorune(&rune, s);
822
0
    ++i;
823
0
  }
824
0
  return i;
825
0
}
826
827
const char *
828
fz_runeptr(const char *s, int i)
829
0
{
830
0
  int rune;
831
0
  while (i-- > 0) {
832
0
    rune = *(unsigned char*)s;
833
0
    if (rune < Runeself) {
834
0
      if (rune == 0)
835
0
        return NULL;
836
0
      ++s;
837
0
    } else
838
0
      s += fz_chartorune(&rune, s);
839
0
  }
840
0
  return s;
841
0
}
842
843
int
844
fz_utflen(const char *s)
845
0
{
846
0
  int c, n, rune;
847
0
  n = 0;
848
0
  for(;;) {
849
0
    c = *(const unsigned char*)s;
850
0
    if(c < Runeself) {
851
0
      if(c == 0)
852
0
        return n;
853
0
      s++;
854
0
    } else
855
0
      s += fz_chartorune(&rune, s);
856
0
    n++;
857
0
  }
858
0
}
859
860
/*
 * Parse a float from 's' with fz_strtof.
 *
 * Returns 0 for a NULL string. Returns 1 when the parse reports ERANGE with
 * a zero result (underflow) or yields NaN: 1 is a small known value that
 * will not cause a divide by zero. Otherwise the parsed value is clamped to
 * the finite range [-FLT_MAX, FLT_MAX].
 */
float fz_atof(const char *s)
{
  float value;

  if (!s)
    return 0;

  errno = 0;
  value = fz_strtof(s, NULL);

  if (errno == ERANGE && value == 0)
    return 1;
  if (isnan(value))
    return 1;

  value = fz_clamp(value, -FLT_MAX, FLT_MAX);
  return value;
}
875
876
/* atoi() that returns 0 for a NULL string. */
int fz_atoi(const char *s)
{
  return s ? atoi(s) : 0;
}
882
883
/* atoll() that returns 0 for a NULL string. */
int64_t fz_atoi64(const char *s)
{
  return s ? atoll(s) : 0;
}
889
890
/*
 * Parse a non-negative size from 's' using atoll().
 *
 * Returns 0 for a NULL string, for a negative value, and for a value that
 * does not survive conversion to size_t.
 */
size_t fz_atoz(const char *s)
{
  int64_t value;

  if (s == NULL)
    return 0;

  value = atoll(s);
  if (value < 0)
    return 0;
  if ((int64_t)(size_t)value != value)
    return 0; /* does not fit in size_t */

  return (size_t)value;
}
901
902
/*
 * Return 1 if every character of 's' is one that may appear in a page
 * range (a digit, 'N', '-' or ','), and 0 otherwise. An empty string
 * returns 1. 'ctx' is not used.
 */
int fz_is_page_range(fz_context *ctx, const char *s)
{
  /* TODO: check the actual syntax... */
  for (; *s; s++)
  {
    int is_digit = (*s >= '0' && *s <= '9');

    if (!is_digit && *s != 'N' && *s != '-' && *s != ',')
      return 0;
  }
  return 1;
}
913
914
/*
 * Parse one entry of a comma separated page range list.
 *
 * An entry is "A" or "A-B", optionally preceded by ','. Each of A and B is
 * a decimal number or 'N', which stands for 'n'. Negative numbers count
 * back from the end (-1 is n). Both results are clamped to 1..n and stored
 * in *a and *b; a single number sets both to the same page.
 *
 * Returns a pointer just past the parsed entry, or NULL if 's' is NULL or
 * empty, or (with a warning) if nothing could be consumed.
 */
const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n)
{
  const char *start = s;
  char *end;

  if (s == NULL || s[0] == 0)
    return NULL;

  if (*s == ',')
    s++;

  /* first page */
  if (*s == 'N')
  {
    *a = n;
    s++;
  }
  else
  {
    *a = strtol(s, &end, 10);
    s = end;
  }

  /* optional last page */
  if (*s != '-')
  {
    *b = *a;
  }
  else if (s[1] == 'N')
  {
    *b = n;
    s += 2;
  }
  else
  {
    *b = strtol(s + 1, &end, 10);
    s = end;
  }

  /* negative numbers count from the end */
  if (*a < 0) *a = n + 1 + *a;
  if (*b < 0) *b = n + 1 + *b;

  *a = fz_clampi(*a, 1, n);
  *b = fz_clampi(*b, 1, n);

  if (s == start)
  {
    fz_warn(ctx, "skipping invalid page range");
    return NULL;
  }

  return s;
}
959
960
/* memmem from musl */
961
962
24.2k
#define MAX(a,b) ((a)>(b)?(a):(b))
963
964
#define BITOP(a,b,op) \
965
152k
 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
966
967
/*
 * Find the 2-byte needle 'n' in the 'k' bytes at 'h' (k >= 2).
 *
 * Slides a 16-bit window over the haystack. The window is compared once
 * more after the loop instead of fetching a further byte, so no byte at or
 * beyond h[k] is ever read. Returns a pointer to the match, or 0.
 */
static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
  uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
  for (h+=2, k-=2; k; k--, hw = hw<<8 | *h++)
    if (hw == nw) return (char *)h-2;
  return hw == nw ? (char *)h-2 : 0;
}
974
975
/*
 * Find the 3-byte needle 'n' in the 'k' bytes at 'h' (k >= 3).
 *
 * The three bytes are kept in the top 24 bits of a 32-bit window. The
 * window is compared once more after the loop instead of fetching a further
 * byte, so no byte at or beyond h[k] is ever read. Returns a pointer to the
 * match, or 0.
 */
static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
  uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | (uint32_t)n[2]<<8;
  uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | (uint32_t)h[2]<<8;
  for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8)
    if (hw == nw) return (char *)h-3;
  return hw == nw ? (char *)h-3 : 0;
}
983
984
/*
 * Find the 4-byte needle 'n' in the 'k' bytes at 'h' (k >= 4).
 *
 * Slides a 32-bit window over the haystack. The window is compared once
 * more after the loop instead of fetching a further byte, so no byte at or
 * beyond h[k] is ever read. Returns a pointer to the match, or 0.
 */
static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
{
  uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | (uint32_t)n[2]<<8 | (uint32_t)n[3];
  uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | (uint32_t)h[2]<<8 | (uint32_t)h[3];
  for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++)
    if (hw == nw) return (char *)h-4;
  return hw == nw ? (char *)h-4 : 0;
}
992
993
/*
 * Search for the needle n[0..l-1] in the haystack [h, z).
 *
 * The needle is split at a maximal suffix (computed twice, with opposite
 * byte orderings) and each candidate position is checked right half first,
 * then left half. A per-byte shift table and a set of the byte values that
 * occur in the needle are used to skip ahead when the haystack byte aligned
 * with the needle's last byte cannot match there.
 *
 * Returns a pointer to the first match, or 0 if there is none.
 */
static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
{
  size_t i, ip, jp, k, p, ms, p0, mem, mem0;
  size_t byteset[32 / sizeof(size_t)] = { 0 };
  size_t shift[256];

  /* Record which byte values occur in the needle and fill shift table
   * (shift[b] ends up as 1 + the last index at which b occurs) */
  for (i=0; i<l; i++)
    BITOP(byteset, n[i], |=), shift[n[i]] = i+1;

  /* Compute maximal suffix */
  ip = (size_t)-1; jp = 0; k = p = 1;
  while (jp+k<l) {
    if (n[ip+k] == n[jp+k]) {
      if (k == p) {
        jp += p;
        k = 1;
      } else k++;
    } else if (n[ip+k] > n[jp+k]) {
      jp += k;
      k = 1;
      p = jp - ip;
    } else {
      ip = jp++;
      k = p = 1;
    }
  }
  ms = ip;
  p0 = p;

  /* And with the opposite comparison */
  ip = (size_t)-1; jp = 0; k = p = 1;
  while (jp+k<l) {
    if (n[ip+k] == n[jp+k]) {
      if (k == p) {
        jp += p;
        k = 1;
      } else k++;
    } else if (n[ip+k] < n[jp+k]) {
      jp += k;
      k = 1;
      p = jp - ip;
    } else {
      ip = jp++;
      k = p = 1;
    }
  }
  /* Keep whichever split starts later; the +1 lets (size_t)-1 compare as 0 */
  if (ip+1 > ms+1) ms = ip;
  else p = p0;

  /* Periodic needle? */
  if (memcmp(n, n+p, ms+1)) {
    mem0 = 0;
    p = MAX(ms, l-ms-1) + 1;
  } else mem0 = l-p;
  mem = 0;

  /* Search loop */
  for (;;) {
    /* If remainder of haystack is shorter than needle, done */
    if ((size_t)(z-h) < l) return 0;

    /* Check last byte first; advance by shift on mismatch */
    if (BITOP(byteset, h[l-1], &)) {
      k = l-shift[h[l-1]];
      if (k) {
        if (mem0 && mem && k < p) k = l-p;
        h += k;
        mem = 0;
        continue;
      }
    } else {
      /* byte does not occur in the needle at all: skip a whole needle length */
      h += l;
      mem = 0;
      continue;
    }

    /* Compare right half */
    for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
    if (k < l) {
      h += k-ms;
      mem = 0;
      continue;
    }
    /* Compare left half */
    for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
    if (k <= mem) return (char *)h;
    h += p;
    mem = mem0;
  }
}
1084
1085
/*
 * Find the first occurrence of the 'l' bytes at 'n0' within the 'k' bytes
 * at 'h0'.
 *
 * Returns a pointer to the match, 'h0' itself for an empty needle, or 0 if
 * there is no match. Needles of 1 to 4 bytes use dedicated routines; longer
 * ones use the two-way search.
 */
void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l)
{
  const unsigned char *hay = h0, *needle = n0;
  const unsigned char *first;

  /* Return immediately on empty needle */
  if (l == 0)
    return (void *)hay;

  /* Return immediately when needle is longer than haystack */
  if (k < l)
    return 0;

  /* Jump to the first byte that could start a match. */
  first = memchr(h0, *needle, k);
  if (first == NULL || l == 1)
    return (void *)first;

  k -= first - hay;
  if (k < l)
    return 0;

  /* Use faster algorithms for short needles */
  switch (l)
  {
  case 2:
    return twobyte_memmem(first, k, needle);
  case 3:
    return threebyte_memmem(first, k, needle);
  case 4:
    return fourbyte_memmem(first, k, needle);
  default:
    return twoway_memmem(first, first + k, needle, l);
  }
}
1106
1107
char *
1108
fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s)
1109
0
{
1110
0
  const wchar_t *src = s;
1111
0
  char *d;
1112
0
  char *dst;
1113
0
  int len = 1;
1114
1115
0
  while (*src)
1116
0
  {
1117
0
    len += fz_runelen(*src++);
1118
0
  }
1119
1120
0
  d = Memento_label(fz_malloc(ctx, len), "utf8_from_wchar");
1121
0
  dst = d;
1122
0
  src = s;
1123
0
  while (*src)
1124
0
  {
1125
0
    dst += fz_runetochar(dst, *src++);
1126
0
  }
1127
0
  *dst = 0;
1128
1129
0
  return d;
1130
0
}
1131
1132
wchar_t *
1133
fz_wchar_from_utf8(fz_context *ctx, const char *path)
1134
0
{
1135
0
  size_t z = 0;
1136
0
  const char *p = path;
1137
0
  wchar_t *wpath, *w;
1138
1139
0
  if (!path)
1140
0
    return NULL;
1141
1142
0
  while (*p)
1143
0
  {
1144
0
    int c;
1145
0
    p += fz_chartorune(&c, p);
1146
0
    z++;
1147
0
    if (c >= 0x10000)
1148
0
      z++;
1149
0
  }
1150
1151
0
  w = wpath = fz_malloc(ctx, 2*(z+1));
1152
0
  while (*path)
1153
0
  {
1154
0
    int c;
1155
0
    path += fz_chartorune(&c, path);
1156
0
    if (c >= 0x10000)
1157
0
    {
1158
0
      c -= 0x10000;
1159
0
      *w++ = 0xd800 + (c>>10);
1160
0
      *w++ = 0xdc00 + (c&1023);
1161
0
    }
1162
0
    else
1163
0
      *w++ = c;
1164
0
  }
1165
0
  *w = 0;
1166
1167
0
  return wpath;
1168
0
}
1169
1170
/*
 * strstr() that tolerates NULL arguments.
 *
 * Returns a pointer to the first occurrence of 'needle' in 'haystack',
 * 'haystack' itself for an empty needle, or NULL if there is no match or
 * either argument is NULL.
 */
const char *
fz_strstr(const char *haystack, const char *needle)
{
  if (haystack == NULL || needle == NULL)
    return NULL;

  return strstr(haystack, needle);
}
1195
1196
/*
 * Case-insensitive search for the UTF-8 string 'needle' in 'haystack'.
 *
 * Both strings are compared rune by rune, with each pair of runes matching
 * if they are equal or have the same fz_tolower value. Returns a pointer to
 * the start of the first match, 'haystack' itself for an empty needle, or
 * NULL if there is no match or either argument is NULL.
 *
 * Every candidate start is compared with its own pair of cursors, so the
 * search stays correct when a needle rune and the haystack rune it is
 * compared against have different encoded lengths.
 */
const char *
fz_strstrcase(const char *haystack, const char *needle)
{
  if (haystack == NULL || needle == NULL)
    return NULL;

  for (;;)
  {
    const char *h = haystack;
    const char *n = needle;
    int c, d;

    for (;;)
    {
      n += fz_chartorune(&d, n);
      if (d == 0)
        return haystack; /* whole needle matched here */
      h += fz_chartorune(&c, h);
      if (c == 0)
        return NULL; /* haystack ran out; no later start can match */
      if (c != d && fz_tolower(c) != fz_tolower(d))
        break;
    }

    /* Mismatch: retry from the next rune of the haystack. The rune at
     * 'haystack' is known to be non-zero here, otherwise the inner loop
     * would already have returned. */
    haystack += fz_chartorune(&c, haystack);
  }
}
1226
1227
0
/* Return non-zero if 'c' is an ASCII decimal digit. Locale independent. */
static inline int my_isdigit(int c) {
  return (unsigned int)c - '0' < 10u;
}
1230
1231
/*
 * Compare two strings so that embedded runs of digits order by numeric
 * value ("file9" before "file10").
 *
 * Returns 0 for equal strings, otherwise a negative or positive value
 * giving the order of 'l0' relative to 'r0'.
 */
int
fz_strverscmp(const char *l0, const char *r0)
{
  // This strverscmp implementation is borrowed from musl.
  // Copyright © 2005-2020 Rich Felker, et al.
  // Standard MIT license.
  const unsigned char *lhs = (const void *)l0;
  const unsigned char *rhs = (const void *)r0;
  size_t pos = 0;         /* first index at which the strings differ */
  size_t digits_from = 0; /* start of the digit run leading up to pos */
  size_t scan;
  int all_zero = 1;       /* that digit run consists only of '0' so far */

  /* Find maximal matching prefix and track its maximal digit
   * suffix and whether those digits are all zeros. */
  while (lhs[pos] == rhs[pos])
  {
    int c = lhs[pos];

    if (c == 0)
      return 0;
    if (!my_isdigit(c))
    {
      digits_from = pos + 1;
      all_zero = 1;
    }
    else if (c != '0')
    {
      all_zero = 0;
    }
    pos++;
  }

  if (lhs[digits_from] != '0' && rhs[digits_from] != '0')
  {
    /* If we're not looking at a digit sequence that began
     * with a zero, longest digit string is greater. */
    for (scan = pos; my_isdigit(lhs[scan]); scan++)
      if (!my_isdigit(rhs[scan]))
        return 1;
    if (my_isdigit(rhs[scan]))
      return -1;
  }
  else if (all_zero && digits_from < pos && (my_isdigit(lhs[pos]) || my_isdigit(rhs[pos])))
  {
    /* Otherwise, if common prefix of digit sequence is
     * all zeros, digits order less than non-digits. */
    return (unsigned char)(lhs[pos]-'0') - (unsigned char)(rhs[pos]-'0');
  }

  return lhs[pos] - rhs[pos];
}