Coverage Report

Created: 2026-05-16 06:55

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libpsl/src/psl.c
Line
Count
Source
1
/*
2
 * Copyright(c) 2014-2024 Tim Ruehsen
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
 * DEALINGS IN THE SOFTWARE.
21
 *
22
 * This file is part of libpsl.
23
 *
24
 * Public Suffix List routines
25
 *
26
 * Changelog
27
 * 19.03.2014  Tim Ruehsen  created from libmget/cookie.c
28
 *
29
 */
30
31
#if HAVE_CONFIG_H
32
# include <config.h>
33
#endif
34
35
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
36
#       define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
37
#else
38
#       define GCC_VERSION_AT_LEAST(major, minor) 0
39
#endif
40
41
/* Must be defined before <sys/stat.h> */
42
#if defined(_MSC_VER) || defined(__MINGW32__)
43
# define USE_WIN32_LARGE_FILES
44
# ifdef __MINGW32__
45
#   ifndef _FILE_OFFSET_BITS
46
#    define _FILE_OFFSET_BITS 64
47
#   endif
48
# endif
49
#endif
50
51
#include <sys/types.h>
52
#include <sys/stat.h>
53
54
#if defined(_WIN32) && (defined(WITH_LIBIDN2) || defined(WITH_LIBIDN))
55
# ifndef WIN32_LEAN_AND_MEAN
56
# define WIN32_LEAN_AND_MEAN
57
# endif
58
# include <windows.h> /* for GetACP() */
59
#endif
60
61
#if defined(_WIN32)
62
# ifdef USE_WIN32_LARGE_FILES
63
#  define struct_stat  struct _stati64
64
#  define func_sys_stat _stati64
65
# else
66
#  define struct_stat  struct _stat
67
#  define func_sys_stat _stat
68
# endif
69
#endif
70
71
#ifndef struct_stat
72
9.50k
# define struct_stat   struct stat
73
3.16k
# define func_sys_stat stat
74
#endif
75
76
#if defined(_MSC_VER) && ! defined(ssize_t)
77
# include <basetsd.h>
78
typedef SSIZE_T ssize_t;
79
#endif
80
81
#include <stdio.h>
82
#include <stdlib.h>
83
#include <string.h>
84
#include <ctype.h>
85
#include <time.h>
86
#include <errno.h>
87
#include <limits.h> /* for UINT_MAX */
88
89
#ifdef HAVE_NL_LANGINFO
90
# include <langinfo.h>
91
#endif
92
93
#ifdef _WIN32
94
# include <malloc.h>
95
#endif
96
97
#ifdef WITH_LIBICU
98
# include <unicode/uversion.h>
99
# include <unicode/ustring.h>
100
# include <unicode/uidna.h>
101
# include <unicode/ucnv.h>
102
#elif defined(WITH_LIBIDN2)
103
# include <iconv.h>
104
# include <idn2.h>
105
# include <unicase.h>
106
# include <unistr.h>
107
#elif defined(WITH_LIBIDN)
108
# include <iconv.h>
109
# include <stringprep.h>
110
# include <idna.h>
111
# include <unicase.h>
112
# include <unistr.h>
113
#endif
114
115
#ifdef WINICONV_CONST
116
#  define ICONV_CONST WINICONV_CONST
117
#endif
118
#ifndef ICONV_CONST
119
#  define ICONV_CONST
120
#endif
121
122
123
#include <libpsl.h>
124
125
/**
126
 * SECTION:libpsl
127
 * @short_description: Public Suffix List library functions
128
 * @title: libpsl
129
 * @stability: Stable
130
 * @include: libpsl.h
131
 *
132
 * [Public Suffix List](https://publicsuffix.org/) library functions.
133
 *
134
 */
135
136
#define countof(a) (sizeof(a)/sizeof(*(a)))
137
138
2
#define PRIV_PSL_FLAG_EXCEPTION (1<<0)
139
93
#define PRIV_PSL_FLAG_WILDCARD  (1<<1)
140
0
#define PRIV_PSL_FLAG_ICANN     (1<<2) /* entry of ICANN section */
141
0
#define PRIV_PSL_FLAG_PRIVATE   (1<<3) /* entry of PRIVATE section */
142
0
#define PRIV_PSL_FLAG_PLAIN     (1<<4) /* just used for PSL syntax checking */
143
144
/* One PSL rule, or a lookup key built from a domain name. */
typedef struct {
  char label_buf[128];   /* inline storage for the rule text, filled by suffix_init() */
  const char *label;     /* text used for comparisons; suffix_compare() falls back to label_buf if NULL */
  unsigned short length; /* length of the label text in bytes */
  unsigned char nlabels; /* number of labels */
  unsigned char flags;   /* PRIV_PSL_FLAG_* bits */
} psl_entry_t;
155
156
/* stripped down version libmget vector routines */
157
typedef struct {
158
  int
159
    (*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
160
  psl_entry_t
161
    **entry; /* pointer to array of pointers to elements */
162
  int
163
    max,     /* allocated elements */
164
    cur;     /* number of elements in use */
165
} psl_vector_t;
166
167
struct psl_ctx_st {
168
  psl_vector_t
169
    *suffixes;
170
  unsigned char
171
    *dafsa;
172
  size_t
173
    dafsa_size;
174
  int
175
    nsuffixes,
176
    nexceptions,
177
    nwildcards;
178
  unsigned
179
    utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */
180
};
181
182
/* include the PSL data generated by psl-make-dafsa */
183
#ifdef ENABLE_BUILTIN
184
#include "suffixes_dafsa.h"
185
#else
186
static const unsigned char kDafsa[] = "";
187
static time_t _psl_file_time = 0;
188
static int _psl_nsuffixes = 0;
189
static int _psl_nexceptions = 0;
190
static int _psl_nwildcards = 0;
191
static const char _psl_sha1_checksum[] = "";
192
static const char _psl_filename[] = "";
193
#endif
194
195
/* references to these PSLs will result in lookups to built-in data */
196
static const psl_ctx_t
197
  builtin_psl;
198
199
#ifdef PSL_DISTFILE
200
static const char _psl_dist_filename[] = PSL_DISTFILE;
201
#else
202
static const char _psl_dist_filename[] = "";
203
#endif
204
205
static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
206
0
{
207
0
  psl_vector_t *v;
208
209
0
  if (!(v = calloc(1, sizeof(psl_vector_t))))
210
0
    return NULL;
211
212
0
  if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
213
0
    free(v);
214
0
    return NULL;
215
0
  }
216
217
0
  v->max = max;
218
0
  v->cmp = cmp;
219
0
  return v;
220
0
}
221
222
/*
 * Release a vector together with every entry it owns.
 *
 * @v: address of the vector pointer; NULL and pointers to NULL are accepted.
 *
 * After the call *v is NULL, so the caller is not left with a dangling
 * pointer and calling vector_free() again is harmless.
 */
static void vector_free(psl_vector_t **v)
{
  psl_vector_t *vec;

  if (!v || !*v)
    return;

  vec = *v;

  if (vec->entry) {
    int it;

    for (it = 0; it < vec->cur; it++)
      free(vec->entry[it]);

    free(vec->entry);
  }

  free(vec);
  *v = NULL;
}
236
237
static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
238
0
{
239
0
  if (pos < 0 || !v || pos >= v->cur) return NULL;
240
241
0
  return v->entry[pos];
242
0
}
243
244
/* the entries must be sorted by the vector's comparison function (v->cmp) */
245
static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
246
0
{
247
0
  if (v) {
248
0
    int l, r, m;
249
0
    int res;
250
251
    /* binary search for element (exact match) */
252
0
    for (l = 0, r = v->cur - 1; l <= r;) {
253
0
      m = (l + r) / 2;
254
0
      if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
255
0
      else if (res < 0) r = m - 1;
256
0
      else return m;
257
0
    }
258
0
  }
259
260
0
  return -1; /* not found */
261
0
}
262
263
/*
 * Append a heap-allocated copy of 'elem' to the vector.
 *
 * The pointer array is doubled when it is full. The new capacity is stored
 * only after realloc() succeeded, so a failed grow leaves the vector intact
 * and later additions cannot write past the allocation. A capacity of 0
 * grows to 1.
 *
 * Returns the index of the new entry, or -1 on NULL arguments, size overflow
 * or allocation failure.
 */
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
{
  psl_entry_t *copy;

  if (!v || !elem)
    return -1;

  if (v->cur >= v->max) {
    psl_entry_t **grown;
    int new_max;

    if (v->max > INT_MAX / 2)
      return -1; /* doubling would overflow int */

    new_max = v->max > 0 ? v->max * 2 : 1;

    if ((size_t) new_max > (size_t) -1 / sizeof(*grown))
      return -1; /* byte count would overflow size_t */

    if (!(grown = realloc(v->entry, (size_t) new_max * sizeof(*grown))))
      return -1; /* v->entry and v->max are still valid */

    v->entry = grown;
    v->max = new_max;
  }

  if (!(copy = malloc(sizeof(*copy))))
    return -1;

  memcpy(copy, elem, sizeof(*copy));

  v->entry[v->cur] = copy;
  return v->cur++;
}
290
291
/*
 * Sort the entry pointers with the vector's comparison function.
 * Does nothing if 'v' is NULL or has no comparison function.
 */
static void vector_sort(psl_vector_t *v)
{
  if (!v || !v->cmp)
    return;

  /* The array elements are psl_entry_t pointers, so take the size from the array itself. */
  /* NOTE(review): v->cmp is invoked through a differently typed function pointer, as in the original code. */
  qsort(v->entry, v->cur, sizeof(*v->entry), (int(*)(const void *, const void *))v->cmp);
}
296
297
/* by this kind of sorting, we can easily see if a domain matches or not */
298
static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
299
0
{
300
0
  int n;
301
302
0
  if ((n = s2->nlabels - s1->nlabels))
303
0
    return n; /* most labels first */
304
305
0
  if ((n = s1->length - s2->length))
306
0
    return n;  /* shorter rules first */
307
308
0
  return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf);
309
0
}
310
311
/* needed to sort array of pointers, given to qsort() */
312
/* Adapter for arrays of entry pointers: dereferences both arguments and calls suffix_compare(). */
static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
{
  const psl_entry_t *left = *s1;
  const psl_entry_t *right = *s2;

  return suffix_compare(left, right);
}
316
317
/*
 * Initialize 'suffix' from the rule text.
 *
 * @suffix: entry to fill; 'label' is pointed at the entry's own 'label_buf'.
 * @rule:   rule text.
 * @length: number of bytes of 'rule' to use.
 *
 * At most 'length' bytes are copied (stopping early at a NUL), so the copy
 * can never exceed the size that was validated against 'label_buf'.
 * 'nlabels' is set to the number of '.' separators copied plus one.
 *
 * Returns 0 on success. Returns -1 and sets nlabels to 0 if 'length' does
 * not fit into 'label_buf'.
 */
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
{
  size_t i;

  suffix->label = suffix->label_buf;

  if (length >= sizeof(suffix->label_buf) - 1) {
    suffix->nlabels = 0; /* rule too long, ignored */
    return -1;
  }

  suffix->length = (unsigned short) length;
  suffix->nlabels = 1;

  for (i = 0; i < length && rule[i]; i++) {
    if (rule[i] == '.')
      suffix->nlabels++;
    suffix->label_buf[i] = rule[i];
  }
  suffix->label_buf[i] = 0;

  return 0;
}
343
344
/* Return a malloc()ed copy of the NUL-terminated string 's', or NULL if the allocation fails. */
static char *psl_strdup(const char *s)
{
  size_t size = strlen(s) + 1;
  char *copy = malloc(size);

  if (copy)
    memcpy(copy, s, size);

  return copy;
}
351
352
#if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU)
353
/*
354
 * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII
355
 * representation of non-ASCII characters in labels as found in UTF-8 domain names.
356
 * This is because the current DAFSA format used may only hold character values [21..127].
357
 *
358
  Code copied from http://www.nicemice.net/idn/punycode-spec.gz on
359
  2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133
360
  labeled punycode-spec 1.0.3 (2006-Mar-24-Thu).  It is modified for
361
  libpsl by Tim Rühsen.  License on the original code:
362
363
  punycode-spec 1.0.3 (2006-Mar-23-Thu)
364
  http://www.nicemice.net/idn/
365
  Adam M. Costello
366
  http://www.nicemice.net/amc/
367
368
  B. Disclaimer and license
369
370
    Regarding this entire document or any portion of it (including
371
    the pseudocode and C code), the author makes no guarantees and
372
    is not responsible for any damage resulting from its use.  The
373
    author grants irrevocable permission to anyone to use, modify,
374
    and distribute it in any way that does not diminish the rights
375
    of anyone else to use, modify, and distribute it, provided that
376
    redistributed derivative works do not contain misleading author or
377
    version information.  Derivative works need not be licensed under
378
    similar terms.
379
380
  C. Punycode sample implementation
381
382
  punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
383
  http://www.nicemice.net/idn/
384
  Adam M. Costello
385
  http://www.nicemice.net/amc/
386
387
  This is ANSI C code (C89) implementing Punycode 1.0.x.
388
 */
389
/* Result codes of punycode_encode(). */
enum punycode_status {
  /* Conversion succeeded. */
  punycode_success = 0,
  /* Input is invalid. */
  punycode_bad_input = 1,
  /* Output would exceed the space provided. */
  punycode_big_output = 2,
  /* Wider integers needed to process input. */
  punycode_overflow = 3
};
395
396
#ifdef PUNYCODE_UINT
397
  typedef PUNYCODE_UINT punycode_uint;
398
#elif UINT_MAX >= (1 << 26) - 1
399
  typedef unsigned int punycode_uint;
400
#else
401
  typedef unsigned long punycode_uint;
402
#endif
403
404
/*** Bootstring parameters for Punycode ***/
405
enum {
  base = 36,
  tmin = 1,
  tmax = 26,
  skew = 38,
  damp = 700,
  initial_bias = 72,
  initial_n = 0x80, /* first non-basic code point */
  delimiter = 0x2D  /* '-' */
};
409
410
/*
 * Map a digit value to its ASCII character:
 *    0..25 map to 'a'..'z' (97 + d)
 *   26..35 map to '0'..'9' (22 + d)
 */
static char encode_digit(punycode_uint d)
{
  if (d < 26)
    return d + 97;

  return d + 22;
}
416
#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
417
static const punycode_uint maxint = -1;
418
419
/*
 * Bias adaptation step of the Punycode encoder.
 * 'delta' is divided by 'damp' on the first call ('firsttime' non-zero)
 * and halved on later calls. 'numpoints' must not be 0.
 */
static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime)
{
  punycode_uint k = 0;

  if (firsttime)
    delta = delta / damp;
  else
    delta = delta >> 1; /* same as delta / 2 */

  delta += delta / numpoints;

  while (delta > ((base - tmin) * tmax) / 2) {
    delta /= base - tmin;
    k += base;
  }

  return k + (base - tmin + 1) * delta / (delta + skew);
}
433
434
static enum punycode_status punycode_encode(
435
  size_t input_length_orig,
436
  const punycode_uint input[],
437
  size_t *output_length,
438
  char output[])
439
{
440
  punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t;
441
  size_t out, max_out;
442
443
  /* The Punycode spec assumes that the input length is the same type */
444
  /* of integer as a code point, so we need to convert the size_t to  */
445
  /* a punycode_uint, which could overflow.                           */
446
447
  if (input_length_orig > maxint)
448
    return punycode_overflow;
449
450
  input_length = (punycode_uint) input_length_orig;
451
452
  /* Initialize the state: */
453
454
  n = initial_n;
455
  delta = 0;
456
  out = 0;
457
  max_out = *output_length;
458
  bias = initial_bias;
459
460
  /* Handle the basic code points: */
461
  for (j = 0; j < input_length; ++j) {
462
    if (input[j] < 0x80) {
463
      if (max_out - out < 2)
464
        return punycode_big_output;
465
      output[out++] = (char) input[j];
466
    }
467
    /* else if (input[j] < n) return punycode_bad_input; */
468
    /* (not needed for Punycode with unsigned code points) */
469
  }
470
471
  h = b = (punycode_uint) out;
472
  /* cannot overflow because out <= input_length <= maxint */
473
474
  /* h is the number of code points that have been handled, b is the  */
475
  /* number of basic code points, and out is the number of ASCII code */
476
  /* points that have been output.                                    */
477
478
  if (b > 0)
479
    output[out++] = delimiter;
480
481
  /* Main encoding loop: */
482
483
  while (h < input_length) {
484
    /* All non-basic code points < n have been     */
485
    /* handled already.  Find the next larger one: */
486
487
    for (m = maxint, j = 0; j < input_length; ++j) {
488
      /* if (basic(input[j])) continue; */
489
      /* (not needed for Punycode) */
490
      if (input[j] >= n && input[j] < m)
491
        m = input[j];
492
    }
493
494
    /* Increase delta enough to advance the decoder's    */
495
    /* <n,i> state to <m,0>, but guard against overflow: */
496
497
    if (m - n > (maxint - delta) / (h + 1))
498
      return punycode_overflow;
499
    delta += (m - n) * (h + 1);
500
    n = m;
501
502
    for (j = 0; j < input_length; ++j) {
503
      /* Punycode does not need to check whether input[j] is basic: */
504
      if (input[j] < n /* || basic(input[j]) */) {
505
        if (++delta == 0)
506
          return punycode_overflow;
507
      }
508
509
      if (input[j] == n) {
510
        /* Represent delta as a generalized variable-length integer: */
511
512
        for (q = delta, k = base;; k += base) {
513
          if (out >= max_out)
514
            return punycode_big_output;
515
          t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
516
            k >= bias + tmax ? tmax : k - bias;
517
          if (q < t)
518
            break;
519
          output[out++] = encode_digit(t + (q - t) % (base - t));
520
          q = (q - t) / (base - t);
521
        }
522
523
        output[out++] = encode_digit(q);
524
        bias = adapt(delta, h + 1, h == b);
525
        delta = 0;
526
        ++h;
527
      }
528
    }
529
530
    ++delta, ++n;
531
  }
532
533
  *output_length = out;
534
  return punycode_success;
535
}
536
537
/*
 * Decode UTF-8 into an array of code points.
 *
 * @in:     UTF-8 input, not necessarily NUL-terminated.
 * @inlen:  number of bytes in 'in'.
 * @out:    receives the decoded code points.
 * @outlen: capacity of 'out' in elements; one element is kept in reserve,
 *          so at most outlen - 1 code points are written.
 *
 * Returns the number of code points written, or -1 if 'outlen' is 0 or the
 * input contains a malformed or truncated sequence. Decoding stops without
 * an error once the usable part of 'out' is full.
 */
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
{
  size_t n = 0;
  const unsigned char *s = (const unsigned char *) in;
  const unsigned char *e = s + inlen;

  if (!outlen)
    return -1;

  outlen--;

  while (n < outlen) {
    size_t inleft = (size_t) (e - s);

    if (inleft >= 1 && (*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */
      out[n++] = *s;
      s++;
    } else if (inleft >= 2 && (*s & 0xE0) == 0xC0) { /* 110xxxxx 10xxxxxx */
      if ((s[1] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((*s & 0x1F) << 6) | (s[1] & 0x3F);
      s += 2;
    } else if (inleft >= 3 && (*s & 0xF0) == 0xE0) { /* 1110xxxx 10xxxxxx 10xxxxxx */
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((*s & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
      s += 3;
    } else if (inleft >= 4 && (*s & 0xF8) == 0xF0) { /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
        return -1;
      /* Each continuation byte contributes its own 6 bits: s[1] -> 17..12, s[2] -> 11..6, s[3] -> 5..0. */
      out[n++] = ((*s & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
      s += 4;
    } else if (!inleft) {
      break;
    } else
      return -1;
  }

  return n;
}
577
578
/* Return 1 if none of the first 'n' bytes of 's' has the high bit set, else 0. An empty range yields 1. */
static int mem_is_ascii(const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *) s;
  size_t i;

  for (i = 0; i < n; i++) {
    if (bytes[i] & 0x80)
      return 0;
  }

  return 1;
}
586
587
static int domain_to_punycode(const char *domain, char *out, size_t outsize)
588
{
589
  size_t outlen = 0, labellen;
590
  punycode_uint input[256];
591
  const char *label, *e;
592
593
  for (e = label = domain; e;) {
594
    e = strchr(label, '.');
595
    labellen = e ? (size_t) (e - label) : strlen(label);
596
597
    if (mem_is_ascii(label, labellen)) {
598
      if (outlen + labellen + (e != NULL) >= outsize)
599
        return 1;
600
601
      memcpy(out + outlen, label, labellen);
602
      outlen += labellen;
603
    } else {
604
      ssize_t inputlen = 0;
605
606
      if (outlen + labellen + (e != NULL) + 4 >= outsize)
607
        return 1;
608
609
      if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0)
610
        return 1;
611
612
      memcpy(out + outlen, "xn--", 4);
613
      outlen += 4;
614
615
      labellen = outsize - outlen - (e != NULL) - 1; // -1 to leave space for the trailing \0
616
      if (punycode_encode(inputlen, input, &labellen, out + outlen))
617
        return 1;
618
      outlen += labellen;
619
    }
620
621
    if (e) {
622
      label = e + 1;
623
      out[outlen++] = '.';
624
    }
625
    out[outlen] = 0;
626
  }
627
628
  return 0;
629
}
630
#endif
631
632
/* Locale-independent whitespace test: returns 1 for space, tab, CR and LF, else 0. */
static int isspace_ascii(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
636
637
/* Return 1 if the NUL-terminated string 's' contains only bytes below 128, else 0. */
static int str_is_ascii(const char *s)
{
  const unsigned char *p;

  for (p = (const unsigned char *) s; *p; p++) {
    if (*p >= 128)
      return 0;
  }

  return 1;
}
643
644
#if defined(WITH_LIBIDN)
645
/*
646
 * Work around a libidn <= 1.30 vulnerability.
647
 *
648
 * The function checks for a valid UTF-8 character sequence before
649
 * passing it to idna_to_ascii_8z().
650
 *
651
 * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
652
 * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
653
 * [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
654
 */
655
/*
 * Structural check of a NUL-terminated UTF-8 string.
 *
 * Every lead byte must be followed by the number of 10xxxxxx continuation
 * bytes that its bit pattern announces (1, 2 or 3). The terminating NUL is
 * not a continuation byte, so a truncated sequence is rejected before any
 * byte behind the terminator is read.
 *
 * Returns 1 if the string passes, 0 otherwise.
 */
static int utf8_is_valid(const char *utf8)
{
  const unsigned char *s = (const unsigned char *) utf8;

  while (*s) {
    int trailing, i;

    if ((*s & 0x80) == 0)         /* 0xxxxxxx ASCII char */
      trailing = 0;
    else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
      trailing = 1;
    else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
      trailing = 2;
    else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      trailing = 3;
    else
      return 0;

    for (i = 1; i <= trailing; i++) {
      if ((s[i] & 0xC0) != 0x80)
        return 0;
    }

    s += trailing + 1;
  }

  return 1;
}
680
#endif
681
682
typedef void *psl_idna_t;
683
684
/*
 * Open an IDNA conversion handle.
 * With libicu this is the UTS#46 instance returned by uidna_openUTS46().
 * All other configurations need no handle and return NULL.
 */
static psl_idna_t *psl_idna_open(void)
{
#if defined(WITH_LIBICU)
  UErrorCode status = 0;
  UIDNA *handle = uidna_openUTS46(UIDNA_USE_STD3_RULES | UIDNA_NONTRANSITIONAL_TO_ASCII, &status);

  return (void *)handle;
#else
  return NULL;
#endif
}
692
693
/* Close a handle obtained from psl_idna_open(). NULL is accepted; without libicu this is a no-op. */
static void psl_idna_close(psl_idna_t *idna)
{
#if defined(WITH_LIBICU)
  if (idna != NULL)
    uidna_close((UIDNA *)idna);
#else
  (void) idna;
#endif
}
702
703
/*
 * Convert the UTF-8 domain name 'utf8' to its ASCII (punycode) form.
 *
 * @idna:  handle from psl_idna_open(); only used by the libicu build.
 * @utf8:  NUL-terminated UTF-8 domain name.
 * @ascii: on success receives a heap-allocated string that the caller
 *         releases with free().
 *
 * Returns 0 on success, -1 on failure.
 *
 * The conversion backend is selected at build time: libicu, libidn2, libidn,
 * or the built-in domain_to_punycode() when none of them is configured.
 */
static int psl_idna_toASCII(psl_idna_t *idna, const char *utf8, char **ascii)
{
  int ret = -1;

#if defined(WITH_LIBICU)
  (void) idna;

  /* IDNA2008 UTS#46 punycode conversion */
  if (idna) {
    char lookupname_buf[128] = "", *lookupname = lookupname_buf;
    UErrorCode status = 0;
    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
    UChar utf16_dst[128], utf16_src_buf[128];
    UChar *utf16_src = utf16_src_buf;
    int32_t utf16_src_length, bytes_written;
    int32_t utf16_dst_length;

    /* Step 1: UTF-8 -> UTF-16, first into the stack buffer. */
    u_strFromUTF8(utf16_src, countof(utf16_src_buf), &utf16_src_length, utf8, -1, &status);
    if (!U_SUCCESS(status))
      goto cleanup; /* UTF-8 to UTF-16 conversion failed */

    if (utf16_src_length >= (int) countof(utf16_src_buf)) {
      /* No room for the terminator in the stack buffer: convert again into a heap buffer. */
      utf16_src = malloc((utf16_src_length + 1) * sizeof(UChar));
      if (!utf16_src)
        goto cleanup;

      u_strFromUTF8(utf16_src, utf16_src_length, NULL, utf8, -1, &status);
      if (!U_SUCCESS(status))
        goto cleanup; /* UTF-8 to UTF-16 conversion failed */

      utf16_src[utf16_src_length] = 0; /* u_strFromUTF8() doesn't 0-terminate if dest is filled up */
    }

    /* Step 2: the IDNA conversion itself. */
    /* NOTE(review): only 'status' is examined here, info.errors is not - confirm that this is intended. */
    utf16_dst_length = uidna_nameToASCII((UIDNA *)idna, utf16_src, utf16_src_length, utf16_dst, countof(utf16_dst), &info, &status);
    if (!U_SUCCESS(status))
      goto cleanup; /* to ASCII conversion failed */

    /* Step 3: UTF-16 -> UTF-8, again trying the stack buffer first. */
    u_strToUTF8(lookupname, sizeof(lookupname_buf), &bytes_written, utf16_dst, utf16_dst_length, &status);
    if (!U_SUCCESS(status))
      goto cleanup; /* UTF-16 to UTF-8 conversion failed */

    if (bytes_written >= (int) sizeof(lookupname_buf)) {
      lookupname = malloc(bytes_written + 1);
      if (!lookupname)
        goto cleanup;

      u_strToUTF8(lookupname, bytes_written, NULL, utf16_dst, utf16_dst_length, &status);
      if (!U_SUCCESS(status))
        goto cleanup; /* UTF-16 to UTF-8 conversion failed */

      lookupname[bytes_written] = 0; /* u_strToUTF8() doesn't 0-terminate if dest is filled up */
    } else {
      /* The result sits in the stack buffer: hand out a heap copy. */
      if (!(lookupname = psl_strdup(lookupname)))
        goto cleanup;
    }

    if (ascii) {
      /* Ownership moves to the caller. */
      *ascii = lookupname;
      lookupname = NULL;
    }

    ret = 0;

cleanup:
    if (lookupname != lookupname_buf)
      free(lookupname);
    if (utf16_src != utf16_src_buf)
      free(utf16_src);
  }
#elif defined(WITH_LIBIDN2)
#if IDN2_VERSION_NUMBER >= 0x00140000
  (void) idna;

  /* IDN2_NFC_INPUT converts to NFC before the toASCII conversion. It is
   * passed along with IDN2_NONTRANSITIONAL for the unlikely case that the
   * linked library does not match the build headers and lacks TR46 support. */
  if (idn2_lookup_u8((uint8_t *)utf8, (uint8_t **)ascii, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL) == IDN2_OK)
    ret = 0;
#else
  uint8_t *lower;
  size_t len = u8_strlen((uint8_t *)utf8) + 1;

  /* we need a conversion to lowercase */
  if (!(lower = u8_tolower((uint8_t *)utf8, len, 0, UNINORM_NFKC, NULL, &len)))
    return -1;

  if (idn2_lookup_u8(lower, (uint8_t **)ascii, 0) == IDN2_OK)
    ret = 0;

  free(lower);
#endif
#elif defined(WITH_LIBIDN)
  (void) idna;

  /* Invalid UTF-8 is rejected before libidn sees it. */
  if (!utf8_is_valid(utf8))
    return -1;

  /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */
  if (idna_to_ascii_8z(utf8, ascii, IDNA_USE_STD3_ASCII_RULES) == IDNA_SUCCESS)
    ret = 0;
#else
  char lookupname[128];

  (void) idna;

  /* Built-in converter; the result is only reported when 'ascii' is given. */
  if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0
    && ascii
    && (*ascii = psl_strdup(lookupname)))
  {
    ret = 0;
  }
#endif

  return ret;
}
830
831
/*
 * If the rule text of 'e' is not plain ASCII, also add its ASCII (punycode)
 * form to the vector 'v', carrying the same flags.
 * Nothing is added when the conversion fails, yields the same text, or the
 * converted rule does not fit into an entry.
 */
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
{
  char *ace;

  if (str_is_ascii(e->label_buf))
    return;

  if (psl_idna_toASCII(idna, e->label_buf, &ace) != 0)
    return;

  if (strcmp(e->label_buf, ace) != 0) {
    psl_entry_t converted;

    if (suffix_init(&converted, ace, strlen(ace)) == 0) {
      psl_entry_t *stored;

      converted.flags = e->flags;

      stored = vector_get(v, vector_add(v, &converted));
      if (stored)
        stored->label = stored->label_buf; /* the copy lives at a new address */
    }
  } /* else ignore */

  free(ace);
}
853
854
/* prototypes */
855
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
856
int GetUtfMode(const unsigned char *graph, size_t length);
857
858
static int is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
859
3.21k
{
860
3.21k
  psl_entry_t suffix;
861
3.21k
  const char *p;
862
3.21k
  char *punycode = NULL;
863
3.21k
  int need_conversion = 0;
864
865
  /* this function should be called without leading dots, just make sure */
866
3.21k
  if (*domain == '.')
867
0
    domain++;
868
869
3.21k
  suffix.nlabels = 1;
870
871
18.8k
  for (p = domain; *p; p++) {
872
15.6k
    if (*p == '.') {
873
6.06k
      if (suffix.nlabels == 255) /* weird input, avoid 8bit overflow */
874
2
        return 0;
875
6.05k
      suffix.nlabels++;
876
6.05k
    }
877
9.61k
    else if (*((unsigned char *)p) >= 128)
878
2.46k
      need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
879
15.6k
  }
880
881
3.21k
  if (suffix.nlabels == 1) {
882
    /* TLD, this is the prevailing '*' match. If type excludes the '*' rule, continue.
883
     */
884
84
    if (!(type & PSL_TYPE_NO_STAR_RULE))
885
84
      return 1;
886
84
  }
887
888
3.13k
  type &= ~PSL_TYPE_NO_STAR_RULE;
889
890
3.13k
  if (psl->utf8 || psl == &builtin_psl)
891
3.13k
    need_conversion = 0;
892
893
3.13k
  if (need_conversion) {
894
0
    psl_idna_t *idna = psl_idna_open();
895
896
0
    if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
897
0
      suffix.label = punycode;
898
0
      suffix.length = strlen(punycode);
899
0
    } else {
900
      /* fallback */
901
902
0
      suffix.label = domain;
903
0
      suffix.length = p - suffix.label;
904
0
    }
905
906
0
    psl_idna_close(idna);
907
3.13k
  } else {
908
3.13k
    suffix.label = domain;
909
3.13k
    suffix.length = p - suffix.label;
910
3.13k
  }
911
912
3.13k
  if (psl == &builtin_psl || psl->dafsa) {
913
3.13k
    size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
914
3.13k
    const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
915
3.13k
    int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
916
3.13k
    if (rc != -1) {
917
      /* check for correct rule type */
918
2
      if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
919
0
        goto suffix_no;
920
2
      else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
921
0
        goto suffix_no;
922
923
2
      if (rc & PRIV_PSL_FLAG_EXCEPTION)
924
1
        goto suffix_no;
925
926
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
927
      /* definitely a match, no matter if the found rule is a wildcard or not */
928
1
      goto suffix_yes;
929
2
    }
930
3.13k
    if ((suffix.label = strchr(suffix.label, '.'))) {
931
3.13k
      suffix.label++;
932
3.13k
      suffix.length = strlen(suffix.label);
933
3.13k
      suffix.nlabels--;
934
935
3.13k
      rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
936
3.13k
      if (rc != -1) {
937
        /* check for correct rule type */
938
93
        if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
939
0
          goto suffix_no;
940
93
        else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
941
0
          goto suffix_no;
942
943
93
        if (rc & PRIV_PSL_FLAG_WILDCARD)
944
91
          goto suffix_yes;
945
93
      }
946
3.13k
    }
947
3.13k
  } else {
948
0
    psl_entry_t *rule = vector_get(psl->suffixes, 0);
949
950
0
    if (!rule || rule->nlabels < suffix.nlabels - 1)
951
0
      goto suffix_no;
952
953
0
    rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
954
955
0
    if (rule) {
956
      /* check for correct rule type */
957
0
      if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
958
0
        goto suffix_no;
959
0
      else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
960
0
        goto suffix_no;
961
962
0
      if (rule->flags & PRIV_PSL_FLAG_EXCEPTION)
963
0
        goto suffix_no;
964
965
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
966
      /* definitely a match, no matter if the found rule is a wildcard or not */
967
0
      goto suffix_yes;
968
0
    }
969
970
0
    if ((suffix.label = strchr(suffix.label, '.'))) {
971
0
      suffix.label++;
972
0
      suffix.length = strlen(suffix.label);
973
0
      suffix.nlabels--;
974
975
0
      rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
976
977
0
      if (rule) {
978
        /* check for correct rule type */
979
0
        if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
980
0
          goto suffix_no;
981
0
        else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
982
0
          goto suffix_no;
983
984
0
        if (rule->flags & PRIV_PSL_FLAG_WILDCARD)
985
0
          goto suffix_yes;
986
0
      }
987
0
    }
988
0
  }
989
990
3.04k
suffix_no:
991
3.04k
  if (punycode)
992
0
    free(punycode);
993
3.04k
  return 0;
994
995
92
suffix_yes:
996
92
  if (punycode)
997
0
    free(punycode);
998
92
  return 1;
999
3.13k
}
1000
1001
/**
1002
 * psl_is_public_suffix:
1003
 * @psl: PSL context
1004
 * @domain: Domain string
1005
 *
1006
 * This function checks if @domain is a public suffix by the means of the
1007
 * [Mozilla Public Suffix List](https://publicsuffix.org).
1008
 *
1009
 * For cookie domain checking see psl_is_cookie_domain_acceptable().
1010
 *
1011
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1012
 * Other encodings likely result in incorrect return values.
1013
 * Use helper function psl_str_to_utf8lower() to normalize @domain.
1014
 *
1015
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1016
 * psl_builtin().
1017
 *
1018
 * Returns: 1 if domain is a public suffix, 0 if not.
1019
 *
1020
 * Since: 0.1
1021
 */
1022
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
1023
4.45k
{
1024
4.45k
  if (!psl || !domain)
1025
1.24k
    return 1;
1026
1027
3.21k
  return is_public_suffix(psl, domain, PSL_TYPE_ANY);
1028
4.45k
}
1029
1030
/**
1031
 * psl_is_public_suffix2:
1032
 * @psl: PSL context
1033
 * @domain: Domain string
1034
 * @type: Domain type
1035
 *
1036
 * This function checks if @domain is a public suffix by the means of the
1037
 * [Mozilla Public Suffix List](https://publicsuffix.org).
1038
 *
1039
 * @type specifies the PSL section where to perform the lookup. Valid values are
1040
 * %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN, %PSL_TYPE_NO_STAR_RULE, and %PSL_TYPE_ANY.
1041
 *
1042
 * %PSL_TYPE_NO_STAR_RULE switches off the 'prevailing star rule' (see
1043
 * [List](https://publicsuffix.org/list) under 'Algorithm' 2.).
1044
 * Applying the flag means that TLDs not explicitly listed in the PSL are *not* treated as public suffixes.
1045
 *
1046
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1047
 * Other encodings likely result in incorrect return values.
1048
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
1049
 *
1050
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1051
 * psl_builtin().
1052
 *
1053
 * Returns: 1 if domain is a public suffix, 0 if not.
1054
 *
1055
 * Since: 0.1
1056
 */
1057
int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
{
  /* Without a context or a domain no lookup is possible; the answer is then 1. */
  return (psl && domain) ? is_public_suffix(psl, domain, type) : 1;
}
1064
1065
/**
1066
 * psl_unregistrable_domain:
1067
 * @psl: PSL context
1068
 * @domain: Domain string
1069
 *
1070
 * This function finds the longest public suffix part of @domain by the means
1071
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1072
 *
1073
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1074
 * Other encodings likely result in incorrect return values.
1075
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
1076
 *
1077
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1078
 * psl_builtin().
1079
 *
1080
 * Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
1081
 * does not contain a public suffix (or if @psl is %NULL).
1082
 *
1083
 * Since: 0.1
1084
 */
1085
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
{
  int nlabels = 0;
  const char *p;

  if (!psl || !domain)
    return NULL;

  /*
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
   *
   * The scan walks backwards from the terminating 0 byte and decrements
   * before dereferencing, so 'p' never points before 'domain' (not even
   * for an empty string).
   */
  for (p = domain + strlen(domain); p > domain; ) {
    if (*--p == '.' && ++nlabels > 8) {
      domain = p + 1;
      break;
    }
  }

  /*
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
   */

  while (!is_public_suffix(psl, domain, 0)) {
    if ((domain = strchr(domain, '.')))
      domain++; /* strip the leftmost label and retry */
    else
      break; /* prevent endless loop if is_public_suffix() is broken. */
  }

  /* NULL here means no label remained to test */
  return domain;
}
1118
1119
/**
1120
 * psl_registrable_domain:
1121
 * @psl: PSL context
1122
 * @domain: Domain string
1123
 *
1124
 * This function finds the shortest private suffix part of @domain by the means
1125
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1126
 *
1127
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1128
 * Other encodings likely result in incorrect return values.
1129
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
1130
 *
1131
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1132
 * psl_builtin().
1133
 *
1134
 * Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
1135
 * does not contain a private suffix (or if @psl is %NULL).
1136
 *
1137
 * Since: 0.1
1138
 */
1139
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
{
  const char *p, *regdom = NULL;
  int nlabels = 0;

  if (!psl || !domain || *domain == '.')
    return NULL;

  /*
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
   *
   * The scan walks backwards from the terminating 0 byte and decrements
   * before dereferencing, so 'p' never points before 'domain' (not even
   * for an empty string).
   */
  for (p = domain + strlen(domain); p > domain; ) {
    if (*--p == '.' && ++nlabels > 8) {
      domain = p + 1;
      break;
    }
  }

  /*
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
   */

  while (!is_public_suffix(psl, domain, 0)) {
    if ((p = strchr(domain, '.'))) {
      /* remember the candidate that is one label longer than the next suffix to test */
      regdom = domain;
      domain = p + 1;
    } else
      break; /* prevent endless loop if is_public_suffix() is broken. */
  }

  return regdom;
}
1173
1174
/**
1175
 * psl_load_file:
1176
 * @fname: Name of PSL file
1177
 *
1178
 * This function loads the public suffixes file named @fname.
1179
 * To free the allocated resources, call psl_free().
1180
 *
1181
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1182
 *
1183
 * Returns: Pointer to a PSL context or %NULL on failure.
1184
 *
1185
 * Since: 0.1
1186
 */
1187
psl_ctx_t *psl_load_file(const char *fname)
{
  FILE *stream;
  psl_ctx_t *ctx;

  if (!fname)
    return NULL;

  /* open in binary mode; the actual parsing is delegated to psl_load_fp() */
  stream = fopen(fname, "rb");
  if (!stream)
    return NULL;

  ctx = psl_load_fp(stream);
  fclose(stream);

  return ctx;
}
1202
1203
/**
1204
 * psl_load_fp:
1205
 * @fp: %FILE pointer
1206
 *
1207
 * This function loads the public suffixes from a %FILE pointer.
1208
 * To free the allocated resources, call psl_free().
1209
 *
1210
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1211
 *
1212
 * Returns: Pointer to a PSL context or %NULL on failure.
1213
 *
1214
 * Since: 0.1
1215
 */
1216
psl_ctx_t *psl_load_fp(FILE *fp)
1217
1.61k
{
1218
1.61k
  psl_ctx_t *psl;
1219
1.61k
  psl_entry_t suffix, *suffixp;
1220
1.61k
  char buf[256], *linep, *p;
1221
1.61k
  int type = 0, is_dafsa;
1222
1.61k
  psl_idna_t *idna;
1223
1224
1.61k
  if (!fp)
1225
0
    return NULL;
1226
1227
1.61k
  if (!(psl = calloc(1, sizeof(psl_ctx_t))))
1228
0
    return NULL;
1229
1230
  /* read first line to allow ASCII / DAFSA detection */
1231
1.61k
  if (!(linep = fgets(buf, sizeof(buf) - 1, fp)))
1232
1.61k
    goto fail;
1233
1234
0
  is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11);
1235
1236
0
  if (is_dafsa) {
1237
0
    void *m;
1238
0
    size_t size = 65536, n, len = 0;
1239
0
    int version = atoi(buf + 11);
1240
1241
0
    if (version != 0)
1242
0
      goto fail;
1243
1244
0
    if (!(psl->dafsa = malloc(size)))
1245
0
      goto fail;
1246
1247
0
    memcpy(psl->dafsa, buf, len);
1248
1249
0
    while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
1250
0
      len += n;
1251
0
      if (len >= size) {
1252
0
        if (!(m = realloc(psl->dafsa, size *= 2)))
1253
0
          goto fail;
1254
0
        psl->dafsa = m;
1255
0
      }
1256
0
    }
1257
1258
    /* release unused memory */
1259
0
    if ((m = realloc(psl->dafsa, len)))
1260
0
      psl->dafsa = m;
1261
0
    else if (!len)
1262
0
      psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
1263
1264
0
    psl->dafsa_size = len;
1265
0
    psl->utf8 = !!GetUtfMode(psl->dafsa, len);
1266
1267
0
    return psl;
1268
0
  }
1269
1270
0
  idna = psl_idna_open();
1271
1272
  /*
1273
   *  as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
1274
   *  as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
1275
   *  as of 07.10.2018, the list at https://publicsuffix.org/list/ contains ~8600 rules and 8 exceptions.
1276
   */
1277
0
  psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
1278
0
  psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
1279
1280
0
  do {
1281
0
    while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
1282
0
    if (!*linep) continue; /* skip empty lines */
1283
1284
0
    if (*linep == '/' && linep[1] == '/') {
1285
0
      if (!type) {
1286
0
        if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
1287
0
          type = PRIV_PSL_FLAG_ICANN;
1288
0
        else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
1289
0
          type = PRIV_PSL_FLAG_PRIVATE;
1290
0
      }
1291
0
      else if (type == PRIV_PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
1292
0
        type = 0;
1293
0
      else if (type == PRIV_PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
1294
0
        type = 0;
1295
1296
0
      continue; /* skip comments */
1297
0
    }
1298
1299
    /* parse suffix rule */
1300
0
    for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
1301
0
    *linep = 0;
1302
1303
0
    if (*p == '!') {
1304
0
      p++;
1305
0
      suffix.flags = PRIV_PSL_FLAG_EXCEPTION | type;
1306
0
      psl->nexceptions++;
1307
0
    } else if (*p == '*') {
1308
0
      if (*++p != '.') {
1309
        /* fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", p - 1); */
1310
0
        continue;
1311
0
      }
1312
0
      p++;
1313
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
1314
0
      suffix.flags = PRIV_PSL_FLAG_WILDCARD | PRIV_PSL_FLAG_PLAIN | type;
1315
0
      psl->nwildcards++;
1316
0
      psl->nsuffixes++;
1317
0
    } else {
1318
0
      suffix.flags = PRIV_PSL_FLAG_PLAIN | type;
1319
0
      psl->nsuffixes++;
1320
0
    }
1321
1322
0
    if (suffix_init(&suffix, p, linep - p) == 0) {
1323
0
      int index;
1324
1325
0
      if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
1326
        /* Found existing entry:
1327
         * Combination of exception and plain rule is ambiguous
1328
         * !foo.bar
1329
         * foo.bar
1330
         *
1331
         * Allowed:
1332
         * !foo.bar + *.foo.bar
1333
         * foo.bar + *.foo.bar
1334
         *
1335
         * We do not check here, let's do it later.
1336
         */
1337
1338
0
        suffixp = vector_get(psl->suffixes, index);
1339
0
        suffixp->flags |= suffix.flags;
1340
0
      } else {
1341
        /* New entry */
1342
0
        suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
1343
0
      }
1344
1345
0
      if (suffixp) {
1346
0
        suffixp->label = suffixp->label_buf; /* set label to changed address */
1347
0
        add_punycode_if_needed(idna, psl->suffixes, suffixp);
1348
0
      }
1349
0
    }
1350
0
  } while ((linep = fgets(buf, sizeof(buf), fp)));
1351
1352
0
  vector_sort(psl->suffixes);
1353
1354
0
  psl_idna_close(idna);
1355
1356
0
  return psl;
1357
1358
1.61k
fail:
1359
1.61k
  psl_free(psl);
1360
1.61k
  return NULL;
1361
0
}
1362
1363
/**
1364
 * psl_free:
1365
 * @psl: PSL context pointer
1366
 *
1367
 * This function frees the PSL context that has been retrieved via
1368
 * psl_load_fp() or psl_load_file().
1369
 *
1370
 * Since: 0.1
1371
 */
1372
void psl_free(psl_ctx_t *psl)
{
  /* NULL and the built-in context are left untouched */
  if (!psl || psl == &builtin_psl)
    return;

  vector_free(&psl->suffixes);
  free(psl->dafsa);
  free(psl);
}
1380
1381
/**
1382
 * psl_builtin:
1383
 *
1384
 * This function returns the PSL context that has been generated and built in at compile-time.
1385
 * You don't have to free the returned context explicitly.
1386
 *
1387
 * The builtin data also contains punycode entries, one for each international domain name.
1388
 *
1389
 * If the generation of built-in data has been disabled during compilation, %NULL will be returned.
1390
 * When using the builtin psl context, you can provide UTF-8 (lowercase + NFKC) or ASCII/ACE (punycode)
1391
 * representations of domains to functions like psl_is_public_suffix().
1392
 *
1393
 * Returns: Pointer to the built in PSL data or %NULL if this data is not available.
1394
 *
1395
 * Since: 0.1
1396
 */
1397
const psl_ctx_t *psl_builtin(void)
{
  /* stays NULL unless the built-in data has been compiled in */
  const psl_ctx_t *ctx = NULL;

#ifdef ENABLE_BUILTIN
  ctx = &builtin_psl;
#endif

  return ctx;
}
1405
1406
/**
1407
 * psl_suffix_count:
1408
 * @psl: PSL context pointer
1409
 *
1410
 * This function returns number of public suffixes maintained by @psl.
1411
 * The number of exceptions within the Public Suffix List are not included.
1412
 *
1413
 * If the information is not available, the return value is -1 (since 0.19).
1414
 * This is the case with DAFSA blobs or if @psl is %NULL.
1415
 *
1416
 * Returns: Number of public suffixes entries in PSL context or -1 if this information is not available.
1417
 *
1418
 * Since: 0.1
1419
 */
1420
int psl_suffix_count(const psl_ctx_t *psl)
1421
0
{
1422
0
  if (psl == &builtin_psl)
1423
0
    return _psl_nsuffixes;
1424
0
  else if (psl)
1425
0
    return psl->dafsa ? -1 : psl->nsuffixes;
1426
0
  else
1427
0
    return -1;
1428
0
}
1429
1430
/**
1431
 * psl_suffix_exception_count:
1432
 * @psl: PSL context pointer
1433
 *
1434
 * This function returns number of public suffix exceptions maintained by @psl.
1435
 *
1436
 * If the information is not available, the return value is -1 (since 0.19).
1437
 * This is the case with DAFSA blobs or if @psl is %NULL.
1438
 *
1439
 * Returns: Number of public suffix exceptions in PSL context or -1 if this information is not available.
1440
 *
1441
 * Since: 0.1
1442
 */
1443
int psl_suffix_exception_count(const psl_ctx_t *psl)
1444
0
{
1445
0
  if (psl == &builtin_psl)
1446
0
    return _psl_nexceptions;
1447
0
  else if (psl)
1448
0
    return psl->dafsa ? -1 : psl->nexceptions;
1449
0
  else
1450
0
    return -1;
1451
0
}
1452
1453
/**
1454
 * psl_suffix_wildcard_count:
1455
 * @psl: PSL context pointer
1456
 *
1457
 * This function returns number of public suffix wildcards maintained by @psl.
1458
 *
1459
 * If the information is not available, the return value is -1 (since 0.19).
1460
 * This is the case with DAFSA blobs or if @psl is %NULL.
1461
 *
1462
 * Returns: Number of public suffix wildcards in PSL context or -1 if this information is not available.
1463
 *
1464
 * Since: 0.10.0
1465
 */
1466
int psl_suffix_wildcard_count(const psl_ctx_t *psl)
1467
0
{
1468
0
  if (psl == &builtin_psl)
1469
0
    return _psl_nwildcards;
1470
0
  else if (psl)
1471
0
    return psl->dafsa ? -1 : psl->nwildcards;
1472
0
  else
1473
0
    return -1;
1474
0
}
1475
1476
/**
1477
 * psl_builtin_file_time:
1478
 *
1479
 * This function returns the mtime of the Public Suffix List file that has been built in.
1480
 *
1481
 * If the generation of built-in data has been disabled during compilation, 0 will be returned.
1482
 *
1483
 * Returns: time_t value or 0.
1484
 *
1485
 * Since: 0.1
1486
 */
1487
time_t psl_builtin_file_time(void)
1488
0
{
1489
0
  return _psl_file_time;
1490
0
}
1491
1492
/**
1493
 * psl_builtin_sha1sum:
1494
 *
1495
 * This function returns the SHA1 checksum of the Public Suffix List file that has been built in.
1496
 * The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
1497
 *
1498
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1499
 *
1500
 * Returns: String containing SHA1 checksum or an empty string.
1501
 *
1502
 * Since: 0.1
1503
 */
1504
const char *psl_builtin_sha1sum(void)
1505
0
{
1506
0
  return _psl_sha1_checksum;
1507
0
}
1508
1509
/**
1510
 * psl_builtin_filename:
1511
 *
1512
 * This function returns the file name of the Public Suffix List file that has been built in.
1513
 *
1514
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1515
 *
1516
 * Returns: String containing the PSL file name or an empty string.
1517
 *
1518
 * Since: 0.1
1519
 */
1520
const char *psl_builtin_filename(void)
1521
0
{
1522
0
  return _psl_filename;
1523
0
}
1524
1525
/**
1526
 * psl_builtin_outdated:
1527
 *
1528
 * This function checks if the built-in data is older than the file it has been created from.
1529
 * If it is, it might be a good idea for the application to reload the PSL.
1530
 * The mtime is taken as reference.
1531
 *
1532
 * If the PSL file does not exist, it is assumed that the built-in data is not outdated.
1533
 *
1534
 * Returns: 1 if the built-in is outdated, 0 otherwise.
1535
 *
1536
 * Since: 0.10.0
1537
 */
1538
int psl_builtin_outdated(void)
1539
0
{
1540
0
  struct_stat st;
1541
1542
0
  if (func_sys_stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
1543
0
    return 1;
1544
1545
0
  return 0;
1546
0
}
1547
1548
/**
1549
 * psl_dist_filename:
1550
 *
1551
 * This function returns the file name of the distribution/system PSL data file.
1552
 * This file will be considered by psl_latest().
1553
 *
1554
 * Return the filename that is set by ./configure --with-psl-distfile, or an empty string.
1555
 *
1556
 * Returns: String containing a PSL file name or an empty string.
1557
 *
1558
 * Since: 0.16
1559
 */
1560
const char *psl_dist_filename(void)
1561
0
{
1562
0
  return _psl_dist_filename;
1563
0
}
1564
1565
/**
1566
 * psl_get_version:
1567
 *
1568
 * Get libpsl version.
1569
 *
1570
 * Returns: String containing version of libpsl.
1571
 *
1572
 * Since: 0.2.5
1573
 **/
1574
const char *psl_get_version(void)
1575
0
{
1576
#ifdef WITH_LIBICU
1577
  return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
1578
#elif defined(WITH_LIBIDN2)
1579
0
  return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
1580
#elif defined(WITH_LIBIDN)
1581
  return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
1582
#else
1583
  return PACKAGE_VERSION " (no IDNA support)";
1584
#endif
1585
0
}
1586
1587
/**
1588
 * psl_check_version_number:
1589
 * @version: Version number (hex) to check against.
1590
 *
1591
 * Check the given version number is at minimum the current library version number.
1592
 * The version number must be a hexadecimal number like 0x000a01 (V0.10.1).
1593
 *
1594
 * Returns: Returns the library version number if the given version number is at least
1595
 * the version of the library, else return 0; If the argument is 0, the function returns
1596
 * the library version number without performing a check.
1597
 *
1598
 * Since: 0.11.0
1599
 **/
1600
int psl_check_version_number(int version)
1601
0
{
1602
0
  if (version) {
1603
0
    int major = version >> 16;
1604
0
    int minor = (version >> 8) & 0xFF;
1605
0
    int patch = version & 0xFF;
1606
1607
0
    if (major < PSL_VERSION_MAJOR
1608
0
      || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR)
1609
0
      || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH))
1610
0
    {
1611
0
      return 0;
1612
0
    }
1613
0
  }
1614
1615
0
  return PSL_VERSION_NUMBER;
1616
0
}
1617
/*
1618
 * Return true if 's' is a valid dotted quad, else false.
1619
 * Assume that characters '0'..'9' have consecutive byte values.
1620
 * credit:
1621
 *    inspired by Paul Vixie
1622
 */
1623
static int is_ip4(const char *s)
{
  unsigned char ch = 0;
  int octet;

  for (octet = 0; octet < 4; octet++) {
    int value, extra;

    /* every octet has to start with a digit */
    ch = (unsigned char) *s++;
    if (ch < '0' || ch > '9')
      return 0;
    value = ch - '0';

    /*
     * Accept up to three more digits. 'ch' ends up holding the first
     * character that is not consumed as a digit (the 5th character of the
     * octet is never treated as a digit).
     */
    for (extra = 0; ; extra++) {
      ch = (unsigned char) *s++;
      if (extra == 3 || ch < '0' || ch > '9')
        break;
      value = value * 10 + (ch - '0');
    }

    if (value > 255)
      return 0;

    /* the first three octets must be followed by a dot */
    if (octet < 3 && ch != '.')
      return 0;
  }

  /* the string has to end right after the fourth octet */
  return ch == 0;
}
1653
1654
static int hexval(unsigned c)
{
  unsigned lowered;

  /* decimal digits */
  if (c >= '0' && c <= '9')
    return (int) (c - '0');

  /* setting bit 5 maps 'A'..'F' onto 'a'..'f' */
  lowered = c | 0x20;
  if (lowered >= 'a' && lowered <= 'f')
    return (int) (lowered - 'a' + 10);

  /* not a hexadecimal digit */
  return -1;
}
1661
1662
/*
1663
 * Original code taken from musl inet_pton(),
1664
 *   which has a standard MIT license (https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT).
1665
 * Amended and simplified to our needs.
1666
 */
1667
static int is_ip6(const char *s)
{
  int group;        /* index of the hex group currently being parsed */
  int gap = -1;     /* group index at which "::" was seen, -1 if not seen */
  int v4_tail = 0;  /* set when the address ends in a dotted quad */

  /* a single leading ':' is invalid, it has to be "::" */
  if (*s == ':' && *++s != ':')
    return 0;

  for (group = 0; ; group++) {
    int ndigits = 0;

    /* the first ':' found at the start of a group marks the "::" gap */
    if (*s == ':' && gap < 0) {
      gap = group;
      if (!*++s)
        break; /* address ends with "::" */
      continue;
    }

    /* a group consists of 1 to 4 hex digits; only the count matters here */
    while (ndigits < 4 && hexval(s[ndigits]) >= 0)
      ndigits++;

    if (ndigits == 0)
      return 0;

    /* end of string: fine after a gap or after the eighth group */
    if (!s[ndigits] && (gap >= 0 || group == 7))
      break;

    if (group == 7)
      return 0; /* no more than eight groups */

    if (s[ndigits] != ':') {
      /* a '.' is accepted only after a gap or from the seventh group on */
      if (s[ndigits] != '.' || (group < 6 && gap < 0))
        return 0;
      v4_tail = 1; /* 's' still points at the start of the dotted quad */
      break;
    }

    s += ndigits + 1;
  }

  return !v4_tail || is_ip4(s);
}
1696
1697
/* return whether hostname is an IP address or not */
1698
static int isip(const char *hostname)
{
  /* try the dotted quad form first, then the IPv6 form */
  if (is_ip4(hostname))
    return 1;

  return is_ip6(hostname) ? 1 : 0;
}
1702
1703
/**
1704
 * psl_is_cookie_domain_acceptable:
1705
 * @psl: PSL context pointer
1706
 * @hostname: The request hostname.
1707
 * @cookie_domain: The domain value from a cookie
1708
 *
1709
 * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
1710
 * @hostname.
1711
 *
1712
 * For international domain names both, @hostname and @cookie_domain, have to be either in UTF-8 (lowercase + NFKC)
1713
 * or in ASCII/ACE (punycode) format. Other encodings or mixing UTF-8 and punycode likely result in incorrect return values.
1714
 *
1715
 * Use helper function psl_str_to_utf8lower() for normalization of @hostname and @cookie_domain.
1716
 *
1717
 * Hint for Windows users:
1718
 * Please make sure the calling application has called WSAStartup() before calling psl_is_cookie_domain_acceptable().
1719
 *
1720
 * Examples:
1721
 * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
1722
 * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
1723
 *
1724
 * 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
1725
 *  but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
1726
 *
1727
 * Returns: 1 if acceptable, 0 if not acceptable.
1728
 *
1729
 * Since: 0.1
1730
 */
1731
int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
{
  const char *tail;
  size_t host_len, cookie_len;

  if (!psl || !hostname || !cookie_domain)
    return 0;

  /* leading dots of the cookie domain are ignored */
  for (; *cookie_domain == '.'; cookie_domain++)
    ;

  /* an exact match is acceptable (and pretty common) */
  if (strcmp(hostname, cookie_domain) == 0)
    return 1;

  /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
  if (isip(hostname))
    return 0;

  cookie_len = strlen(cookie_domain);
  host_len = strlen(hostname);

  /* cookie_domain is too long */
  if (cookie_len >= host_len)
    return 0;

  /* cookie_domain has to be a suffix of hostname that starts right after a dot */
  tail = hostname + host_len - cookie_len;
  if (strcmp(tail, cookie_domain) != 0 || tail[-1] != '.')
    return 0;

  /* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
  tail = psl_unregistrable_domain(psl, hostname);
  if (!tail)
    return 1;

  return cookie_len > strlen(tail) ? 1 : 0;
}
1767
1768
/**
1769
 * psl_free_string:
1770
 * @str: pointer to lowercase string returned by psl_str_to_utf8lower()
1771
 *
1772
 * This function free()'s the memory allocated by psl_str_to_utf8lower() when
1773
 * returning a lowercase string
1774
 *
1775
 * Since: 0.19
1776
 */
1777
void psl_free_string(char *str)
{
  /* free() accepts NULL, so no explicit check is needed */
  free(str);
}
1782
1783
#if defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1784
/* Avoid using strcasecmp() or _stricmp() */
1785
0
/*
 * Return 1 if 's' is the encoding name "utf-8" (letters compared
 * case-insensitively), else 0.
 * The && chain stops at the first mismatch, so no byte past the
 * terminating 0 of a shorter string is read.
 */
static int isUTF8(const char *s) {
  return (s[0] == 'u' || s[0] == 'U')
    && (s[1] == 't' || s[1] == 'T')
    && (s[2] == 'f' || s[2] == 'F')
    && s[3] == '-' && s[4] == '8' && s[5] == 0;
}
1791
#endif
1792
1793
/**
1794
 * psl_str_to_utf8lower:
1795
 * @str: string to convert
1796
 * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
1797
 * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
1798
 * @lower: return value containing the converted string
1799
 *
1800
 * This helper function converts a string to UTF-8 lowercase + NFKC representation.
1801
 * Lowercase + NFKC UTF-8 is needed as input to the domain checking functions.
1802
 *
1803
 * @lower stays unchanged on error.
1804
 *
1805
 * When returning PSL_SUCCESS, the return value 'lower' must be freed after usage.
1806
 *
1807
 * Returns: psl_error_t value.
1808
 *   PSL_SUCCESS: Success
1809
 *   PSL_ERR_INVALID_ARG: @str is a %NULL value.
1810
 *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
1811
 *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode
1812
 *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
1813
 *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
1814
 *   PSL_ERR_NO_MEM: Failed to allocate memory
1815
 *
1816
 * Since: 0.4
1817
 */
1818
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
1819
0
{
1820
0
  int ret = PSL_ERR_INVALID_ARG;
1821
1822
0
  (void) encoding;
1823
0
  (void) locale;
1824
1825
0
  if (!str)
1826
0
    return PSL_ERR_INVALID_ARG;
1827
1828
  /* shortcut to avoid costly conversion */
1829
0
  if (str_is_ascii(str)) {
1830
0
    if (lower) {
1831
0
      char *p, *tmp;
1832
1833
0
      if (!(tmp = psl_strdup(str)))
1834
0
        return PSL_ERR_NO_MEM;
1835
1836
0
      *lower = tmp;
1837
1838
      /* convert ASCII string to lowercase */
1839
0
      for (p = *lower; *p; p++)
1840
0
        if (isupper(*p))
1841
0
          *p = tolower(*p);
1842
0
    }
1843
0
    return PSL_SUCCESS;
1844
0
  }
1845
1846
#ifdef WITH_LIBICU
1847
#define STACK_STRLENGTH 256
1848
  do {
1849
  UErrorCode status = 0;
1850
  UChar *utf16_dst, *utf16_lower;
1851
  char *utf8_lower;
1852
  int32_t utf16_dst_length, utf16_dst_size, utf16_lower_size, utf8_lower_size;
1853
  UConverter *uconv;
1854
  UChar utf16_dst_buf[STACK_STRLENGTH * 2 + 1];
1855
  UChar utf16_lower_buf[STACK_STRLENGTH * 2 + 1];
1856
  char utf8_lower_buf[STACK_STRLENGTH * 6 + 1];
1857
  size_t str_length = strlen(str);
1858
1859
  if (str_length <= STACK_STRLENGTH) {
1860
    utf16_dst_size = countof(utf16_dst_buf);
1861
    utf16_lower_size = countof(utf16_lower_buf);
1862
    utf8_lower_size = countof(utf8_lower_buf);
1863
    utf16_dst   = utf16_dst_buf;
1864
    utf16_lower = utf16_lower_buf;
1865
    utf8_lower  = utf8_lower_buf;
1866
  } else {
1867
    utf16_dst_size = utf16_lower_size = str_length * 2 + 1;
1868
    utf8_lower_size = str_length * 6 + 1;
1869
    utf16_dst   = malloc(sizeof(UChar) * utf16_dst_size);
1870
    utf16_lower = malloc(sizeof(UChar) * utf16_lower_size);
1871
    utf8_lower  = malloc(sizeof(char) * utf8_lower_size);
1872
1873
    if (!utf16_dst || !utf16_lower || !utf8_lower) {
1874
      ret = PSL_ERR_NO_MEM;
1875
      goto out;
1876
    }
1877
  }
1878
1879
  uconv = ucnv_open(encoding, &status);
1880
  if (U_SUCCESS(status)) {
1881
    utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, utf16_dst_size, str, str_length, &status);
1882
    ucnv_close(uconv);
1883
1884
    if (U_SUCCESS(status)) {
1885
      int32_t utf16_lower_length = u_strToLower(utf16_lower, utf16_lower_size, utf16_dst, utf16_dst_length, locale, &status);
1886
      if (U_SUCCESS(status)) {
1887
        u_strToUTF8(utf8_lower, utf8_lower_size, NULL, utf16_lower, utf16_lower_length, &status);
1888
        if (U_SUCCESS(status)) {
1889
          ret = PSL_SUCCESS;
1890
          if (lower) {
1891
            char *tmp = psl_strdup(utf8_lower);
1892
1893
            if (tmp)
1894
              *lower = tmp;
1895
            else
1896
              ret = PSL_ERR_NO_MEM;
1897
          }
1898
        } else {
1899
          ret = PSL_ERR_TO_UTF8;
1900
          /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
1901
        }
1902
      } else {
1903
        ret = PSL_ERR_TO_LOWER;
1904
        /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
1905
      }
1906
    } else {
1907
      ret = PSL_ERR_TO_UTF16;
1908
      /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
1909
    }
1910
  } else {
1911
    ret = PSL_ERR_CONVERTER;
1912
    /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
1913
  }
1914
out:
1915
  if (utf16_dst != utf16_dst_buf)
1916
    free(utf16_dst);
1917
  if (utf16_lower != utf16_lower_buf)
1918
    free(utf16_lower);
1919
  if (utf8_lower != utf8_lower_buf)
1920
    free(utf8_lower);
1921
1922
  } while (0);
1923
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1924
0
  do {
1925
    /* find out local charset encoding */
1926
0
    if (!encoding) {
1927
0
#ifdef HAVE_NL_LANGINFO
1928
0
      encoding = nl_langinfo(CODESET);
1929
#elif defined _WIN32
1930
      static char buf[16];
1931
      snprintf(buf, sizeof(buf), "CP%u", GetACP());
1932
      encoding = buf;
1933
#endif
1934
0
      if (!encoding || !*encoding)
1935
0
        encoding = "ASCII";
1936
0
    }
1937
1938
    /* convert to UTF-8 */
1939
0
    if (!isUTF8(encoding)) {
1940
0
      iconv_t cd = iconv_open("utf-8", encoding);
1941
1942
0
      if (cd != (iconv_t)-1) {
1943
0
        char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
1944
0
        size_t tmp_len = strlen(str) + 1;
1945
0
        size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
1946
0
        char *dst = malloc(dst_len + 1), *dst_tmp = dst;
1947
1948
0
        if (!dst) {
1949
0
          ret = PSL_ERR_NO_MEM;
1950
0
        }
1951
0
        else if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
1952
0
          && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
1953
0
        {
1954
          /* start size for u8_tolower internal memory allocation.
1955
           * u8_tolower() does not terminate the result string. we have 0 byte included in above tmp_len
1956
           * and thus in len. */
1957
0
          size_t len = dst_len - dst_len_tmp;
1958
1959
0
          if ((tmp = (char *)u8_tolower((uint8_t *)dst, len, 0, UNINORM_NFKC, NULL, &len))) {
1960
0
            ret = PSL_SUCCESS;
1961
0
            if (lower) {
1962
0
              *lower = tmp;
1963
0
              tmp = NULL;
1964
0
            } else
1965
0
              free(tmp);
1966
0
          } else {
1967
0
            ret = PSL_ERR_TO_LOWER;
1968
            /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1969
0
          }
1970
0
        } else {
1971
0
          ret = PSL_ERR_TO_UTF8;
1972
          /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1973
0
        }
1974
1975
0
        free(dst);
1976
0
        iconv_close(cd);
1977
0
      } else {
1978
0
        ret = PSL_ERR_TO_UTF8;
1979
        /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1980
0
      }
1981
0
    } else {
1982
      /* we need a conversion to lowercase */
1983
0
      uint8_t *tmp;
1984
1985
      /* start size for u8_tolower internal memory allocation.
1986
       * u8_tolower() does not terminate the result string, so include terminating 0 byte in len. */
1987
0
      size_t len = u8_strlen((uint8_t *)str) + 1;
1988
1989
0
      if ((tmp = u8_tolower((uint8_t *)str, len, 0, UNINORM_NFKC, NULL, &len))) {
1990
0
        ret = PSL_SUCCESS;
1991
0
        if (lower) {
1992
0
          *lower = (char*)tmp;
1993
0
          tmp = NULL;
1994
0
        } else
1995
0
          free(tmp);
1996
0
      } else {
1997
0
        ret = PSL_ERR_TO_LOWER;
1998
        /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1999
0
      }
2000
0
    }
2001
2002
0
  } while (0);
2003
0
#endif
2004
2005
0
  return ret;
2006
0
}
2007
2008
/* if file is newer than the builtin data, insert it reverse sorted by mtime */
2009
static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
2010
9.50k
{
2011
9.50k
  struct_stat st;
2012
9.50k
  int it;
2013
2014
9.50k
  if (fname && *fname && func_sys_stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
2015
    /* add file name and mtime to end of array */
2016
0
    psl_fname[n] = fname;
2017
0
    psl_mtime[n++] = st.st_mtime;
2018
2019
    /* move the new entry to it's correct position */
2020
0
    for (it = n - 2; it >= 0 && st.st_mtime > psl_mtime[it]; it--) {
2021
0
      psl_fname[it + 1] = psl_fname[it];
2022
0
      psl_mtime[it + 1] = psl_mtime[it];
2023
0
      psl_fname[it] = fname;
2024
0
      psl_mtime[it] = st.st_mtime;
2025
0
    }
2026
0
  }
2027
2028
9.50k
  return n;
2029
9.50k
}
2030
2031
/**
2032
 * psl_latest:
2033
 * @fname: Name of PSL file or %NULL
2034
 *
2035
 * This function loads the latest available PSL data from either
2036
 * - @fname (application specific filename, may be %NULL)
2037
 * - location specified during build-time (filename from ./configure --with-psl-distfile)
2038
 * - built-in PSL data (generated from ./configure --with-psl-file)
2039
 * - location of built-in data (filename from ./configure --with-psl-file)
2040
 *
2041
 * If none of the above is available, the function returns %NULL.
2042
 *
2043
 * To free the allocated resources, call psl_free().
2044
 *
2045
 * Returns: Pointer to a PSL context or %NULL on failure.
2046
 *
2047
 * Since: 0.16
2048
 */
2049
psl_ctx_t *psl_latest(const char *fname)
2050
3.16k
{
2051
3.16k
  psl_ctx_t *psl;
2052
3.16k
  const char *psl_fname[3];
2053
3.16k
  time_t psl_mtime[3];
2054
3.16k
  int it, ntimes;
2055
2056
3.16k
  psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
2057
2058
  /* create array of PSL files reverse sorted by mtime (latest first) */
2059
3.16k
  ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
2060
3.16k
  ntimes = insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
2061
3.16k
  ntimes = insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
2062
2063
  /* load PSL data from the latest file, falling back to the second recent, ... */
2064
3.16k
  for (psl = NULL, it = 0; it < ntimes; it++) {
2065
0
    if (psl_mtime[it] > _psl_file_time)
2066
0
      if ((psl = psl_load_file(psl_fname[it])))
2067
0
        break;
2068
0
  }
2069
2070
  /* if file loading failed or there is no file newer than the builtin data,
2071
   * then return the builtin data. */
2072
3.16k
  return psl ? psl : (psl_ctx_t *) psl_builtin();
2073
3.16k
}