Coverage Report

Created: 2025-03-06 06:58

/src/libpsl/src/psl.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright(c) 2014-2024 Tim Ruehsen
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
 * DEALINGS IN THE SOFTWARE.
21
 *
22
 * This file is part of libpsl.
23
 *
24
 * Public Suffix List routines
25
 *
26
 * Changelog
27
 * 19.03.2014  Tim Ruehsen  created from libmget/cookie.c
28
 *
29
 */
30
31
#if HAVE_CONFIG_H
32
# include <config.h>
33
#endif
34
35
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
36
#       define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
37
#else
38
#       define GCC_VERSION_AT_LEAST(major, minor) 0
39
#endif
40
41
#include <sys/types.h>
42
#include <sys/stat.h>
43
44
#if defined(_WIN32) && (defined(WITH_LIBIDN2) || defined(WITH_LIBIDN))
45
# ifndef WIN32_LEAN_AND_MEAN
46
# define WIN32_LEAN_AND_MEAN
47
# endif
48
# include <windows.h> /* for GetACP() */
49
#endif
50
51
#if defined(_MSC_VER) && ! defined(ssize_t)
52
# include <basetsd.h>
53
typedef SSIZE_T ssize_t;
54
#endif
55
56
#include <stdio.h>
57
#include <stdlib.h>
58
#include <string.h>
59
#include <ctype.h>
60
#include <time.h>
61
#include <errno.h>
62
#include <limits.h> /* for UINT_MAX */
63
64
#ifdef HAVE_NL_LANGINFO
65
# include <langinfo.h>
66
#endif
67
68
#ifdef _WIN32
69
# include <malloc.h>
70
#endif
71
72
#ifdef WITH_LIBICU
73
# include <unicode/uversion.h>
74
# include <unicode/ustring.h>
75
# include <unicode/uidna.h>
76
# include <unicode/ucnv.h>
77
#elif defined(WITH_LIBIDN2)
78
# include <iconv.h>
79
# include <idn2.h>
80
# include <unicase.h>
81
# include <unistr.h>
82
#elif defined(WITH_LIBIDN)
83
# include <iconv.h>
84
# include <stringprep.h>
85
# include <idna.h>
86
# include <unicase.h>
87
# include <unistr.h>
88
#endif
89
90
#ifdef WINICONV_CONST
91
#  define ICONV_CONST WINICONV_CONST
92
#endif
93
#ifndef ICONV_CONST
94
#  define ICONV_CONST
95
#endif
96
97
98
#include <libpsl.h>
99
100
/**
101
 * SECTION:libpsl
102
 * @short_description: Public Suffix List library functions
103
 * @title: libpsl
104
 * @stability: Stable
105
 * @include: libpsl.h
106
 *
107
 * [Public Suffix List](https://publicsuffix.org/) library functions.
108
 *
109
 */
110
111
#define countof(a) (sizeof(a)/sizeof(*(a)))
112
113
0
#define PRIV_PSL_FLAG_EXCEPTION (1<<0)
114
0
#define PRIV_PSL_FLAG_WILDCARD  (1<<1)
115
0
#define PRIV_PSL_FLAG_ICANN     (1<<2) /* entry of ICANN section */
116
0
#define PRIV_PSL_FLAG_PRIVATE   (1<<3) /* entry of PRIVATE section */
117
0
#define PRIV_PSL_FLAG_PLAIN     (1<<4) /* just used for PSL syntax checking */
118
119
/* One PSL rule. The rule text lives in label_buf; label is the pointer used for lookups. */
typedef struct {
  char label_buf[128];   /* inline storage for the rule text */
  const char *label;     /* rule text; suffix_compare() falls back to label_buf when this is NULL */
  unsigned short length; /* length of the rule text */
  unsigned char nlabels; /* number of labels */
  unsigned char flags;   /* PRIV_PSL_FLAG_* bits */
} psl_entry_t;
130
131
/* stripped down version libmget vector routines */
132
typedef struct {
133
  int
134
    (*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
135
  psl_entry_t
136
    **entry; /* pointer to array of pointers to elements */
137
  int
138
    max,     /* allocated elements */
139
    cur;     /* number of elements in use */
140
} psl_vector_t;
141
142
struct psl_ctx_st {
143
  psl_vector_t
144
    *suffixes;
145
  unsigned char
146
    *dafsa;
147
  size_t
148
    dafsa_size;
149
  int
150
    nsuffixes,
151
    nexceptions,
152
    nwildcards;
153
  unsigned
154
    utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */
155
};
156
157
/* include the PSL data generated by psl-make-dafsa */
158
#ifdef ENABLE_BUILTIN
159
#include "suffixes_dafsa.h"
160
#else
161
static const unsigned char kDafsa[] = "";
162
static time_t _psl_file_time = 0;
163
static int _psl_nsuffixes = 0;
164
static int _psl_nexceptions = 0;
165
static int _psl_nwildcards = 0;
166
static const char _psl_sha1_checksum[] = "";
167
static const char _psl_filename[] = "";
168
#endif
169
170
/* references to these PSLs will result in lookups to built-in data */
171
static const psl_ctx_t
172
  builtin_psl;
173
174
#ifdef PSL_DISTFILE
175
static const char _psl_dist_filename[] = PSL_DISTFILE;
176
#else
177
static const char _psl_dist_filename[] = "";
178
#endif
179
180
static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
181
0
{
182
0
  psl_vector_t *v;
183
184
0
  if (!(v = calloc(1, sizeof(psl_vector_t))))
185
0
    return NULL;
186
187
0
  if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
188
0
    free(v);
189
0
    return NULL;
190
0
  }
191
192
0
  v->max = max;
193
0
  v->cmp = cmp;
194
0
  return v;
195
0
}
196
197
/*
 * Release a vector, all entries it owns, and reset the caller's pointer.
 *
 * @v: address of the vector pointer; may be NULL or point to NULL (no-op).
 *
 * After the call *v is NULL, so a repeated vector_free() or a stale use of the
 * pointer cannot touch freed memory.
 */
static void vector_free(psl_vector_t **v)
{
  psl_vector_t *vec;
  int it;

  if (!v || !*v)
    return;

  vec = *v;

  if (vec->entry) {
    /* every slot below `cur` was malloc'ed by vector_add() */
    for (it = 0; it < vec->cur; it++)
      free(vec->entry[it]);

    free(vec->entry);
  }

  free(vec);
  *v = NULL;
}
211
212
static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
213
0
{
214
0
  if (pos < 0 || !v || pos >= v->cur) return NULL;
215
216
0
  return v->entry[pos];
217
0
}
218
219
/* the entries must already be sorted by v->cmp (see vector_sort()) for the binary search to work */
220
static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
221
0
{
222
0
  if (v) {
223
0
    int l, r, m;
224
0
    int res;
225
226
    /* binary search for element (exact match) */
227
0
    for (l = 0, r = v->cur - 1; l <= r;) {
228
0
      m = (l + r) / 2;
229
0
      if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
230
0
      else if (res < 0) r = m - 1;
231
0
      else return m;
232
0
    }
233
0
  }
234
235
0
  return -1; /* not found */
236
0
}
237
238
/*
 * Append a heap-allocated copy of `elem` to the vector.
 *
 * @v:    target vector (NULL is rejected)
 * @elem: entry to copy (NULL is rejected)
 *
 * Returns the index of the new entry, or -1 on error. On error the vector is
 * left exactly as it was.
 *
 * The copy is a plain memcpy(), so its `label` member still holds whatever
 * pointer `elem->label` held; callers that want it to refer to the copy's own
 * label_buf must reset it afterwards.
 */
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
{
  psl_entry_t *copy;

  if (!v || !elem)
    return -1;

  if (v->cur >= v->max) {
    psl_entry_t **grown;
    int new_max;

    if (v->max > INT_MAX / 2)
      return -1; /* doubling would overflow int */

    /* a zero-capacity vector must still be able to grow */
    new_max = v->max > 0 ? v->max * 2 : 1;

    grown = realloc(v->entry, (size_t) new_max * sizeof *grown);
    if (!grown)
      return -1;

    /* commit the new capacity only once the storage really exists */
    v->entry = grown;
    v->max = new_max;
  }

  if (!(copy = malloc(sizeof *copy)))
    return -1;

  memcpy(copy, elem, sizeof *copy);

  v->entry[v->cur] = copy;
  return v->cur++;
}
265
266
/*
 * Sort the vector's entry pointers with its comparison function.
 * Does nothing when `v` is NULL or has no comparison function.
 */
static void vector_sort(psl_vector_t *v)
{
  if (!v || !v->cmp)
    return;

  /* the array elements are psl_entry_t pointers, so size them from the array itself */
  qsort(v->entry, v->cur, sizeof *v->entry, (int(*)(const void *, const void *))v->cmp);
}
271
272
/* by this kind of sorting, we can easily see if a domain matches or not */
273
static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
274
0
{
275
0
  int n;
276
277
0
  if ((n = s2->nlabels - s1->nlabels))
278
0
    return n; /* most labels first */
279
280
0
  if ((n = s1->length - s2->length))
281
0
    return n;  /* shorter rules first */
282
283
0
  return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf);
284
0
}
285
286
/* needed to sort array of pointers, given to qsort() */
287
/* Adapter for arrays of entry pointers: dereference both sides and defer to suffix_compare(). */
static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
{
  const psl_entry_t *lhs = *s1;
  const psl_entry_t *rhs = *s2;

  return suffix_compare(lhs, rhs);
}
291
292
/*
 * Initialise `suffix` from the rule text `rule`.
 *
 * @suffix: entry to fill; label is pointed at label_buf
 * @rule:   rule text
 * @length: number of bytes of `rule` to use
 *
 * Copies at most `length` bytes (stopping early at a NUL), NUL-terminates
 * label_buf, counts the labels ('.' separators + 1) and stores the number of
 * bytes copied in `length`. `flags` is not touched.
 *
 * Returns 0 on success, or -1 (with nlabels set to 0) when `length` is too
 * large for label_buf.
 */
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
{
  size_t i;

  suffix->label = suffix->label_buf;

  if (length >= sizeof(suffix->label_buf) - 1) {
    suffix->nlabels = 0;
    /* fprintf(stderr, "Suffix rule too long (%zd, ignored): %s\n", length, rule); */
    return -1;
  }

  suffix->nlabels = 1;

  /* bound the copy by the validated length so an over-long rule cannot overrun label_buf */
  for (i = 0; i < length && rule[i]; i++) {
    if (rule[i] == '.')
      suffix->nlabels++;
    suffix->label_buf[i] = rule[i];
  }
  suffix->label_buf[i] = 0;

  /* i <= length < sizeof(label_buf), so this always fits */
  suffix->length = (unsigned short) i;

  return 0;
}
318
319
/* Return a malloc'ed copy of the NUL-terminated string `s`, or NULL if allocation fails. */
static char *psl_strdup(const char *s)
{
  const size_t size = strlen(s) + 1; /* include the terminator */
  char *copy = malloc(size);

  if (copy == NULL)
    return NULL;

  return memcpy(copy, s, size);
}
326
327
#if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU)
328
/*
329
 * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII
330
 * representation of non-ASCII characters in labels as found in UTF-8 domain names.
331
 * This is because the current DAFSA format used may only hold character values [21..127].
332
 *
333
  Code copied from http://www.nicemice.net/idn/punycode-spec.gz on
334
  2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133
335
  labeled punycode-spec 1.0.3 (2006-Mar-24-Thu).  It is modified for
336
  libpsl by Tim Rühsen.  License on the original code:
337
338
  punycode-spec 1.0.3 (2006-Mar-23-Thu)
339
  http://www.nicemice.net/idn/
340
  Adam M. Costello
341
  http://www.nicemice.net/amc/
342
343
  B. Disclaimer and license
344
345
    Regarding this entire document or any portion of it (including
346
    the pseudocode and C code), the author makes no guarantees and
347
    is not responsible for any damage resulting from its use.  The
348
    author grants irrevocable permission to anyone to use, modify,
349
    and distribute it in any way that does not diminish the rights
350
    of anyone else to use, modify, and distribute it, provided that
351
    redistributed derivative works do not contain misleading author or
352
    version information.  Derivative works need not be licensed under
353
    similar terms.
354
355
  C. Punycode sample implementation
356
357
  punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
358
  http://www.nicemice.net/idn/
359
  Adam M. Costello
360
  http://www.nicemice.net/amc/
361
362
  This is ANSI C code (C89) implementing Punycode 1.0.x.
363
 */
364
/* Result codes of the punycode encoder. */
enum punycode_status {
  /* Conversion succeeded. */
  punycode_success = 0,
  /* Input is invalid. */
  punycode_bad_input = 1,
  /* Output would exceed the space provided. */
  punycode_big_output = 2,
  /* Wider integers needed to process input. */
  punycode_overflow = 3
};
370
371
#ifdef PUNYCODE_UINT
372
  typedef PUNYCODE_UINT punycode_uint;
373
#elif UINT_MAX >= (1 << 26) - 1
374
  typedef unsigned int punycode_uint;
375
#else
376
  typedef unsigned long punycode_uint;
377
#endif
378
379
/*** Bootstring parameters for Punycode ***/
380
enum {
  base = 36,
  tmin = 1,
  tmax = 26,
  skew = 38,
  damp = 700,
  initial_bias = 72,
  initial_n = 0x80, /* first non-basic code point */
  delimiter = 0x2D  /* '-' */
};
384
385
/*
 * Map a punycode digit value to its ASCII character:
 *  0..25 map to 'a'..'z' (97..122)
 * 26..35 map to '0'..'9' (48..57)
 */
static char encode_digit(punycode_uint d)
{
  if (d < 26)
    return d + 97;

  return d + 22;
}
391
#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
392
static const punycode_uint maxint = -1;
393
394
/* Punycode bias adaptation: derive the next bias from the delta just encoded. */
static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime)
{
  punycode_uint k = 0;

  /* scale down: heavily damped on the first call, halved afterwards */
  if (firsttime)
    delta = delta / damp;
  else
    delta = delta >> 1;

  delta += delta / numpoints;

  while (delta > ((base - tmin) * tmax) / 2) {
    delta /= base - tmin;
    k += base;
  }

  return k + (base - tmin + 1) * delta / (delta + skew);
}
408
409
static enum punycode_status punycode_encode(
410
  size_t input_length_orig,
411
  const punycode_uint input[],
412
  size_t *output_length,
413
  char output[])
414
{
415
  punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t;
416
  size_t out, max_out;
417
418
  /* The Punycode spec assumes that the input length is the same type */
419
  /* of integer as a code point, so we need to convert the size_t to  */
420
  /* a punycode_uint, which could overflow.                           */
421
422
  if (input_length_orig > maxint)
423
    return punycode_overflow;
424
425
  input_length = (punycode_uint) input_length_orig;
426
427
  /* Initialize the state: */
428
429
  n = initial_n;
430
  delta = 0;
431
  out = 0;
432
  max_out = *output_length;
433
  bias = initial_bias;
434
435
  /* Handle the basic code points: */
436
  for (j = 0; j < input_length; ++j) {
437
    if (input[j] < 0x80) {
438
      if (max_out - out < 2)
439
        return punycode_big_output;
440
      output[out++] = (char) input[j];
441
    }
442
    /* else if (input[j] < n) return punycode_bad_input; */
443
    /* (not needed for Punycode with unsigned code points) */
444
  }
445
446
  h = b = (punycode_uint) out;
447
  /* cannot overflow because out <= input_length <= maxint */
448
449
  /* h is the number of code points that have been handled, b is the  */
450
  /* number of basic code points, and out is the number of ASCII code */
451
  /* points that have been output.                                    */
452
453
  if (b > 0)
454
    output[out++] = delimiter;
455
456
  /* Main encoding loop: */
457
458
  while (h < input_length) {
459
    /* All non-basic code points < n have been     */
460
    /* handled already.  Find the next larger one: */
461
462
    for (m = maxint, j = 0; j < input_length; ++j) {
463
      /* if (basic(input[j])) continue; */
464
      /* (not needed for Punycode) */
465
      if (input[j] >= n && input[j] < m)
466
        m = input[j];
467
    }
468
469
    /* Increase delta enough to advance the decoder's    */
470
    /* <n,i> state to <m,0>, but guard against overflow: */
471
472
    if (m - n > (maxint - delta) / (h + 1))
473
      return punycode_overflow;
474
    delta += (m - n) * (h + 1);
475
    n = m;
476
477
    for (j = 0; j < input_length; ++j) {
478
      /* Punycode does not need to check whether input[j] is basic: */
479
      if (input[j] < n /* || basic(input[j]) */) {
480
        if (++delta == 0)
481
          return punycode_overflow;
482
      }
483
484
      if (input[j] == n) {
485
        /* Represent delta as a generalized variable-length integer: */
486
487
        for (q = delta, k = base;; k += base) {
488
          if (out >= max_out)
489
            return punycode_big_output;
490
          t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
491
            k >= bias + tmax ? tmax : k - bias;
492
          if (q < t)
493
            break;
494
          output[out++] = encode_digit(t + (q - t) % (base - t));
495
          q = (q - t) / (base - t);
496
        }
497
498
        output[out++] = encode_digit(q);
499
        bias = adapt(delta, h + 1, h == b);
500
        delta = 0;
501
        ++h;
502
      }
503
    }
504
505
    ++delta, ++n;
506
  }
507
508
  *output_length = out;
509
  return punycode_success;
510
}
511
512
/*
 * Decode `inlen` bytes of UTF-8 from `in` into code points in `out`.
 *
 * @in:     UTF-8 input (need not be NUL-terminated)
 * @inlen:  number of input bytes
 * @out:    destination array
 * @outlen: capacity of `out` in elements; one element is held back, so at most
 *          outlen - 1 code points are written
 *
 * Returns the number of code points written, or -1 if `outlen` is 0 or the
 * input contains a malformed or truncated sequence. Decoding stops silently
 * once outlen - 1 code points have been produced.
 */
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
{
  size_t n = 0;
  const unsigned char *s = (const unsigned char *) in;
  const unsigned char *e = s + inlen;

  if (!outlen)
    return -1;

  outlen--;

  while (n < outlen) {
    size_t inleft = (size_t) (e - s);

    if (!inleft)
      break; /* all input consumed */

    if ((*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */
      out[n++] = *s;
      s++;
    } else if (inleft >= 2 && (*s & 0xE0) == 0xC0) { /* 110xxxxx 10xxxxxx */
      if ((s[1] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((punycode_uint) (s[0] & 0x1F) << 6) | (s[1] & 0x3F);
      s += 2;
    } else if (inleft >= 3 && (*s & 0xF0) == 0xE0) { /* 1110xxxx 10xxxxxx 10xxxxxx */
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((punycode_uint) (s[0] & 0x0F) << 12)
        | ((punycode_uint) (s[1] & 0x3F) << 6)
        | (s[2] & 0x3F);
      s += 3;
    } else if (inleft >= 4 && (*s & 0xF8) == 0xF0) { /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
        return -1;
      /* each continuation byte contributes its own 6 bits: s[1], s[2], then s[3] */
      out[n++] = ((punycode_uint) (s[0] & 0x07) << 18)
        | ((punycode_uint) (s[1] & 0x3F) << 12)
        | ((punycode_uint) (s[2] & 0x3F) << 6)
        | (s[3] & 0x3F);
      s += 4;
    } else
      return -1; /* invalid lead byte or truncated sequence */
  }

  return (ssize_t) n;
}
552
553
/* Return 1 if none of the first `n` bytes of `s` has its high bit set, else 0. */
static int mem_is_ascii(const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *)s;
  size_t i;

  for (i = 0; i < n; i++) {
    if (bytes[i] & 0x80)
      return 0;
  }

  return 1;
}
561
562
static int domain_to_punycode(const char *domain, char *out, size_t outsize)
563
{
564
  size_t outlen = 0, labellen;
565
  punycode_uint input[256];
566
  const char *label, *e;
567
568
  for (e = label = domain; e;) {
569
    e = strchr(label, '.');
570
    labellen = e ? (size_t) (e - label) : strlen(label);
571
572
    if (mem_is_ascii(label, labellen)) {
573
      if (outlen + labellen + (e != NULL) >= outsize)
574
        return 1;
575
576
      memcpy(out + outlen, label, labellen);
577
      outlen += labellen;
578
    } else {
579
      ssize_t inputlen = 0;
580
581
      if (outlen + labellen + (e != NULL) + 4 >= outsize)
582
        return 1;
583
584
      if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0)
585
        return 1;
586
587
      memcpy(out + outlen, "xn--", 4);
588
      outlen += 4;
589
590
      labellen = outsize - outlen - (e != NULL) - 1; // -1 to leave space for the trailing \0
591
      if (punycode_encode(inputlen, input, &labellen, out + outlen))
592
        return 1;
593
      outlen += labellen;
594
    }
595
596
    if (e) {
597
      label = e + 1;
598
      out[outlen++] = '.';
599
    }
600
    out[outlen] = 0;
601
  }
602
603
  return 0;
604
}
605
#endif
606
607
/* Return 1 for space, tab, carriage return or newline, 0 for any other character. */
static int isspace_ascii(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
611
612
/* Return 1 if the NUL-terminated string `s` has no byte >= 128, else 0. */
static int str_is_ascii(const char *s)
{
  const unsigned char *p;

  for (p = (const unsigned char *)s; *p; p++) {
    if (*p >= 128)
      return 0;
  }

  return 1;
}
618
619
#if defined(WITH_LIBIDN)
620
/*
621
 * Work around a libidn <= 1.30 vulnerability.
622
 *
623
 * The function checks for a valid UTF-8 character sequence before
624
 * passing it to idna_to_ascii_8z().
625
 *
626
 * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
627
 * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
628
 * [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
629
 */
630
/*
 * Structural check of a NUL-terminated UTF-8 string: every lead byte must be
 * followed by the right number of 10xxxxxx continuation bytes.
 * Returns 1 if the string passes, 0 otherwise.
 */
static int utf8_is_valid(const char *utf8)
{
  const unsigned char *s = (const unsigned char *) utf8;

  while (*s) {
    int trailing, i;

    if ((*s & 0x80) == 0) /* 0xxxxxxx ASCII char */
      trailing = 0;
    else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
      trailing = 1;
    else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
      trailing = 2;
    else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      trailing = 3;
    else
      return 0;

    /* checked in order, so a terminating NUL stops the scan before reading past it */
    for (i = 1; i <= trailing; i++) {
      if ((s[i] & 0xC0) != 0x80)
        return 0;
    }

    s += trailing + 1;
  }

  return 1;
}
655
#endif
656
657
typedef void *psl_idna_t;
658
659
/*
 * Open an IDNA handle. With libicu this is a UTS#46 instance from
 * uidna_openUTS46(); every other configuration yields NULL.
 */
static psl_idna_t *psl_idna_open(void)
{
#if defined(WITH_LIBICU)
  UErrorCode err = 0;
  void *handle = (void *)uidna_openUTS46(UIDNA_USE_STD3_RULES | UIDNA_NONTRANSITIONAL_TO_ASCII, &err);

  return handle;
#else
  return NULL;
#endif
}
667
668
/* Close a handle returned by psl_idna_open(); a NULL handle is ignored. */
static void psl_idna_close(psl_idna_t *idna)
{
#if defined(WITH_LIBICU)
  if (idna != NULL)
    uidna_close((UIDNA *)idna);
#else
  (void) idna; /* nothing to release without libicu */
#endif
}
677
678
/*
 * Convert the UTF-8 domain name `utf8` to its ASCII (ACE/punycode) form.
 *
 * The backend is chosen at build time: libicu, libidn2 (two variants depending
 * on IDN2_VERSION_NUMBER), libidn, or the built-in domain_to_punycode().
 *
 * @idna:  handle from psl_idna_open(); only the libicu branch uses it, and that
 *         branch does nothing (ret stays -1) when it is NULL
 * @utf8:  NUL-terminated input
 * @ascii: receives the converted string; the callers in this file release it
 *         with free()
 *
 * Returns 0 on success, -1 on failure.
 * With a NULL `ascii` the libicu branch still reports success and discards the
 * result, while the built-in branch reports failure.
 */
static int psl_idna_toASCII(psl_idna_t *idna, const char *utf8, char **ascii)
679
0
{
680
0
  int ret = -1;
681
682
#if defined(WITH_LIBICU)
683
  (void) idna;
684
685
  /* IDNA2008 UTS#46 punycode conversion */
686
  if (idna) {
687
    char lookupname_buf[128] = "", *lookupname = lookupname_buf;
688
    UErrorCode status = 0;
689
    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
690
    UChar utf16_dst[128], utf16_src_buf[128];
691
    UChar *utf16_src = utf16_src_buf;
692
    int32_t utf16_src_length, bytes_written;
693
    int32_t utf16_dst_length;
694
695
    /* step 1: UTF-8 -> UTF-16, first into the stack buffer */
    u_strFromUTF8(utf16_src, countof(utf16_src_buf), &utf16_src_length, utf8, -1, &status);
696
    if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */
697
698
    if (utf16_src_length >= (int) countof(utf16_src_buf)) {
699
      utf16_src = malloc((utf16_src_length + 1) * sizeof(UChar));
700
      if (!utf16_src) goto cleanup;
701
702
      u_strFromUTF8(utf16_src, utf16_src_length, NULL, utf8, -1, &status);
703
      if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */
704
705
      utf16_src[utf16_src_length] = 0; /* u_strFromUTF8() doesn't 0-terminate if dest is filled up */
706
    }
707
708
    /* step 2: IDNA toASCII on the UTF-16 name */
    utf16_dst_length = uidna_nameToASCII((UIDNA *)idna, utf16_src, utf16_src_length, utf16_dst, countof(utf16_dst), &info, &status);
709
    if (!U_SUCCESS(status)) goto cleanup; /* to ASCII conversion failed */
710
711
    /* step 3: UTF-16 result -> UTF-8 bytes, first into the stack buffer */
    u_strToUTF8(lookupname, sizeof(lookupname_buf), &bytes_written, utf16_dst, utf16_dst_length, &status);
712
    if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */
713
714
    if (bytes_written >= (int) sizeof(lookupname_buf)) {
715
      lookupname = malloc(bytes_written + 1);
716
      if (!lookupname) goto cleanup;
717
718
      u_strToUTF8(lookupname, bytes_written, NULL, utf16_dst, utf16_dst_length, &status);
719
      if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */
720
721
      lookupname[bytes_written] = 0; /* u_strToUTF8() doesn't 0-terminate if dest is filled up */
722
    } else {
723
      /* the result sits in the stack buffer: hand out a heap copy instead */
      if (!(lookupname = psl_strdup(lookupname)))
724
        goto cleanup;
725
    }
726
727
    if (ascii) {
728
      *ascii = lookupname;
729
      lookupname = NULL; /* ownership moved to the caller; keep cleanup from freeing it */
730
    }
731
732
    ret = 0;
733
734
cleanup:
735
    /* free only what was heap-allocated; the stack buffers are left alone */
    if (lookupname != lookupname_buf)
736
      free(lookupname);
737
    if (utf16_src != utf16_src_buf)
738
      free(utf16_src);
739
  }
740
#elif defined(WITH_LIBIDN2)
741
#if IDN2_VERSION_NUMBER >= 0x00140000
742
0
  int rc;
743
744
0
  (void) idna;
745
746
  /* IDN2_TRANSITIONAL automatically converts to lowercase
747
   * IDN2_NFC_INPUT converts to NFC before toASCII conversion
748
   * Since IDN2_TRANSITIONAL implicitly does NFC conversion, we don't need
749
   * the additional IDN2_NFC_INPUT. But just for the unlikely case that the linked
750
   * library is not matching the headers when building and it doesn't support TR46,
751
   * we provide IDN2_NFC_INPUT. */
752
753
0
  if ((rc = idn2_lookup_u8((uint8_t *)utf8, (uint8_t **)ascii, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL)) == IDN2_OK)
754
0
    ret = 0;
755
  /* else
756
    fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */
757
#else
758
  int rc;
759
  uint8_t *lower;
760
  size_t len = u8_strlen((uint8_t *)utf8) + 1;
761
762
  /* we need a conversion to lowercase */
763
  if (!(lower = u8_tolower((uint8_t *)utf8, len, 0, UNINORM_NFKC, NULL, &len))) {
764
    /* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", utf8, errno); */
765
    return -1;
766
  }
767
768
  if ((rc = idn2_lookup_u8(lower, (uint8_t **)ascii, 0)) == IDN2_OK) {
769
    ret = 0;
770
  } /* else
771
    fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */
772
773
  free(lower);
774
#endif
775
#elif defined(WITH_LIBIDN)
776
  int rc;
777
778
  (void) idna;
779
780
  if (!utf8_is_valid(utf8)) {
781
    /* fprintf(stderr, "Invalid UTF-8 sequence not converted: '%s'\n", utf8); */
782
    return -1;
783
  }
784
785
  /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */
786
787
  if ((rc = idna_to_ascii_8z(utf8, ascii, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
788
    ret = 0;
789
  } /* else
790
    fprintf(stderr, "toASCII failed (%d): %s\n", rc, idna_strerror(rc)); */
791
#else
792
  /* no IDNA library: encode with domain_to_punycode() into a fixed 128-byte buffer */
  char lookupname[128];
793
794
  (void) idna;
795
796
  if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
797
    if (ascii)
798
      if ((*ascii = psl_strdup(lookupname)))
799
        ret = 0;
800
  }
801
#endif
802
803
0
  return ret;
804
0
}
805
806
/*
 * If the rule text of `e` is not pure ASCII, convert it with psl_idna_toASCII()
 * and, when the converted text differs, append an entry carrying that text and
 * the same flags to `v`. Conversion and allocation failures are ignored.
 */
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
{
  psl_entry_t converted, *stored;
  char *ace;

  if (str_is_ascii(e->label_buf))
    return;

  if (psl_idna_toASCII(idna, e->label_buf, &ace) != 0)
    return;

  /* fprintf(stderr, "toASCII '%s' -> '%s'\n", e->label_buf, ace); */
  if (strcmp(e->label_buf, ace) != 0
      && suffix_init(&converted, ace, strlen(ace)) == 0)
  {
    converted.flags = e->flags;

    stored = vector_get(v, vector_add(v, &converted));
    if (stored)
      stored->label = stored->label_buf; /* set label to changed address */
  } /* else ignore */

  free(ace);
}
828
829
/* prototypes */
830
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
831
int GetUtfMode(const unsigned char *graph, size_t length);
832
833
static int is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
834
0
{
835
0
  psl_entry_t suffix;
836
0
  const char *p;
837
0
  char *punycode = NULL;
838
0
  int need_conversion = 0;
839
840
  /* this function should be called without leading dots, just make sure */
841
0
  if (*domain == '.')
842
0
    domain++;
843
844
0
  suffix.nlabels = 1;
845
846
0
  for (p = domain; *p; p++) {
847
0
    if (*p == '.') {
848
0
      if (suffix.nlabels == 255) /* weird input, avoid 8bit overflow */
849
0
        return 0;
850
0
      suffix.nlabels++;
851
0
    }
852
0
    else if (*((unsigned char *)p) >= 128)
853
0
      need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
854
0
  }
855
856
0
  if (suffix.nlabels == 1) {
857
    /* TLD, this is the prevailing '*' match. If type excludes the '*' rule, continue.
858
     */
859
0
    if (!(type & PSL_TYPE_NO_STAR_RULE))
860
0
      return 1;
861
0
  }
862
863
0
  type &= ~PSL_TYPE_NO_STAR_RULE;
864
865
0
  if (psl->utf8 || psl == &builtin_psl)
866
0
    need_conversion = 0;
867
868
0
  if (need_conversion) {
869
0
    psl_idna_t *idna = psl_idna_open();
870
871
0
    if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
872
0
      suffix.label = punycode;
873
0
      suffix.length = strlen(punycode);
874
0
    } else {
875
      /* fallback */
876
877
0
      suffix.label = domain;
878
0
      suffix.length = p - suffix.label;
879
0
    }
880
881
0
    psl_idna_close(idna);
882
0
  } else {
883
0
    suffix.label = domain;
884
0
    suffix.length = p - suffix.label;
885
0
  }
886
887
0
  if (psl == &builtin_psl || psl->dafsa) {
888
0
    size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
889
0
    const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
890
0
    int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
891
0
    if (rc != -1) {
892
      /* check for correct rule type */
893
0
      if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
894
0
        goto suffix_no;
895
0
      else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
896
0
        goto suffix_no;
897
898
0
      if (rc & PRIV_PSL_FLAG_EXCEPTION)
899
0
        goto suffix_no;
900
901
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
902
      /* definitely a match, no matter if the found rule is a wildcard or not */
903
0
      goto suffix_yes;
904
0
    }
905
0
    if ((suffix.label = strchr(suffix.label, '.'))) {
906
0
      suffix.label++;
907
0
      suffix.length = strlen(suffix.label);
908
0
      suffix.nlabels--;
909
910
0
      rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
911
0
      if (rc != -1) {
912
        /* check for correct rule type */
913
0
        if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
914
0
          goto suffix_no;
915
0
        else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
916
0
          goto suffix_no;
917
918
0
        if (rc & PRIV_PSL_FLAG_WILDCARD)
919
0
          goto suffix_yes;
920
0
      }
921
0
    }
922
0
  } else {
923
0
    psl_entry_t *rule = vector_get(psl->suffixes, 0);
924
925
0
    if (!rule || rule->nlabels < suffix.nlabels - 1)
926
0
      goto suffix_no;
927
928
0
    rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
929
930
0
    if (rule) {
931
      /* check for correct rule type */
932
0
      if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
933
0
        goto suffix_no;
934
0
      else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
935
0
        goto suffix_no;
936
937
0
      if (rule->flags & PRIV_PSL_FLAG_EXCEPTION)
938
0
        goto suffix_no;
939
940
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
941
      /* definitely a match, no matter if the found rule is a wildcard or not */
942
0
      goto suffix_yes;
943
0
    }
944
945
0
    if ((suffix.label = strchr(suffix.label, '.'))) {
946
0
      suffix.label++;
947
0
      suffix.length = strlen(suffix.label);
948
0
      suffix.nlabels--;
949
950
0
      rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
951
952
0
      if (rule) {
953
        /* check for correct rule type */
954
0
        if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
955
0
          goto suffix_no;
956
0
        else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
957
0
          goto suffix_no;
958
959
0
        if (rule->flags & PRIV_PSL_FLAG_WILDCARD)
960
0
          goto suffix_yes;
961
0
      }
962
0
    }
963
0
  }
964
965
0
suffix_no:
966
0
  if (punycode)
967
0
    free(punycode);
968
0
  return 0;
969
970
0
suffix_yes:
971
0
  if (punycode)
972
0
    free(punycode);
973
0
  return 1;
974
0
}
975
976
/**
977
 * psl_is_public_suffix:
978
 * @psl: PSL context
979
 * @domain: Domain string
980
 *
981
 * This function checks if @domain is a public suffix by the means of the
982
 * [Mozilla Public Suffix List](https://publicsuffix.org).
983
 *
984
 * For cookie domain checking see psl_is_cookie_domain_acceptable().
985
 *
986
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
987
 * Other encodings likely result in incorrect return values.
988
 * Use the helper function psl_str_to_utf8lower() to normalize @domain.
989
 *
990
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
991
 * psl_builtin().
992
 *
993
 * Returns: 1 if domain is a public suffix, 0 if not.
994
 *
995
 * Since: 0.1
996
 */
997
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
998
0
{
999
0
  if (!psl || !domain)
1000
0
    return 1;
1001
1002
0
  return is_public_suffix(psl, domain, PSL_TYPE_ANY);
1003
0
}
1004
1005
/**
1006
 * psl_is_public_suffix2:
1007
 * @psl: PSL context
1008
 * @domain: Domain string
1009
 * @type: Domain type
1010
 *
1011
 * This function checks if @domain is a public suffix by the means of the
1012
 * [Mozilla Public Suffix List](https://publicsuffix.org).
1013
 *
1014
 * @type specifies the PSL section where to perform the lookup. Valid values are
1015
 * %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN, %PSL_TYPE_NO_STAR_RULE, and %PSL_TYPE_ANY.
1016
 *
1017
 * %PSL_TYPE_NO_STAR_RULE switches of the 'prevailing star rule' (see
1018
 * [List](https://publicsuffix.org/list) under 'Algorithm' 2.).
1019
 * Applying the flag means that TLDs not explicitly listed in the PSL are *not* treated as public suffixes.
1020
 *
1021
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1022
 * Other encodings likely result in incorrect return values.
1023
 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1024
 *
1025
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1026
 * psl_builtin().
1027
 *
1028
 * Returns: 1 if domain is a public suffix, 0 if not.
1029
 *
1030
 * Since: 0.1
1031
 */
1032
int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
1033
0
{
1034
0
  if (!psl || !domain)
1035
0
    return 1;
1036
1037
0
  return is_public_suffix(psl, domain, type);
1038
0
}
1039
1040
/**
1041
 * psl_unregistrable_domain:
1042
 * @psl: PSL context
1043
 * @domain: Domain string
1044
 *
1045
 * This function finds the longest public suffix part of @domain by the means
1046
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1047
 *
1048
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1049
 * Other encodings likely result in incorrect return values.
1050
 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1051
 *
1052
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1053
 * psl_builtin().
1054
 *
1055
 * Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
1056
 * does not contain a public suffix (or if @psl is %NULL).
1057
 *
1058
 * Since: 0.1
1059
 */
1060
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
1061
0
{
1062
0
  int nlabels = 0;
1063
0
  const char *p;
1064
1065
0
  if (!psl || !domain)
1066
0
    return NULL;
1067
1068
  /*
1069
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1070
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1071
   */
1072
0
  for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1073
0
    if (*p == '.' && ++nlabels > 8) {
1074
0
      domain = p + 1;
1075
0
      break;
1076
0
    }
1077
0
  }
1078
1079
  /*
1080
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
1081
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1082
   */
1083
1084
0
  while (!is_public_suffix(psl, domain, 0)) {
1085
0
    if ((domain = strchr(domain, '.')))
1086
0
      domain++;
1087
0
    else
1088
0
      break; /* prevent endless loop if is_public_suffix() is broken. */
1089
0
  }
1090
1091
0
  return domain;
1092
0
}
1093
1094
/**
1095
 * psl_registrable_domain:
1096
 * @psl: PSL context
1097
 * @domain: Domain string
1098
 *
1099
 * This function finds the shortest private suffix part of @domain by the means
1100
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1101
 *
1102
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1103
 * Other encodings likely result in incorrect return values.
1104
 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1105
 *
1106
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1107
 * psl_builtin().
1108
 *
1109
 * Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
1110
 * does not contain a private suffix (or if @psl is %NULL).
1111
 *
1112
 * Since: 0.1
1113
 */
1114
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
1115
0
{
1116
0
  const char *p, *regdom = NULL;
1117
0
  int nlabels = 0;
1118
1119
0
  if (!psl || !domain || *domain == '.')
1120
0
    return NULL;
1121
1122
  /*
1123
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1124
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1125
   */
1126
0
  for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1127
0
    if (*p == '.' && ++nlabels > 8) {
1128
0
      domain = p + 1;
1129
0
      break;
1130
0
    }
1131
0
  }
1132
1133
  /*
1134
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
1135
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1136
   */
1137
1138
0
  while (!is_public_suffix(psl, domain, 0)) {
1139
0
    if ((p = strchr(domain, '.'))) {
1140
0
      regdom = domain;
1141
0
      domain = p + 1;
1142
0
    } else
1143
0
      break; /* prevent endless loop if is_public_suffix() is broken. */
1144
0
  }
1145
1146
0
  return regdom;
1147
0
}
1148
1149
/**
1150
 * psl_load_file:
1151
 * @fname: Name of PSL file
1152
 *
1153
 * This function loads the public suffixes file named @fname.
1154
 * To free the allocated resources, call psl_free().
1155
 *
1156
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1157
 *
1158
 * Returns: Pointer to a PSL context or %NULL on failure.
1159
 *
1160
 * Since: 0.1
1161
 */
1162
psl_ctx_t *psl_load_file(const char *fname)
1163
0
{
1164
0
  FILE *fp;
1165
0
  psl_ctx_t *psl = NULL;
1166
1167
0
  if (!fname)
1168
0
    return NULL;
1169
1170
0
  if ((fp = fopen(fname, "rb"))) {
1171
0
    psl = psl_load_fp(fp);
1172
0
    fclose(fp);
1173
0
  }
1174
1175
0
  return psl;
1176
0
}
1177
1178
/**
1179
 * psl_load_fp:
1180
 * @fp: %FILE pointer
1181
 *
1182
 * This function loads the public suffixes from a %FILE pointer.
1183
 * To free the allocated resources, call psl_free().
1184
 *
1185
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1186
 *
1187
 * Returns: Pointer to a PSL context or %NULL on failure.
1188
 *
1189
 * Since: 0.1
1190
 */
1191
psl_ctx_t *psl_load_fp(FILE *fp)
1192
0
{
1193
0
  psl_ctx_t *psl;
1194
0
  psl_entry_t suffix, *suffixp;
1195
0
  char buf[256], *linep, *p;
1196
0
  int type = 0, is_dafsa;
1197
0
  psl_idna_t *idna;
1198
1199
0
  if (!fp)
1200
0
    return NULL;
1201
1202
0
  if (!(psl = calloc(1, sizeof(psl_ctx_t))))
1203
0
    return NULL;
1204
1205
  /* read first line to allow ASCII / DAFSA detection */
1206
0
  if (!(linep = fgets(buf, sizeof(buf) - 1, fp)))
1207
0
    goto fail;
1208
1209
0
  is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11);
1210
1211
0
  if (is_dafsa) {
1212
0
    void *m;
1213
0
    size_t size = 65536, n, len = 0;
1214
0
    int version = atoi(buf + 11);
1215
1216
0
    if (version != 0)
1217
0
      goto fail;
1218
1219
0
    if (!(psl->dafsa = malloc(size)))
1220
0
      goto fail;
1221
1222
0
    memcpy(psl->dafsa, buf, len);
1223
1224
0
    while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
1225
0
      len += n;
1226
0
      if (len >= size) {
1227
0
        if (!(m = realloc(psl->dafsa, size *= 2)))
1228
0
          goto fail;
1229
0
        psl->dafsa = m;
1230
0
      }
1231
0
    }
1232
1233
    /* release unused memory */
1234
0
    if ((m = realloc(psl->dafsa, len)))
1235
0
      psl->dafsa = m;
1236
0
    else if (!len)
1237
0
      psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
1238
1239
0
    psl->dafsa_size = len;
1240
0
    psl->utf8 = !!GetUtfMode(psl->dafsa, len);
1241
1242
0
    return psl;
1243
0
  }
1244
1245
0
  idna = psl_idna_open();
1246
1247
  /*
1248
   *  as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
1249
   *  as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
1250
   *  as of 07.10.2018, the list at https://publicsuffix.org/list/ contains ~8600 rules and 8 exceptions.
1251
   */
1252
0
  psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
1253
0
  psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
1254
1255
0
  do {
1256
0
    while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
1257
0
    if (!*linep) continue; /* skip empty lines */
1258
1259
0
    if (*linep == '/' && linep[1] == '/') {
1260
0
      if (!type) {
1261
0
        if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
1262
0
          type = PRIV_PSL_FLAG_ICANN;
1263
0
        else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
1264
0
          type = PRIV_PSL_FLAG_PRIVATE;
1265
0
      }
1266
0
      else if (type == PRIV_PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
1267
0
        type = 0;
1268
0
      else if (type == PRIV_PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
1269
0
        type = 0;
1270
1271
0
      continue; /* skip comments */
1272
0
    }
1273
1274
    /* parse suffix rule */
1275
0
    for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
1276
0
    *linep = 0;
1277
1278
0
    if (*p == '!') {
1279
0
      p++;
1280
0
      suffix.flags = PRIV_PSL_FLAG_EXCEPTION | type;
1281
0
      psl->nexceptions++;
1282
0
    } else if (*p == '*') {
1283
0
      if (*++p != '.') {
1284
        /* fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", p - 1); */
1285
0
        continue;
1286
0
      }
1287
0
      p++;
1288
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
1289
0
      suffix.flags = PRIV_PSL_FLAG_WILDCARD | PRIV_PSL_FLAG_PLAIN | type;
1290
0
      psl->nwildcards++;
1291
0
      psl->nsuffixes++;
1292
0
    } else {
1293
0
      suffix.flags = PRIV_PSL_FLAG_PLAIN | type;
1294
0
      psl->nsuffixes++;
1295
0
    }
1296
1297
0
    if (suffix_init(&suffix, p, linep - p) == 0) {
1298
0
      int index;
1299
1300
0
      if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
1301
        /* Found existing entry:
1302
         * Combination of exception and plain rule is ambiguous
1303
         * !foo.bar
1304
         * foo.bar
1305
         *
1306
         * Allowed:
1307
         * !foo.bar + *.foo.bar
1308
         * foo.bar + *.foo.bar
1309
         *
1310
         * We do not check here, let's do it later.
1311
         */
1312
1313
0
        suffixp = vector_get(psl->suffixes, index);
1314
0
        suffixp->flags |= suffix.flags;
1315
0
      } else {
1316
        /* New entry */
1317
0
        suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
1318
0
      }
1319
1320
0
      if (suffixp) {
1321
0
        suffixp->label = suffixp->label_buf; /* set label to changed address */
1322
0
        add_punycode_if_needed(idna, psl->suffixes, suffixp);
1323
0
      }
1324
0
    }
1325
0
  } while ((linep = fgets(buf, sizeof(buf), fp)));
1326
1327
0
  vector_sort(psl->suffixes);
1328
1329
0
  psl_idna_close(idna);
1330
1331
0
  return psl;
1332
1333
0
fail:
1334
0
  psl_free(psl);
1335
0
  return NULL;
1336
0
}
1337
1338
/**
1339
 * psl_free:
1340
 * @psl: PSL context pointer
1341
 *
1342
 * This function frees the the PSL context that has been retrieved via
1343
 * psl_load_fp() or psl_load_file().
1344
 *
1345
 * Since: 0.1
1346
 */
1347
void psl_free(psl_ctx_t *psl)
1348
0
{
1349
0
  if (psl && psl != &builtin_psl) {
1350
0
    vector_free(&psl->suffixes);
1351
0
    free(psl->dafsa);
1352
0
    free(psl);
1353
0
  }
1354
0
}
1355
1356
/**
1357
 * psl_builtin:
1358
 *
1359
 * This function returns the PSL context that has been generated and built in at compile-time.
1360
 * You don't have to free the returned context explicitly.
1361
 *
1362
 * The builtin data also contains punycode entries, one for each international domain name.
1363
 *
1364
 * If the generation of built-in data has been disabled during compilation, %NULL will be returned.
1365
 * When using the builtin psl context, you can provide UTF-8 (lowercase + NFKC) or ASCII/ACE (punycode)
1366
 * representations of domains to functions like psl_is_public_suffix().
1367
 *
1368
 * Returns: Pointer to the built in PSL data or %NULL if this data is not available.
1369
 *
1370
 * Since: 0.1
1371
 */
1372
const psl_ctx_t *psl_builtin(void)
1373
0
{
1374
0
#ifdef ENABLE_BUILTIN
1375
0
  return &builtin_psl;
1376
#else
1377
  return NULL;
1378
#endif
1379
0
}
1380
1381
/**
1382
 * psl_suffix_count:
1383
 * @psl: PSL context pointer
1384
 *
1385
 * This function returns number of public suffixes maintained by @psl.
1386
 * The number of exceptions within the Public Suffix List are not included.
1387
 *
1388
 * If the information is not available, the return value is -1 (since 0.19).
1389
 * This is the case with DAFSA blobs or if @psl is %NULL.
1390
 *
1391
 * Returns: Number of public suffixes entries in PSL context or -1 if this information is not available.
1392
 *
1393
 * Since: 0.1
1394
 */
1395
int psl_suffix_count(const psl_ctx_t *psl)
1396
0
{
1397
0
  if (psl == &builtin_psl)
1398
0
    return _psl_nsuffixes;
1399
0
  else if (psl)
1400
0
    return psl->dafsa ? -1 : psl->nsuffixes;
1401
0
  else
1402
0
    return -1;
1403
0
}
1404
1405
/**
1406
 * psl_suffix_exception_count:
1407
 * @psl: PSL context pointer
1408
 *
1409
 * This function returns number of public suffix exceptions maintained by @psl.
1410
 *
1411
 * If the information is not available, the return value is -1 (since 0.19).
1412
 * This is the case with DAFSA blobs or if @psl is %NULL.
1413
 *
1414
 * Returns: Number of public suffix exceptions in PSL context or -1 if this information is not available.
1415
 *
1416
 * Since: 0.1
1417
 */
1418
int psl_suffix_exception_count(const psl_ctx_t *psl)
1419
0
{
1420
0
  if (psl == &builtin_psl)
1421
0
    return _psl_nexceptions;
1422
0
  else if (psl)
1423
0
    return psl->dafsa ? -1 : psl->nexceptions;
1424
0
  else
1425
0
    return -1;
1426
0
}
1427
1428
/**
1429
 * psl_suffix_wildcard_count:
1430
 * @psl: PSL context pointer
1431
 *
1432
 * This function returns number of public suffix wildcards maintained by @psl.
1433
 *
1434
 * If the information is not available, the return value is -1 (since 0.19).
1435
 * This is the case with DAFSA blobs or if @psl is %NULL.
1436
 *
1437
 * Returns: Number of public suffix wildcards in PSL context or -1 if this information is not available.
1438
 *
1439
 * Since: 0.10.0
1440
 */
1441
int psl_suffix_wildcard_count(const psl_ctx_t *psl)
1442
0
{
1443
0
  if (psl == &builtin_psl)
1444
0
    return _psl_nwildcards;
1445
0
  else if (psl)
1446
0
    return psl->dafsa ? -1 : psl->nwildcards;
1447
0
  else
1448
0
    return -1;
1449
0
}
1450
1451
/**
1452
 * psl_builtin_file_time:
1453
 *
1454
 * This function returns the mtime of the Public Suffix List file that has been built in.
1455
 *
1456
 * If the generation of built-in data has been disabled during compilation, 0 will be returned.
1457
 *
1458
 * Returns: time_t value or 0.
1459
 *
1460
 * Since: 0.1
1461
 */
1462
time_t psl_builtin_file_time(void)
1463
0
{
1464
0
  return _psl_file_time;
1465
0
}
1466
1467
/**
1468
 * psl_builtin_sha1sum:
1469
 *
1470
 * This function returns the SHA1 checksum of the Public Suffix List file that has been built in.
1471
 * The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
1472
 *
1473
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1474
 *
1475
 * Returns: String containing SHA1 checksum or an empty string.
1476
 *
1477
 * Since: 0.1
1478
 */
1479
const char *psl_builtin_sha1sum(void)
1480
0
{
1481
0
  return _psl_sha1_checksum;
1482
0
}
1483
1484
/**
1485
 * psl_builtin_filename:
1486
 *
1487
 * This function returns the file name of the Public Suffix List file that has been built in.
1488
 *
1489
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1490
 *
1491
 * Returns: String containing the PSL file name or an empty string.
1492
 *
1493
 * Since: 0.1
1494
 */
1495
const char *psl_builtin_filename(void)
1496
0
{
1497
0
  return _psl_filename;
1498
0
}
1499
1500
/**
1501
 * psl_builtin_outdated:
1502
 *
1503
 * This function checks if the built-in data is older than the file it has been created from.
1504
 * If it is, it might be a good idea for the application to reload the PSL.
1505
 * The mtime is taken as reference.
1506
 *
1507
 * If the PSL file does not exist, it is assumed that the built-in data is not outdated.
1508
 *
1509
 * Returns: 1 if the built-in is outdated, 0 otherwise.
1510
 *
1511
 * Since: 0.10.0
1512
 */
1513
int psl_builtin_outdated(void)
1514
0
{
1515
0
  struct stat st;
1516
1517
0
  if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
1518
0
    return 1;
1519
1520
0
  return 0;
1521
0
}
1522
1523
/**
1524
 * psl_dist_filename:
1525
 *
1526
 * This function returns the file name of the distribution/system PSL data file.
1527
 * This file will be considered by psl_latest().
1528
 *
1529
 * Return the filename that is set by ./configure --with-psl-distfile, or an empty string.
1530
 *
1531
 * Returns: String containing a PSL file name or an empty string.
1532
 *
1533
 * Since: 0.16
1534
 */
1535
const char *psl_dist_filename(void)
1536
0
{
1537
0
  return _psl_dist_filename;
1538
0
}
1539
1540
/**
1541
 * psl_get_version:
1542
 *
1543
 * Get libpsl version.
1544
 *
1545
 * Returns: String containing version of libpsl.
1546
 *
1547
 * Since: 0.2.5
1548
 **/
1549
const char *psl_get_version(void)
1550
0
{
1551
#ifdef WITH_LIBICU
1552
  return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
1553
#elif defined(WITH_LIBIDN2)
1554
0
  return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
1555
#elif defined(WITH_LIBIDN)
1556
  return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
1557
#else
1558
  return PACKAGE_VERSION " (no IDNA support)";
1559
#endif
1560
0
}
1561
1562
/**
1563
 * psl_check_version_number:
1564
 * @version: Version number (hex) to check against.
1565
 *
1566
 * Check the given version number is at minimum the current library version number.
1567
 * The version number must be a hexadecimal number like 0x000a01 (V0.10.1).
1568
 *
1569
 * Returns: Returns the library version number if the given version number is at least
1570
 * the version of the library, else return 0; If the argument is 0, the function returns
1571
 * the library version number without performing a check.
1572
 *
1573
 * Since: 0.11.0
1574
 **/
1575
int psl_check_version_number(int version)
1576
0
{
1577
0
  if (version) {
1578
0
    int major = version >> 16;
1579
0
    int minor = (version >> 8) & 0xFF;
1580
0
    int patch = version & 0xFF;
1581
1582
0
    if (major < PSL_VERSION_MAJOR
1583
0
      || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR)
1584
0
      || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH))
1585
0
    {
1586
0
      return 0;
1587
0
    }
1588
0
  }
1589
1590
0
  return PSL_VERSION_NUMBER;
1591
0
}
1592
/*
 * Return true if 's' is a valid dotted quad, else false.
 *
 * A dotted quad consists of exactly four decimal octets separated by single
 * dots. Each octet has one to three digits and a value of at most 255;
 * nothing may follow the fourth octet.
 *
 * Assume that characters '0'..'9' have consecutive byte values.
 * credit:
 *    inspired by Paul Vixie
 */
static int is_ip4(const char *s)
{
  int i, digits, n;
  unsigned char c = 0;

  for (i = 0; i < 4; i++) {
    /* every octet starts with a digit */
    c = *s++;
    if (c < '0' || c > '9')
      return 0;

    n = c - '0';
    for (digits = 1; (c = *s++) >= '0' && c <= '9'; digits++) {
      if (digits == 3)
        return 0; /* a fourth digit can never belong to a valid octet */
      n = n * 10 + (c - '0');
    }

    if (n > 255)
      return 0;

    /* c is the first non-digit after the octet: the first three need a dot */
    if (i < 3 && c != '.')
      return 0;
  }

  /* the fourth octet has to be followed by the end of the string */
  return !c;
}
1628
1629
/*
 * Return the value (0..15) of the hexadecimal digit 'c', or -1 if 'c' is not
 * a hexadecimal digit. Upper and lower case letters are both accepted.
 */
static int hexval(unsigned c)
{
  unsigned folded;

  if (c >= '0' && c <= '9')
    return (int) (c - '0');

  /* setting bit 5 maps 'A'..'F' onto 'a'..'f' */
  folded = c | 0x20;
  if (folded >= 'a' && folded <= 'f')
    return (int) (folded - 'a') + 10;

  return -1;
}
1636
1637
/*
 * Return true if 's' looks like a textual IPv6 address, else false.
 * Up to 8 groups of 1-4 hex digits separated by ':' are accepted, with at
 * most one '::' gap and an optional trailing dotted quad (checked by is_ip4()).
 *
 * Original code taken from musl inet_pton(),
 *   which has a standard MIT license (https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT).
 * Amended and simplified to our needs.
 */
static int is_ip6(const char *s)
{
  int group, ndigits, gap = -1;

  /* a leading ':' is only valid as the first half of '::' */
  if (s[0] == ':') {
    s++;
    if (s[0] != ':')
      return 0;
  }

  for (group = 0; ; group++) {
    /* second ':' of the one allowed '::' gap */
    if (s[0] == ':' && gap < 0) {
      gap = group;
      s++;
      if (s[0] == '\0')
        break;
      continue;
    }

    /* count the hex digits of this group (at most 4) */
    ndigits = 0;
    while (ndigits < 4 && hexval(s[ndigits]) >= 0)
      ndigits++;

    if (ndigits == 0)
      return 0;

    /* end of string: fine after a gap or after the 8th group */
    if (s[ndigits] == '\0' && (gap >= 0 || group == 7))
      break;

    if (group == 7)
      return 0;

    if (s[ndigits] == '.') {
      /* the digits just scanned start an embedded dotted quad */
      if (group < 6 && gap < 0)
        return 0;
      return is_ip4(s) ? 1 : 0;
    }

    if (s[ndigits] != ':')
      return 0;

    s += ndigits + 1;
  }

  return 1;
}
1671
1672
/* return 1 if hostname is an IPv4 or IPv6 address, else 0 */
static int isip(const char *hostname)
{
  if (is_ip4(hostname))
    return 1;

  return is_ip6(hostname) ? 1 : 0;
}
1677
1678
/**
1679
 * psl_is_cookie_domain_acceptable:
1680
 * @psl: PSL context pointer
1681
 * @hostname: The request hostname.
1682
 * @cookie_domain: The domain value from a cookie
1683
 *
1684
 * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
1685
 * @hostname.
1686
 *
1687
 * For international domain names both, @hostname and @cookie_domain, have to be either in UTF-8 (lowercase + NFKC)
1688
 * or in ASCII/ACE (punycode) format. Other encodings or mixing UTF-8 and punycode likely result in incorrect return values.
1689
 *
1690
 * Use helper function psl_str_to_utf8lower() for normalization of @hostname and @cookie_domain.
1691
 *
1692
 * Hint for Windows users:
1693
 * Please make sure the calling application has called WSAStartup() before calling psl_is_cookie_domain_acceptable().
1694
 *
1695
 * Examples:
1696
 * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
1697
 * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
1698
 *
1699
 * 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
1700
 *  but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
1701
 *
1702
 * Returns: 1 if acceptable, 0 if not acceptable.
1703
 *
1704
 * Since: 0.1
1705
 */
1706
int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
1707
0
{
1708
0
  const char *p;
1709
0
  size_t hostname_length, cookie_domain_length;
1710
1711
0
  if (!psl || !hostname || !cookie_domain)
1712
0
    return 0;
1713
1714
0
  while (*cookie_domain == '.')
1715
0
    cookie_domain++;
1716
1717
0
  if (!strcmp(hostname, cookie_domain))
1718
0
    return 1; /* an exact match is acceptable (and pretty common) */
1719
1720
0
  if (isip(hostname))
1721
0
    return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
1722
1723
0
  cookie_domain_length = strlen(cookie_domain);
1724
0
  hostname_length = strlen(hostname);
1725
1726
0
  if (cookie_domain_length >= hostname_length)
1727
0
    return 0; /* cookie_domain is too long */
1728
1729
0
  p = hostname + hostname_length - cookie_domain_length;
1730
0
  if (!strcmp(p, cookie_domain) && p[-1] == '.') {
1731
    /* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
1732
1733
0
    if (!(p = psl_unregistrable_domain(psl, hostname)))
1734
0
      return 1;
1735
1736
0
    if (cookie_domain_length > strlen(p))
1737
0
      return 1;
1738
0
  }
1739
1740
0
  return 0;
1741
0
}
1742
1743
/**
 * psl_free_string:
 * @str: pointer to lowercase string returned by psl_str_to_utf8lower()
 *
 * Releases the memory that psl_str_to_utf8lower() allocated for a returned
 * lowercase string. Passing %NULL is allowed and does nothing.
 *
 * Since: 0.19
 */
void psl_free_string(char *str)
{
  /* free() accepts NULL, so no explicit check is needed */
  free(str);
}
1757
1758
#if defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1759
/* Avoid using strcasecmp() or _stricmp() */
1760
0
/*
 * Return 1 if 's' is exactly "utf-8" (letters compared case-insensitively),
 * else 0. The comparison stops at the first mismatch, so no byte behind the
 * terminating 0 of a shorter string is read.
 */
static int isUTF8(const char *s) {
  return (s[0] == 'u' || s[0] == 'U')
    && (s[1] == 't' || s[1] == 'T')
    && (s[2] == 'f' || s[2] == 'F')
    && s[3] == '-'
    && s[4] == '8'
    && s[5] == 0;
}
1766
#endif
1767
1768
/**
1769
 * psl_str_to_utf8lower:
1770
 * @str: string to convert
1771
 * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
1772
 * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
1773
 * @lower: return value containing the converted string
1774
 *
1775
 * This helper function converts a string to UTF-8 lowercase + NFKC representation.
1776
 * Lowercase + NFKC UTF-8 is needed as input to the domain checking functions.
1777
 *
1778
 * @lower stays unchanged on error.
1779
 *
1780
 * When returning PSL_SUCCESS, the return value 'lower' must be freed after usage.
1781
 *
1782
 * Returns: psl_error_t value.
1783
 *   PSL_SUCCESS: Success
1784
 *   PSL_ERR_INVALID_ARG: @str is a %NULL value.
1785
 *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
1786
 *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode
1787
 *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
1788
 *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
1789
 *   PSL_ERR_NO_MEM: Failed to allocate memory
1790
 *
1791
 * Since: 0.4
1792
 */
1793
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
1794
0
{
1795
0
  /* ret keeps this value for non-ASCII input when neither WITH_LIBICU nor
   * WITH_LIBIDN2/WITH_LIBIDN is defined, since no conversion branch exists then */
  int ret = PSL_ERR_INVALID_ARG;
1796
1797
0
  /* encoding and locale are only referenced inside the #ifdef'd branches below;
   * presumably these casts silence unused-parameter warnings in other builds */
  (void) encoding;
1798
0
  (void) locale;
1799
1800
0
  if (!str)
1801
0
    return PSL_ERR_INVALID_ARG;
1802
1803
  /* shortcut to avoid costly conversion */
1804
0
  if (str_is_ascii(str)) {
1805
0
    /* a NULL @lower is accepted: then nothing is copied and only the status is reported */
    if (lower) {
1806
0
      char *p, *tmp;
1807
1808
0
      if (!(tmp = psl_strdup(str)))
1809
0
        return PSL_ERR_NO_MEM;
1810
1811
0
      /* *lower is only written once the copy exists */
      *lower = tmp;
1812
1813
      /* convert ASCII string to lowercase */
1814
0
      for (p = *lower; *p; p++)
1815
0
        if (isupper(*p))
1816
0
          *p = tolower(*p);
1817
0
    }
1818
0
    return PSL_SUCCESS;
1819
0
  }
1820
1821
#ifdef WITH_LIBICU
1822
#define STACK_STRLENGTH 256
1823
  /* ICU path: @str -> UTF-16 -> lowercase UTF-16 -> UTF-8 */
  do {
1824
  UErrorCode status = 0;
1825
  UChar *utf16_dst, *utf16_lower;
1826
  char *utf8_lower;
1827
  int32_t utf16_dst_length, utf16_dst_size, utf16_lower_size, utf8_lower_size;
1828
  UConverter *uconv;
1829
  UChar utf16_dst_buf[STACK_STRLENGTH * 2 + 1];
1830
  UChar utf16_lower_buf[STACK_STRLENGTH * 2 + 1];
1831
  char utf8_lower_buf[STACK_STRLENGTH * 6 + 1];
1832
  size_t str_length = strlen(str);
1833
1834
  /* inputs of up to STACK_STRLENGTH bytes use the fixed-size stack buffers,
   * longer inputs get heap buffers sized from str_length */
  if (str_length <= STACK_STRLENGTH) {
1835
    utf16_dst_size = countof(utf16_dst_buf);
1836
    utf16_lower_size = countof(utf16_lower_buf);
1837
    utf8_lower_size = countof(utf8_lower_buf);
1838
    utf16_dst   = utf16_dst_buf;
1839
    utf16_lower = utf16_lower_buf;
1840
    utf8_lower  = utf8_lower_buf;
1841
  } else {
1842
    utf16_dst_size = utf16_lower_size = str_length * 2 + 1;
1843
    utf8_lower_size = str_length * 6 + 1;
1844
    utf16_dst   = malloc(sizeof(UChar) * utf16_dst_size);
1845
    utf16_lower = malloc(sizeof(UChar) * utf16_lower_size);
1846
    utf8_lower  = malloc(sizeof(char) * utf8_lower_size);
1847
1848
    if (!utf16_dst || !utf16_lower || !utf8_lower) {
1849
      ret = PSL_ERR_NO_MEM;
1850
      goto out;
1851
    }
1852
  }
1853
1854
  uconv = ucnv_open(encoding, &status);
1855
  if (U_SUCCESS(status)) {
1856
    utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, utf16_dst_size, str, str_length, &status);
1857
    ucnv_close(uconv);
1858
1859
    if (U_SUCCESS(status)) {
1860
      int32_t utf16_lower_length = u_strToLower(utf16_lower, utf16_lower_size, utf16_dst, utf16_dst_length, locale, &status);
1861
      if (U_SUCCESS(status)) {
1862
        u_strToUTF8(utf8_lower, utf8_lower_size, NULL, utf16_lower, utf16_lower_length, &status);
1863
        if (U_SUCCESS(status)) {
1864
          ret = PSL_SUCCESS;
1865
          if (lower) {
1866
            /* the caller receives its own copy; the work buffer is released at 'out' */
            char *tmp = psl_strdup(utf8_lower);
1867
1868
            if (tmp)
1869
              *lower = tmp;
1870
            else
1871
              ret = PSL_ERR_NO_MEM;
1872
          }
1873
        } else {
1874
          ret = PSL_ERR_TO_UTF8;
1875
          /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
1876
        }
1877
      } else {
1878
        ret = PSL_ERR_TO_LOWER;
1879
        /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
1880
      }
1881
    } else {
1882
      ret = PSL_ERR_TO_UTF16;
1883
      /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
1884
    }
1885
  } else {
1886
    ret = PSL_ERR_CONVERTER;
1887
    /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
1888
  }
1889
out:
1890
  /* only heap buffers are freed; pointers that equal a stack buffer are left alone */
  if (utf16_dst != utf16_dst_buf)
1891
    free(utf16_dst);
1892
  if (utf16_lower != utf16_lower_buf)
1893
    free(utf16_lower);
1894
  if (utf8_lower != utf8_lower_buf)
1895
    free(utf8_lower);
1896
1897
  } while (0);
1898
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1899
0
  /* iconv path: convert to UTF-8 with iconv() unless @encoding already is UTF-8,
   * then lowercase with u8_tolower() using UNINORM_NFKC */
  do {
1900
    /* find out local charset encoding */
1901
0
    if (!encoding) {
1902
0
#ifdef HAVE_NL_LANGINFO
1903
0
      encoding = nl_langinfo(CODESET);
1904
#elif defined _WIN32
1905
      static char buf[16];
1906
      snprintf(buf, sizeof(buf), "CP%u", GetACP());
1907
      encoding = buf;
1908
#endif
1909
0
      /* fall back to "ASCII" if no encoding name could be determined */
      if (!encoding || !*encoding)
1910
0
        encoding = "ASCII";
1911
0
    }
1912
1913
    /* convert to UTF-8 */
1914
0
    if (!isUTF8(encoding)) {
1915
0
      iconv_t cd = iconv_open("utf-8", encoding);
1916
1917
0
      if (cd != (iconv_t)-1) {
1918
0
        char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
1919
0
        size_t tmp_len = strlen(str) + 1;
1920
0
        /* output buffer: 6 bytes per input byte (terminating 0 byte included), plus one spare byte */
        size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
1921
0
        char *dst = malloc(dst_len + 1), *dst_tmp = dst;
1922
1923
0
        if (!dst) {
1924
0
          ret = PSL_ERR_NO_MEM;
1925
0
        }
1926
0
        /* the second iconv() call with NULL input flushes the conversion state into dst */
        else if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
1927
0
          && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
1928
0
        {
1929
          /* start size for u8_tolower internal memory allocation.
1930
           * u8_tolower() does not terminate the result string. we have 0 byte included in above tmp_len
1931
           * and thus in len. */
1932
0
          size_t len = dst_len - dst_len_tmp;
1933
1934
0
          if ((tmp = (char *)u8_tolower((uint8_t *)dst, len, 0, UNINORM_NFKC, NULL, &len))) {
1935
0
            ret = PSL_SUCCESS;
1936
0
            /* the u8_tolower() result is either handed to the caller via *lower or freed here */
            if (lower) {
1937
0
              *lower = tmp;
1938
0
              tmp = NULL;
1939
0
            } else
1940
0
              free(tmp);
1941
0
          } else {
1942
0
            ret = PSL_ERR_TO_LOWER;
1943
            /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1944
0
          }
1945
0
        } else {
1946
0
          ret = PSL_ERR_TO_UTF8;
1947
          /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1948
0
        }
1949
1950
0
        free(dst);
1951
0
        iconv_close(cd);
1952
0
      } else {
1953
0
        /* iconv_open() failed; reported as PSL_ERR_TO_UTF8 in this branch */
        ret = PSL_ERR_TO_UTF8;
1954
        /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1955
0
      }
1956
0
    } else {
1957
      /* we need a conversion to lowercase */
1958
0
      uint8_t *tmp;
1959
1960
      /* start size for u8_tolower internal memory allocation.
1961
       * u8_tolower() does not terminate the result string, so include terminating 0 byte in len. */
1962
0
      size_t len = u8_strlen((uint8_t *)str) + 1;
1963
1964
0
      if ((tmp = u8_tolower((uint8_t *)str, len, 0, UNINORM_NFKC, NULL, &len))) {
1965
0
        ret = PSL_SUCCESS;
1966
0
        /* the u8_tolower() result is either handed to the caller via *lower or freed here */
        if (lower) {
1967
0
          *lower = (char*)tmp;
1968
0
          tmp = NULL;
1969
0
        } else
1970
0
          free(tmp);
1971
0
      } else {
1972
0
        ret = PSL_ERR_TO_LOWER;
1973
        /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1974
0
      }
1975
0
    }
1976
1977
0
  } while (0);
1978
0
#endif
1979
1980
0
  return ret;
1981
0
}
1982
1983
/* if file is newer than the builtin data, insert it reverse sorted by mtime */
1984
static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
1985
0
{
1986
0
  struct stat st;
1987
0
  int it;
1988
1989
0
  if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
1990
    /* add file name and mtime to end of array */
1991
0
    psl_fname[n] = fname;
1992
0
    psl_mtime[n++] = st.st_mtime;
1993
1994
    /* move the new entry to it's correct position */
1995
0
    for (it = n - 2; it >= 0 && st.st_mtime > psl_mtime[it]; it--) {
1996
0
      psl_fname[it + 1] = psl_fname[it];
1997
0
      psl_mtime[it + 1] = psl_mtime[it];
1998
0
      psl_fname[it] = fname;
1999
0
      psl_mtime[it] = st.st_mtime;
2000
0
    }
2001
0
  }
2002
2003
0
  return n;
2004
0
}
2005
2006
/**
2007
 * psl_latest:
2008
 * @fname: Name of PSL file or %NULL
2009
 *
2010
 * This function loads the latest available PSL data from either
2011
 * - @fname (application specific filename, may be %NULL)
2012
 * - location specified during build-time (filename from ./configure --with-psl-distfile)
2013
 * - built-in PSL data (generated from ./configure --with-psl-file)
2014
 * - location of built-in data (filename from ./configure --with-psl-file)
2015
 *
2016
 * If none of the above is available, the function returns %NULL.
2017
 *
2018
 * To free the allocated resources, call psl_free().
2019
 *
2020
 * Returns: Pointer to a PSL context or %NULL on failure.
2021
 *
2022
 * Since: 0.16
2023
 */
2024
psl_ctx_t *psl_latest(const char *fname)
2025
0
{
2026
0
  psl_ctx_t *psl;
2027
0
  const char *psl_fname[3];
2028
0
  time_t psl_mtime[3];
2029
0
  int it, ntimes;
2030
2031
0
  psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
2032
2033
  /* create array of PSL files reverse sorted by mtime (latest first) */
2034
0
  ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
2035
0
  ntimes = insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
2036
0
  ntimes = insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
2037
2038
  /* load PSL data from the latest file, falling back to the second recent, ... */
2039
0
  for (psl = NULL, it = 0; it < ntimes; it++) {
2040
0
    if (psl_mtime[it] > _psl_file_time)
2041
0
      if ((psl = psl_load_file(psl_fname[it])))
2042
0
        break;
2043
0
  }
2044
2045
  /* if file loading failed or there is no file newer than the builtin data,
2046
   * then return the builtin data. */
2047
0
  return psl ? psl : (psl_ctx_t *) psl_builtin();
2048
0
}