Coverage Report

Created: 2023-03-26 07:33

/src/libpsl/src/psl.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright(c) 2014-2022 Tim Ruehsen
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
 * DEALINGS IN THE SOFTWARE.
21
 *
22
 * This file is part of libpsl.
23
 *
24
 * Public Suffix List routines
25
 *
26
 * Changelog
27
 * 19.03.2014  Tim Ruehsen  created from libmget/cookie.c
28
 *
29
 */
30
31
#if HAVE_CONFIG_H
32
# include <config.h>
33
#endif
34
35
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
36
#       define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
37
#else
38
#       define GCC_VERSION_AT_LEAST(major, minor) 0
39
#endif
40
41
#if GCC_VERSION_AT_LEAST(2,95)
42
#  define PSL_UNUSED __attribute__ ((unused))
43
#else
44
#  define PSL_UNUSED
45
#endif
46
47
#include <sys/types.h>
48
#include <sys/stat.h>
49
50
#ifdef _WIN32
51
# include <winsock2.h>
52
# include <ws2tcpip.h>
53
#else
54
# include <sys/socket.h>
55
# include <netinet/in.h>
56
# include <unistd.h>
57
#endif
58
59
#if defined(_MSC_VER) && ! defined(ssize_t)
60
# include <basetsd.h>
61
typedef SSIZE_T ssize_t;
62
#endif
63
64
#include <stdio.h>
65
#include <stdlib.h>
66
#include <string.h>
67
#ifdef HAVE_STRINGS_H
68
# include <strings.h>
69
#endif
70
#include <ctype.h>
71
#include <time.h>
72
#include <errno.h>
73
#include <limits.h> /* for UINT_MAX */
74
75
#ifdef HAVE_NL_LANGINFO
76
# include <langinfo.h>
77
#endif
78
79
#ifndef _WIN32
80
# include <arpa/inet.h>
81
#endif
82
83
#ifdef HAVE_ALLOCA_H
84
# include <alloca.h>
85
#endif
86
87
#ifdef WITH_LIBICU
88
# include <unicode/uversion.h>
89
# include <unicode/ustring.h>
90
# include <unicode/uidna.h>
91
# include <unicode/ucnv.h>
92
#elif defined(WITH_LIBIDN2)
93
# include <iconv.h>
94
# include <idn2.h>
95
# include <unicase.h>
96
# include <unistr.h>
97
#elif defined(WITH_LIBIDN)
98
# include <iconv.h>
99
# include <stringprep.h>
100
# include <idna.h>
101
# include <unicase.h>
102
# include <unistr.h>
103
#endif
104
105
#ifndef WINICONV_CONST
106
#  define WINICONV_CONST
107
#endif
108
109
#include <libpsl.h>
110
111
/**
112
 * SECTION:libpsl
113
 * @short_description: Public Suffix List library functions
114
 * @title: libpsl
115
 * @stability: Stable
116
 * @include: libpsl.h
117
 *
118
 * [Public Suffix List](https://publicsuffix.org/) library functions.
119
 *
120
 */
121
122
/* Number of elements of a real array (must not be used on a pointer). */
#define countof(a) (sizeof(a) / sizeof(*(a)))

/* Flag bits stored per rule (psl_entry_t.flags) and returned by the DAFSA lookup. */
#define PRIV_PSL_FLAG_EXCEPTION (1 << 0) /* rule is an exception rule */
#define PRIV_PSL_FLAG_WILDCARD  (1 << 1) /* rule is a wildcard rule */
#define PRIV_PSL_FLAG_ICANN     (1 << 2) /* entry of ICANN section */
#define PRIV_PSL_FLAG_PRIVATE   (1 << 3) /* entry of PRIVATE section */
#define PRIV_PSL_FLAG_PLAIN     (1 << 4) /* just used for PSL syntax checking */
129
130
/* A single suffix rule. */
typedef struct {
  char label_buf[128];   /* storage for the rule text, filled by suffix_init() */
  const char *label;     /* rule text; either label_buf or an external string */
  unsigned short length; /* length of the rule text in bytes */
  unsigned char nlabels; /* number of labels */
  unsigned char flags;   /* PRIV_PSL_FLAG_* bits */
} psl_entry_t;
141
142
/* stripped down version libmget vector routines */
143
typedef struct {
144
  int
145
    (*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
146
  psl_entry_t
147
    **entry; /* pointer to array of pointers to elements */
148
  int
149
    max,     /* allocated elements */
150
    cur;     /* number of elements in use */
151
} psl_vector_t;
152
153
struct psl_ctx_st {
154
  psl_vector_t
155
    *suffixes;
156
  unsigned char
157
    *dafsa;
158
  size_t
159
    dafsa_size;
160
  int
161
    nsuffixes,
162
    nexceptions,
163
    nwildcards;
164
  unsigned
165
    utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */
166
};
167
168
/*
 * Built-in PSL data: empty placeholders unless ENABLE_BUILTIN is defined,
 * in which case the data generated by psl-make-dafsa is included.
 */
#ifndef ENABLE_BUILTIN
static const unsigned char kDafsa[] = "";
static const char _psl_filename[] = "";
static const char _psl_sha1_checksum[] = "";
static time_t _psl_file_time = 0;
static int _psl_nsuffixes = 0;
static int _psl_nexceptions = 0;
static int _psl_nwildcards = 0;
#else
#include "suffixes_dafsa.h"
#endif
180
181
/* references to these PSLs will result in lookups to built-in data */
182
static const psl_ctx_t
183
  builtin_psl;
184
185
#ifdef PSL_DISTFILE
186
static const char _psl_dist_filename[] = PSL_DISTFILE;
187
#else
188
static const char _psl_dist_filename[] = "";
189
#endif
190
191
static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
192
0
{
193
0
  psl_vector_t *v;
194
195
0
  if (!(v = calloc(1, sizeof(psl_vector_t))))
196
0
    return NULL;
197
198
0
  if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
199
0
    free(v);
200
0
    return NULL;
201
0
  }
202
203
0
  v->max = max;
204
0
  v->cmp = cmp;
205
0
  return v;
206
0
}
207
208
/*
 * Release a vector, all elements it owns and the element array.
 * '*v' is reset to NULL afterwards so the caller is not left holding a
 * dangling pointer (and a second call becomes a harmless no-op).
 * Passing NULL or a pointer to NULL is allowed.
 */
static void vector_free(psl_vector_t **v)
{
  psl_vector_t *vec;

  if (!v || !*v)
    return;

  vec = *v;

  if (vec->entry) {
    int it;

    for (it = 0; it < vec->cur; it++)
      free(vec->entry[it]);

    free(vec->entry);
  }

  free(vec);
  *v = NULL;
}
222
223
static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
224
0
{
225
0
  if (pos < 0 || !v || pos >= v->cur) return NULL;
226
227
0
  return v->entry[pos];
228
0
}
229
230
/* the entries must be sorted by */
231
static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
232
0
{
233
0
  if (v) {
234
0
    int l, r, m;
235
0
    int res;
236
237
    /* binary search for element (exact match) */
238
0
    for (l = 0, r = v->cur - 1; l <= r;) {
239
0
      m = (l + r) / 2;
240
0
      if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
241
0
      else if (res < 0) r = m - 1;
242
0
      else return m;
243
0
    }
244
0
  }
245
246
0
  return -1; /* not found */
247
0
}
248
249
/*
 * Append a copy of '*elem' to the vector, growing the pointer array when full.
 *
 * Returns the index of the new element, or -1 on failure (NULL argument,
 * out of memory, or capacity overflow). On failure the vector is unchanged.
 */
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
{
  void *elemp;

  if (!v || !elem)
    return -1;

  if (v->max == v->cur) {
    int new_max;
    void *m;

    /* a capacity of 0 must still be able to grow; doubling 0 would stay 0 */
    if (v->max <= 0)
      new_max = 1;
    else if (v->max > INT_MAX / 2)
      return -1; /* doubling would overflow int */
    else
      new_max = v->max * 2;

    if ((size_t) new_max > (size_t) -1 / sizeof(psl_entry_t *))
      return -1; /* byte count would overflow size_t */

    if (!(m = realloc(v->entry, (size_t) new_max * sizeof(psl_entry_t *))))
      return -1;

    /* commit the new capacity only after realloc() succeeded */
    v->entry = m;
    v->max = new_max;
  }

  if (!(elemp = malloc(sizeof(psl_entry_t))))
    return -1;

  memcpy(elemp, elem, sizeof(psl_entry_t));

  v->entry[v->cur++] = elemp;
  return v->cur - 1;
}
276
277
/*
 * Sort the element pointers with the vector's comparison function.
 * Does nothing if 'v' is NULL or no comparison function is set.
 */
static void vector_sort(psl_vector_t *v)
{
  if (!v || !v->cmp)
    return;

  /* the array holds 'psl_entry_t *' items, so take the size from the array itself */
  qsort(v->entry, v->cur, sizeof(*v->entry), (int(*)(const void *, const void *))v->cmp);
}
282
283
/* by this kind of sorting, we can easily see if a domain matches or not */
284
static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
285
0
{
286
0
  int n;
287
288
0
  if ((n = s2->nlabels - s1->nlabels))
289
0
    return n; /* most labels first */
290
291
0
  if ((n = s1->length - s2->length))
292
0
    return n;  /* shorter rules first */
293
294
0
  return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf);
295
0
}
296
297
/* needed to sort array of pointers, given to qsort() */
298
static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
299
0
{
300
0
  return suffix_compare(*s1, *s2);
301
0
}
302
303
/*
 * Initialize 'suffix' from the first 'length' bytes of 'rule'.
 *
 * The text is copied into suffix->label_buf (always NUL-terminated),
 * suffix->label is pointed at that buffer, suffix->nlabels is set to the
 * number of dot-separated labels and suffix->length to the number of bytes
 * copied. suffix->flags is not touched.
 *
 * The copy is bounded by 'length' and additionally stops at a NUL byte, so
 * a 'rule' that is not NUL-terminated at 'length' cannot overflow label_buf.
 *
 * Returns 0 on success, -1 if the rule does not fit (nlabels is then 0).
 */
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
{
  size_t i;

  suffix->label = suffix->label_buf;

  if (length >= sizeof(suffix->label_buf) - 1) {
    suffix->nlabels = 0;
    /* fprintf(stderr, "Suffix rule too long (%zd, ignored): %s\n", length, rule); */
    return -1;
  }

  suffix->nlabels = 1;

  for (i = 0; i < length && rule[i]; i++) {
    if (rule[i] == '.')
      suffix->nlabels++;
    suffix->label_buf[i] = rule[i];
  }
  suffix->label_buf[i] = 0;

  /* i < sizeof(label_buf), so it always fits into the unsigned short field */
  suffix->length = (unsigned short) i;

  return 0;
}
329
330
#if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU)
331
/*
332
 * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII
333
 * representation of non-ASCII characters in labels as found in UTF-8 domain names.
334
 * This is because the current DAFSA format used may only hold character values [21..127].
335
 *
336
  Code copied from http://www.nicemice.net/idn/punycode-spec.gz on
337
  2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133
338
  labeled punycode-spec 1.0.3 (2006-Mar-24-Thu).  It is modified for
339
  libpsl by Tim Rühsen.  License on the original code:
340
341
  punycode-spec 1.0.3 (2006-Mar-23-Thu)
342
  http://www.nicemice.net/idn/
343
  Adam M. Costello
344
  http://www.nicemice.net/amc/
345
346
  B. Disclaimer and license
347
348
    Regarding this entire document or any portion of it (including
349
    the pseudocode and C code), the author makes no guarantees and
350
    is not responsible for any damage resulting from its use.  The
351
    author grants irrevocable permission to anyone to use, modify,
352
    and distribute it in any way that does not diminish the rights
353
    of anyone else to use, modify, and distribute it, provided that
354
    redistributed derivative works do not contain misleading author or
355
    version information.  Derivative works need not be licensed under
356
    similar terms.
357
358
  C. Punycode sample implementation
359
360
  punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
361
  http://www.nicemice.net/idn/
362
  Adam M. Costello
363
  http://www.nicemice.net/amc/
364
365
  This is ANSI C code (C89) implementing Punycode 1.0.x.
366
 */
367
/* Result codes of punycode_encode(). */
enum punycode_status {
  punycode_success    = 0,
  punycode_bad_input  = 1, /* Input is invalid. */
  punycode_big_output = 2, /* Output would exceed the space provided. */
  punycode_overflow   = 3  /* Wider integers needed to process input. */
};
373
374
/*
 * punycode_uint: unsigned integer type used for code points. It can be
 * overridden with PUNYCODE_UINT; otherwise 'unsigned int' is used when it
 * offers at least 26 value bits, else 'unsigned long'.
 */
#if defined(PUNYCODE_UINT)
typedef PUNYCODE_UINT punycode_uint;
#elif UINT_MAX >= (1 << 26) - 1
typedef unsigned int punycode_uint;
#else
typedef unsigned long punycode_uint;
#endif
381
382
/*** Bootstring parameters for Punycode ***/
enum {
  base         = 36,
  tmin         = 1,
  tmax         = 26,
  skew         = 38,
  damp         = 700,
  initial_bias = 72,
  initial_n    = 0x80,
  delimiter    = 0x2D
};
387
388
/*
 * Map a digit value to its basic code point:
 *    0..25 map to ASCII a..z
 *   26..35 map to ASCII 0..9
 */
static char encode_digit(punycode_uint d)
{
  if (d < 26)
    return (char) (d + 97); /* 97 is ASCII 'a' */

  return (char) (d + 22);   /* 26 + 22 = 48 is ASCII '0' */
}
394
#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
395
static const punycode_uint maxint = -1;
396
397
/*
 * Bias adaptation step of the Punycode encoder: computes the new bias from
 * the delta just encoded, the number of code points handled so far and
 * whether this is the first adaptation.
 */
static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime)
{
  punycode_uint k = 0;

  if (firsttime)
    delta = delta / damp;
  else
    delta = delta >> 1; /* a faster way of doing delta / 2 */

  delta += delta / numpoints;

  while (delta > ((base - tmin) * tmax) / 2) {
    delta /= base - tmin;
    k += base;
  }

  return k + (base - tmin + 1) * delta / (delta + skew);
}
411
412
static enum punycode_status punycode_encode(
413
  size_t input_length_orig,
414
  const punycode_uint input[],
415
  size_t *output_length,
416
  char output[])
417
{
418
  punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t;
419
  size_t out, max_out;
420
421
  /* The Punycode spec assumes that the input length is the same type */
422
  /* of integer as a code point, so we need to convert the size_t to  */
423
  /* a punycode_uint, which could overflow.                           */
424
425
  if (input_length_orig > maxint)
426
    return punycode_overflow;
427
428
  input_length = (punycode_uint) input_length_orig;
429
430
  /* Initialize the state: */
431
432
  n = initial_n;
433
  delta = 0;
434
  out = 0;
435
  max_out = *output_length;
436
  bias = initial_bias;
437
438
  /* Handle the basic code points: */
439
  for (j = 0; j < input_length; ++j) {
440
    if (input[j] < 0x80) {
441
      if (max_out - out < 2)
442
        return punycode_big_output;
443
      output[out++] = (char) input[j];
444
    }
445
    /* else if (input[j] < n) return punycode_bad_input; */
446
    /* (not needed for Punycode with unsigned code points) */
447
  }
448
449
  h = b = (punycode_uint) out;
450
  /* cannot overflow because out <= input_length <= maxint */
451
452
  /* h is the number of code points that have been handled, b is the  */
453
  /* number of basic code points, and out is the number of ASCII code */
454
  /* points that have been output.                                    */
455
456
  if (b > 0)
457
    output[out++] = delimiter;
458
459
  /* Main encoding loop: */
460
461
  while (h < input_length) {
462
    /* All non-basic code points < n have been     */
463
    /* handled already.  Find the next larger one: */
464
465
    for (m = maxint, j = 0; j < input_length; ++j) {
466
      /* if (basic(input[j])) continue; */
467
      /* (not needed for Punycode) */
468
      if (input[j] >= n && input[j] < m)
469
        m = input[j];
470
    }
471
472
    /* Increase delta enough to advance the decoder's    */
473
    /* <n,i> state to <m,0>, but guard against overflow: */
474
475
    if (m - n > (maxint - delta) / (h + 1))
476
      return punycode_overflow;
477
    delta += (m - n) * (h + 1);
478
    n = m;
479
480
    for (j = 0; j < input_length; ++j) {
481
      /* Punycode does not need to check whether input[j] is basic: */
482
      if (input[j] < n /* || basic(input[j]) */) {
483
        if (++delta == 0)
484
          return punycode_overflow;
485
      }
486
487
      if (input[j] == n) {
488
        /* Represent delta as a generalized variable-length integer: */
489
490
        for (q = delta, k = base;; k += base) {
491
          if (out >= max_out)
492
            return punycode_big_output;
493
          t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
494
            k >= bias + tmax ? tmax : k - bias;
495
          if (q < t)
496
            break;
497
          output[out++] = encode_digit(t + (q - t) % (base - t));
498
          q = (q - t) / (base - t);
499
        }
500
501
        output[out++] = encode_digit(q);
502
        bias = adapt(delta, h + 1, h == b);
503
        delta = 0;
504
        ++h;
505
      }
506
    }
507
508
    ++delta, ++n;
509
  }
510
511
  *output_length = out;
512
  return punycode_success;
513
}
514
515
/*
 * Decode the UTF-8 sequence in[0..inlen) into code points stored in 'out'.
 *
 * 'outlen' is the capacity of 'out' in elements; one element is held back,
 * so at most outlen - 1 code points are written. Decoding stops silently
 * once that limit is reached.
 *
 * Returns the number of code points written, or -1 if 'outlen' is 0 or the
 * input contains an invalid lead byte, a truncated sequence or a bad
 * continuation byte.
 */
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
{
  size_t n = 0;
  const unsigned char *s = (const unsigned char *) in;
  const unsigned char *e = s + inlen;

  if (!outlen)
    return -1;

  outlen--;

  while (n < outlen) {
    size_t inleft = (size_t) (e - s);

    if (inleft >= 1 && (*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */
      out[n++] = *s;
      s++;
    } else if (inleft >= 2 && (*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ {
      if ((s[1] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((*s & 0x1F) << 6) | (s[1] & 0x3F);
      s += 2;
    } else if (inleft >= 3 && (*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ {
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((*s & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
      s += 3;
    } else if (inleft >= 4 && (*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ {
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
        return -1;
      /* each continuation byte contributes its own 6 bits: s[1], s[2], s[3] */
      out[n++] = ((*s & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
      s += 4;
    } else if (!inleft) {
      break;
    } else
      return -1;
  }

  return n;
}
555
556
/* Return 1 if none of the first 'n' bytes of 's' has its high bit set, else 0. */
static int mem_is_ascii(const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *) s;
  size_t i;

  for (i = 0; i < n; i++) {
    if (bytes[i] & 0x80)
      return 0;
  }

  return 1;
}
564
565
static int domain_to_punycode(const char *domain, char *out, size_t outsize)
566
{
567
  size_t outlen = 0, labellen;
568
  punycode_uint input[256];
569
  const char *label, *e;
570
571
  for (e = label = domain; e;) {
572
    e = strchr(label, '.');
573
    labellen = e ? (size_t) (e - label) : strlen(label);
574
575
    if (mem_is_ascii(label, labellen)) {
576
      if (outlen + labellen + (e != NULL) >= outsize)
577
        return 1;
578
579
      memcpy(out + outlen, label, labellen);
580
      outlen += labellen;
581
    } else {
582
      ssize_t inputlen = 0;
583
584
      if (outlen + labellen + (e != NULL) + 4 >= outsize)
585
        return 1;
586
587
      if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0)
588
        return 1;
589
590
      memcpy(out + outlen, "xn--", 4);
591
      outlen += 4;
592
593
      labellen = outsize - outlen - (e != NULL) - 1; // -1 to leave space for the trailing \0
594
      if (punycode_encode(inputlen, input, &labellen, out + outlen))
595
        return 1;
596
      outlen += labellen;
597
    }
598
599
    if (e) {
600
      label = e + 1;
601
      out[outlen++] = '.';
602
    }
603
    out[outlen] = 0;
604
  }
605
606
  return 0;
607
}
608
#endif
609
610
/* Return 1 if 'c' is a space, tab, carriage return or newline, else 0. */
static int isspace_ascii(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
614
615
/* Return 1 if the NUL-terminated string contains no byte >= 128, else 0. */
static int str_is_ascii(const char *s)
{
  const unsigned char *p;

  for (p = (const unsigned char *) s; *p; p++) {
    if (*p >= 128)
      return 0;
  }

  return 1;
}
621
622
#if defined(WITH_LIBIDN)
623
/*
624
 * Work around a libidn <= 1.30 vulnerability.
625
 *
626
 * The function checks for a valid UTF-8 character sequence before
627
 * passing it to idna_to_ascii_8z().
628
 *
629
 * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
630
 * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
631
 * [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
632
 */
633
/*
 * Structural UTF-8 check of a NUL-terminated string: every lead byte must
 * be followed by the right number of 10xxxxxx continuation bytes.
 * Returns 1 if the whole string passes, 0 otherwise.
 */
static int utf8_is_valid(const char *utf8)
{
  const unsigned char *p = (const unsigned char *) utf8;

  while (*p) {
    int extra, i;

    if ((*p & 0x80) == 0)         /* 0xxxxxxx ASCII char */
      extra = 0;
    else if ((*p & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
      extra = 1;
    else if ((*p & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
      extra = 2;
    else if ((*p & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      extra = 3;
    else
      return 0;

    /* A NUL byte fails this test, so we never read past the terminator */
    for (i = 1; i <= extra; i++) {
      if ((p[i] & 0xC0) != 0x80)
        return 0;
    }

    p += extra + 1;
  }

  return 1;
}
658
#endif
659
660
/* Opaque handle type for the IDNA backend. */
typedef void *psl_idna_t;

/*
 * Open an IDNA handle. With WITH_LIBICU this is the result of
 * uidna_openUTS46(); in every other configuration it is NULL.
 */
static psl_idna_t *psl_idna_open(void)
{
#ifdef WITH_LIBICU
  UErrorCode icu_status = 0;

  return (void *) uidna_openUTS46(UIDNA_USE_STD3_RULES | UIDNA_NONTRANSITIONAL_TO_ASCII, &icu_status);
#else
  return NULL;
#endif
}
670
671
/*
 * Close a handle obtained from psl_idna_open(). Only the WITH_LIBICU build
 * has anything to release; NULL is accepted.
 */
static void psl_idna_close(psl_idna_t *idna PSL_UNUSED)
{
#ifdef WITH_LIBICU
  if (idna != NULL)
    uidna_close((UIDNA *) idna);
#endif
}
678
679
/*
 * Convert the UTF-8 domain name 'utf8' to its ASCII (punycode) form.
 *
 * Exactly one backend is compiled in: libicu, libidn2 (two variants chosen
 * by IDN2_VERSION_NUMBER), libidn, or the built-in domain_to_punycode()
 * fallback when none of the libraries is configured.
 *
 * Returns 0 on success and -1 on failure. On success '*ascii' receives a
 * string that callers in this file release with free().
 * The libicu branch does nothing (and returns -1) when 'idna' is NULL.
 * The fallback branch returns -1 when 'ascii' is NULL.
 */
static int psl_idna_toASCII(psl_idna_t *idna PSL_UNUSED, const char *utf8, char **ascii)
680
0
{
681
0
  int ret = -1;
682
683
#if defined(WITH_LIBICU)
684
  /* IDNA2008 UTS#46 punycode conversion */
685
  if (idna) {
686
    char lookupname_buf[128] = "", *lookupname = lookupname_buf;
687
    UErrorCode status = 0;
688
    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
689
    UChar utf16_dst[128], utf16_src_buf[128];
690
    UChar *utf16_src = utf16_src_buf;
691
    int32_t utf16_src_length, bytes_written;
692
    int32_t utf16_dst_length;
693
694
    /* step 1: UTF-8 -> UTF-16, first into the stack buffer */
    u_strFromUTF8(utf16_src, countof(utf16_src_buf), &utf16_src_length, utf8, -1, &status);
695
    if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */
696
697
    /* the stack buffer was filled completely: redo the conversion into a heap buffer with room for the terminator */
    if (utf16_src_length >= (int) countof(utf16_src_buf)) {
698
      utf16_src = malloc((utf16_src_length + 1) * sizeof(UChar));
699
      if (!utf16_src) goto cleanup;
700
701
      u_strFromUTF8(utf16_src, utf16_src_length, NULL, utf8, -1, &status);
702
      if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */
703
704
      utf16_src[utf16_src_length] = 0; /* u_strFromUTF8() doesn't 0-terminate if dest is filled up */
705
    }
706
707
    /* step 2: IDNA conversion of the UTF-16 name */
    utf16_dst_length = uidna_nameToASCII((UIDNA *)idna, utf16_src, utf16_src_length, utf16_dst, countof(utf16_dst), &info, &status);
708
    if (!U_SUCCESS(status)) goto cleanup; /* to ASCII conversion failed */
709
710
    /* step 3: UTF-16 result -> UTF-8, first into the stack buffer */
    u_strToUTF8(lookupname, sizeof(lookupname_buf), &bytes_written, utf16_dst, utf16_dst_length, &status);
711
    if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */
712
713
    if (bytes_written >= (int) sizeof(lookupname_buf)) {
714
      lookupname = malloc(bytes_written + 1);
715
      if (!lookupname) goto cleanup;
716
717
      u_strToUTF8(lookupname, bytes_written, NULL, utf16_dst, utf16_dst_length, &status);
718
      if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */
719
720
      lookupname[bytes_written] = 0; /* u_strToUTF8() doesn't 0-terminate if dest is filled up */
721
    } else {
722
      /* the result fits the stack buffer: hand out a heap copy instead */
      if (!(lookupname = strdup(lookupname)))
723
        goto cleanup;
724
    }
725
726
    /* pass ownership to the caller; lookupname is cleared so cleanup does not free it */
    if (ascii) {
727
      *ascii = lookupname;
728
      lookupname = NULL;
729
    }
730
731
    ret = 0;
732
733
cleanup:
734
    /* only heap buffers are freed; the stack buffers are skipped */
    if (lookupname != lookupname_buf)
735
      free(lookupname);
736
    if (utf16_src != utf16_src_buf)
737
      free(utf16_src);
738
  }
739
#elif defined(WITH_LIBIDN2)
740
0
#if IDN2_VERSION_NUMBER >= 0x00140000
741
0
  int rc;
742
743
  /* IDN2_TRANSITIONAL automatically converts to lowercase
744
   * IDN2_NFC_INPUT converts to NFC before toASCII conversion
745
   * Since IDN2_TRANSITIONAL implicitly does NFC conversion, we don't need
746
   * the additional IDN2_NFC_INPUT. But just for the unlikely case that the linked
747
   * library is not matching the headers when building and it doesn't support TR46,
748
   * we provide IDN2_NFC_INPUT. */
749
  /* NOTE(review): the comment above speaks of IDN2_TRANSITIONAL, but the call below passes IDN2_NONTRANSITIONAL - confirm which is intended */
750
0
  if ((rc = idn2_lookup_u8((uint8_t *)utf8, (uint8_t **)ascii, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL)) == IDN2_OK)
751
0
    ret = 0;
752
  /* else
753
    fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */
754
#else
755
  int rc;
756
  uint8_t *lower;
757
  size_t len = u8_strlen((uint8_t *)utf8) + 1;
758
759
  /* we need a conversion to lowercase */
760
  if (!(lower = u8_tolower((uint8_t *)utf8, len, 0, UNINORM_NFKC, NULL, &len))) {
761
    /* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", utf8, errno); */
762
    return -1;
763
  }
764
765
  if ((rc = idn2_lookup_u8(lower, (uint8_t **)ascii, 0)) == IDN2_OK) {
766
    ret = 0;
767
  } /* else
768
    fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */
769
770
  free(lower);
771
#endif
772
#elif defined(WITH_LIBIDN)
773
  int rc;
774
775
  /* reject malformed UTF-8 before it reaches libidn (see the comment on utf8_is_valid()) */
  if (!utf8_is_valid(utf8)) {
776
    /* fprintf(stderr, "Invalid UTF-8 sequence not converted: '%s'\n", utf8); */
777
    return -1;
778
  }
779
780
  /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */
781
782
  if ((rc = idna_to_ascii_8z(utf8, ascii, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
783
    ret = 0;
784
  } /* else
785
    fprintf(stderr, "toASCII failed (%d): %s\n", rc, idna_strerror(rc)); */
786
#else
787
  /* no IDNA library configured: use the built-in punycode encoder with a fixed 128 byte buffer */
  char lookupname[128];
788
789
  if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
790
    if (ascii)
791
      if ((*ascii = strdup(lookupname)))
792
        ret = 0;
793
  }
794
#endif
795
796
0
  return ret;
797
0
}
798
799
/*
 * If the rule text of 'e' is not pure ASCII, convert it with
 * psl_idna_toASCII() and, when the converted text differs, append an
 * additional rule with that text and the same flags to 'v'.
 * Conversion and insertion failures are ignored silently.
 */
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
{
  char *ace;
  psl_entry_t converted, *stored;

  if (str_is_ascii(e->label_buf))
    return;

  if (psl_idna_toASCII(idna, e->label_buf, &ace) != 0)
    return;

  /* fprintf(stderr, "toASCII '%s' -> '%s'\n", e->label_buf, ace); */
  if (strcmp(e->label_buf, ace) != 0 && suffix_init(&converted, ace, strlen(ace)) == 0) {
    converted.flags = e->flags;

    stored = vector_get(v, vector_add(v, &converted));
    if (stored)
      stored->label = stored->label_buf; /* set label to changed address */
  } /* else ignore */

  free(ace);
}
821
822
/* Prototypes of the DAFSA lookup routines, which are defined outside this part of the file. */
int LookupStringInFixedSet(
  const unsigned char* graph,
  size_t length,
  const char* key,
  size_t key_length);

int GetUtfMode(
  const unsigned char *graph,
  size_t length);
825
826
static int is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
827
0
{
828
0
  psl_entry_t suffix;
829
0
  const char *p;
830
0
  char *punycode = NULL;
831
0
  int need_conversion = 0;
832
833
  /* this function should be called without leading dots, just make sure */
834
0
  if (*domain == '.')
835
0
    domain++;
836
837
0
  suffix.nlabels = 1;
838
839
0
  for (p = domain; *p; p++) {
840
0
    if (*p == '.') {
841
0
      if (suffix.nlabels == 255) // weird input, avoid 8bit overflow
842
0
        return 0;
843
0
      suffix.nlabels++;
844
0
    }
845
0
    else if (*((unsigned char *)p) >= 128)
846
0
      need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
847
0
  }
848
849
0
  if (suffix.nlabels == 1) {
850
    /* TLD, this is the prevailing '*' match. If type excludes the '*' rule, continue.
851
     */
852
0
    if (!(type & PSL_TYPE_NO_STAR_RULE))
853
0
      return 1;
854
0
  }
855
856
0
  type &= ~PSL_TYPE_NO_STAR_RULE;
857
858
0
  if (psl->utf8 || psl == &builtin_psl)
859
0
    need_conversion = 0;
860
861
0
  if (need_conversion) {
862
0
    psl_idna_t *idna = psl_idna_open();
863
864
0
    if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
865
0
      suffix.label = punycode;
866
0
      suffix.length = strlen(punycode);
867
0
    } else {
868
      /* fallback */
869
870
0
      suffix.label = domain;
871
0
      suffix.length = p - suffix.label;
872
0
    }
873
874
0
    psl_idna_close(idna);
875
0
  } else {
876
0
    suffix.label = domain;
877
0
    suffix.length = p - suffix.label;
878
0
  }
879
880
0
  if (psl == &builtin_psl || psl->dafsa) {
881
0
    size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
882
0
    const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
883
0
    int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
884
0
    if (rc != -1) {
885
      /* check for correct rule type */
886
0
      if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
887
0
        goto suffix_no;
888
0
      else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
889
0
        goto suffix_no;
890
891
0
      if (rc & PRIV_PSL_FLAG_EXCEPTION)
892
0
        goto suffix_no;
893
894
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
895
      /* definitely a match, no matter if the found rule is a wildcard or not */
896
0
      goto suffix_yes;
897
0
    }
898
0
    if ((suffix.label = strchr(suffix.label, '.'))) {
899
0
      suffix.label++;
900
0
      suffix.length = strlen(suffix.label);
901
0
      suffix.nlabels--;
902
903
0
      rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
904
0
      if (rc != -1) {
905
        /* check for correct rule type */
906
0
        if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
907
0
          goto suffix_no;
908
0
        else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
909
0
          goto suffix_no;
910
911
0
        if (rc & PRIV_PSL_FLAG_WILDCARD)
912
0
          goto suffix_yes;
913
0
      }
914
0
    }
915
0
  } else {
916
0
    psl_entry_t *rule = vector_get(psl->suffixes, 0);
917
918
0
    if (!rule || rule->nlabels < suffix.nlabels - 1)
919
0
      goto suffix_no;
920
921
0
    rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
922
923
0
    if (rule) {
924
      /* check for correct rule type */
925
0
      if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
926
0
        goto suffix_no;
927
0
      else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
928
0
        goto suffix_no;
929
930
0
      if (rule->flags & PRIV_PSL_FLAG_EXCEPTION)
931
0
        goto suffix_no;
932
933
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
934
      /* definitely a match, no matter if the found rule is a wildcard or not */
935
0
      goto suffix_yes;
936
0
    }
937
938
0
    if ((suffix.label = strchr(suffix.label, '.'))) {
939
0
      suffix.label++;
940
0
      suffix.length = strlen(suffix.label);
941
0
      suffix.nlabels--;
942
943
0
      rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
944
945
0
      if (rule) {
946
        /* check for correct rule type */
947
0
        if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
948
0
          goto suffix_no;
949
0
        else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
950
0
          goto suffix_no;
951
952
0
        if (rule->flags & PRIV_PSL_FLAG_WILDCARD)
953
0
          goto suffix_yes;
954
0
      }
955
0
    }
956
0
  }
957
958
0
suffix_no:
959
0
  if (punycode)
960
0
    free(punycode);
961
0
  return 0;
962
963
0
suffix_yes:
964
0
  if (punycode)
965
0
    free(punycode);
966
0
  return 1;
967
0
}
968
969
/**
970
 * psl_is_public_suffix:
971
 * @psl: PSL context
972
 * @domain: Domain string
973
 *
974
 * This function checks if @domain is a public suffix by the means of the
975
 * [Mozilla Public Suffix List](https://publicsuffix.org).
976
 *
977
 * For cookie domain checking see psl_is_cookie_domain_acceptable().
978
 *
979
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
980
 * Other encodings likely result in incorrect return values.
981
 * Use the helper function psl_str_to_utf8lower() to normalize @domain.
982
 *
983
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
984
 * psl_builtin().
985
 *
986
 * Returns: 1 if domain is a public suffix, 0 if not.
987
 *
988
 * Since: 0.1
989
 */
990
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
991
0
{
992
0
  if (!psl || !domain)
993
0
    return 1;
994
995
0
  return is_public_suffix(psl, domain, PSL_TYPE_ANY);
996
0
}
997
998
/**
999
 * psl_is_public_suffix2:
1000
 * @psl: PSL context
1001
 * @domain: Domain string
1002
 * @type: Domain type
1003
 *
1004
 * This function checks if @domain is a public suffix by the means of the
1005
 * [Mozilla Public Suffix List](https://publicsuffix.org).
1006
 *
1007
 * @type specifies the PSL section where to perform the lookup. Valid values are
1008
 * %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN, %PSL_TYPE_NO_STAR_RULE, and %PSL_TYPE_ANY.
1009
 *
1010
 * %PSL_TYPE_NO_STAR_RULE switches of the 'prevailing star rule' (see
1011
 * [List](https://publicsuffix.org/list) under 'Algorithm' 2.).
1012
 * Applying the flag means that TLDs not explicitly listed in the PSL are *not* treated as public suffixes.
1013
 *
1014
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1015
 * Other encodings likely result in incorrect return values.
1016
 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1017
 *
1018
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1019
 * psl_builtin().
1020
 *
1021
 * Returns: 1 if domain is a public suffix, 0 if not.
1022
 *
1023
 * Since: 0.1
1024
 */
1025
int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
1026
0
{
1027
0
  if (!psl || !domain)
1028
0
    return 1;
1029
1030
0
  return is_public_suffix(psl, domain, type);
1031
0
}
1032
1033
/**
1034
 * psl_unregistrable_domain:
1035
 * @psl: PSL context
1036
 * @domain: Domain string
1037
 *
1038
 * This function finds the longest public suffix part of @domain by the means
1039
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1040
 *
1041
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1042
 * Other encodings likely result in incorrect return values.
1043
 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1044
 *
1045
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1046
 * psl_builtin().
1047
 *
1048
 * Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
1049
 * does not contain a public suffix (or if @psl is %NULL).
1050
 *
1051
 * Since: 0.1
1052
 */
1053
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
1054
0
{
1055
0
  int nlabels = 0;
1056
0
  const char *p;
1057
1058
0
  if (!psl || !domain)
1059
0
    return NULL;
1060
1061
  /*
1062
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1063
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1064
   */
1065
0
  for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1066
0
    if (*p == '.' && ++nlabels > 8) {
1067
0
      domain = p + 1;
1068
0
      break;
1069
0
    }
1070
0
  }
1071
1072
  /*
1073
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
1074
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1075
   */
1076
1077
0
  while (!is_public_suffix(psl, domain, 0)) {
1078
0
    if ((domain = strchr(domain, '.')))
1079
0
      domain++;
1080
0
    else
1081
0
      break; /* prevent endless loop if is_public_suffix() is broken. */
1082
0
  }
1083
1084
0
  return domain;
1085
0
}
1086
1087
/**
1088
 * psl_registrable_domain:
1089
 * @psl: PSL context
1090
 * @domain: Domain string
1091
 *
1092
 * This function finds the shortest private suffix part of @domain by the means
1093
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1094
 *
1095
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1096
 * Other encodings likely result in incorrect return values.
1097
 * Use helper function psl_str_to_utf8lower() for normalization @domain.
1098
 *
1099
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1100
 * psl_builtin().
1101
 *
1102
 * Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
1103
 * does not contain a private suffix (or if @psl is %NULL).
1104
 *
1105
 * Since: 0.1
1106
 */
1107
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
1108
0
{
1109
0
  const char *p, *regdom = NULL;
1110
0
  int nlabels = 0;
1111
1112
0
  if (!psl || !domain || *domain == '.')
1113
0
    return NULL;
1114
1115
  /*
1116
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1117
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1118
   */
1119
0
  for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1120
0
    if (*p == '.' && ++nlabels > 8) {
1121
0
      domain = p + 1;
1122
0
      break;
1123
0
    }
1124
0
  }
1125
1126
  /*
1127
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
1128
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1129
   */
1130
1131
0
  while (!is_public_suffix(psl, domain, 0)) {
1132
0
    if ((p = strchr(domain, '.'))) {
1133
0
      regdom = domain;
1134
0
      domain = p + 1;
1135
0
    } else
1136
0
      break; /* prevent endless loop if is_public_suffix() is broken. */
1137
0
  }
1138
1139
0
  return regdom;
1140
0
}
1141
1142
/**
1143
 * psl_load_file:
1144
 * @fname: Name of PSL file
1145
 *
1146
 * This function loads the public suffixes file named @fname.
1147
 * To free the allocated resources, call psl_free().
1148
 *
1149
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1150
 *
1151
 * Returns: Pointer to a PSL context or %NULL on failure.
1152
 *
1153
 * Since: 0.1
1154
 */
1155
psl_ctx_t *psl_load_file(const char *fname)
1156
0
{
1157
0
  FILE *fp;
1158
0
  psl_ctx_t *psl = NULL;
1159
1160
0
  if (!fname)
1161
0
    return NULL;
1162
1163
0
  if ((fp = fopen(fname, "rb"))) {
1164
0
    psl = psl_load_fp(fp);
1165
0
    fclose(fp);
1166
0
  }
1167
1168
0
  return psl;
1169
0
}
1170
1171
/**
1172
 * psl_load_fp:
1173
 * @fp: %FILE pointer
1174
 *
1175
 * This function loads the public suffixes from a %FILE pointer.
1176
 * To free the allocated resources, call psl_free().
1177
 *
1178
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1179
 *
1180
 * Returns: Pointer to a PSL context or %NULL on failure.
1181
 *
1182
 * Since: 0.1
1183
 */
1184
psl_ctx_t *psl_load_fp(FILE *fp)
1185
0
{
1186
0
  psl_ctx_t *psl;
1187
0
  psl_entry_t suffix, *suffixp;
1188
0
  char buf[256], *linep, *p;
1189
0
  int type = 0, is_dafsa;
1190
0
  psl_idna_t *idna;
1191
1192
0
  if (!fp)
1193
0
    return NULL;
1194
1195
0
  if (!(psl = calloc(1, sizeof(psl_ctx_t))))
1196
0
    return NULL;
1197
1198
  /* read first line to allow ASCII / DAFSA detection */
1199
0
  if (!(linep = fgets(buf, sizeof(buf) - 1, fp)))
1200
0
    goto fail;
1201
1202
0
  is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11);
1203
1204
0
  if (is_dafsa) {
1205
0
    void *m;
1206
0
    size_t size = 65536, n, len = 0;
1207
0
    int version = atoi(buf + 11);
1208
1209
0
    if (version != 0)
1210
0
      goto fail;
1211
1212
0
    if (!(psl->dafsa = malloc(size)))
1213
0
      goto fail;
1214
1215
0
    memcpy(psl->dafsa, buf, len);
1216
1217
0
    while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
1218
0
      len += n;
1219
0
      if (len >= size) {
1220
0
        if (!(m = realloc(psl->dafsa, size *= 2)))
1221
0
          goto fail;
1222
0
        psl->dafsa = m;
1223
0
      }
1224
0
    }
1225
1226
    /* release unused memory */
1227
0
    if ((m = realloc(psl->dafsa, len)))
1228
0
      psl->dafsa = m;
1229
0
    else if (!len)
1230
0
      psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
1231
1232
0
    psl->dafsa_size = len;
1233
0
    psl->utf8 = !!GetUtfMode(psl->dafsa, len);
1234
1235
0
    return psl;
1236
0
  }
1237
1238
0
  idna = psl_idna_open();
1239
1240
  /*
1241
   *  as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
1242
   *  as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
1243
   *  as of 07.10.2018, the list at https://publicsuffix.org/list/ contains ~8600 rules and 8 exceptions.
1244
   */
1245
0
  psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
1246
0
  psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
1247
1248
0
  do {
1249
0
    while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
1250
0
    if (!*linep) continue; /* skip empty lines */
1251
1252
0
    if (*linep == '/' && linep[1] == '/') {
1253
0
      if (!type) {
1254
0
        if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
1255
0
          type = PRIV_PSL_FLAG_ICANN;
1256
0
        else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
1257
0
          type = PRIV_PSL_FLAG_PRIVATE;
1258
0
      }
1259
0
      else if (type == PRIV_PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
1260
0
        type = 0;
1261
0
      else if (type == PRIV_PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
1262
0
        type = 0;
1263
1264
0
      continue; /* skip comments */
1265
0
    }
1266
1267
    /* parse suffix rule */
1268
0
    for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
1269
0
    *linep = 0;
1270
1271
0
    if (*p == '!') {
1272
0
      p++;
1273
0
      suffix.flags = PRIV_PSL_FLAG_EXCEPTION | type;
1274
0
      psl->nexceptions++;
1275
0
    } else if (*p == '*') {
1276
0
      if (*++p != '.') {
1277
        /* fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", p - 1); */
1278
0
        continue;
1279
0
      }
1280
0
      p++;
1281
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
1282
0
      suffix.flags = PRIV_PSL_FLAG_WILDCARD | PRIV_PSL_FLAG_PLAIN | type;
1283
0
      psl->nwildcards++;
1284
0
      psl->nsuffixes++;
1285
0
    } else {
1286
0
      suffix.flags = PRIV_PSL_FLAG_PLAIN | type;
1287
0
      psl->nsuffixes++;
1288
0
    }
1289
1290
0
    if (suffix_init(&suffix, p, linep - p) == 0) {
1291
0
      int index;
1292
1293
0
      if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
1294
        /* Found existing entry:
1295
         * Combination of exception and plain rule is ambiguous
1296
         * !foo.bar
1297
         * foo.bar
1298
         *
1299
         * Allowed:
1300
         * !foo.bar + *.foo.bar
1301
         * foo.bar + *.foo.bar
1302
         *
1303
         * We do not check here, let's do it later.
1304
         */
1305
1306
0
        suffixp = vector_get(psl->suffixes, index);
1307
0
        suffixp->flags |= suffix.flags;
1308
0
      } else {
1309
        /* New entry */
1310
0
        suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
1311
0
      }
1312
1313
0
      if (suffixp) {
1314
0
        suffixp->label = suffixp->label_buf; /* set label to changed address */
1315
0
        add_punycode_if_needed(idna, psl->suffixes, suffixp);
1316
0
      }
1317
0
    }
1318
0
  } while ((linep = fgets(buf, sizeof(buf), fp)));
1319
1320
0
  vector_sort(psl->suffixes);
1321
1322
0
  psl_idna_close(idna);
1323
1324
0
  return psl;
1325
1326
0
fail:
1327
0
  psl_free(psl);
1328
0
  return NULL;
1329
0
}
1330
1331
/**
1332
 * psl_free:
1333
 * @psl: PSL context pointer
1334
 *
1335
 * This function frees the the PSL context that has been retrieved via
1336
 * psl_load_fp() or psl_load_file().
1337
 *
1338
 * Since: 0.1
1339
 */
1340
void psl_free(psl_ctx_t *psl)
1341
0
{
1342
0
  if (psl && psl != &builtin_psl) {
1343
0
    vector_free(&psl->suffixes);
1344
0
    free(psl->dafsa);
1345
0
    free(psl);
1346
0
  }
1347
0
}
1348
1349
/**
1350
 * psl_builtin:
1351
 *
1352
 * This function returns the PSL context that has been generated and built in at compile-time.
1353
 * You don't have to free the returned context explicitly.
1354
 *
1355
 * The builtin data also contains punycode entries, one for each international domain name.
1356
 *
1357
 * If the generation of built-in data has been disabled during compilation, %NULL will be returned.
1358
 * When using the builtin psl context, you can provide UTF-8 (lowercase + NFKC) or ASCII/ACE (punycode)
1359
 * representations of domains to functions like psl_is_public_suffix().
1360
 *
1361
 * Returns: Pointer to the built in PSL data or %NULL if this data is not available.
1362
 *
1363
 * Since: 0.1
1364
 */
1365
const psl_ctx_t *psl_builtin(void)
1366
0
{
1367
0
#ifdef ENABLE_BUILTIN
1368
0
  return &builtin_psl;
1369
#else
1370
  return NULL;
1371
#endif
1372
0
}
1373
1374
/**
1375
 * psl_suffix_count:
1376
 * @psl: PSL context pointer
1377
 *
1378
 * This function returns number of public suffixes maintained by @psl.
1379
 * The number of exceptions within the Public Suffix List are not included.
1380
 *
1381
 * If the information is not available, the return value is -1 (since 0.19).
1382
 * This is the case with DAFSA blobs or if @psl is %NULL.
1383
 *
1384
 * Returns: Number of public suffixes entries in PSL context or -1 if this information is not available.
1385
 *
1386
 * Since: 0.1
1387
 */
1388
int psl_suffix_count(const psl_ctx_t *psl)
1389
0
{
1390
0
  if (psl == &builtin_psl)
1391
0
    return _psl_nsuffixes;
1392
0
  else if (psl)
1393
0
    return psl->dafsa ? -1 : psl->nsuffixes;
1394
0
  else
1395
0
    return -1;
1396
0
}
1397
1398
/**
1399
 * psl_suffix_exception_count:
1400
 * @psl: PSL context pointer
1401
 *
1402
 * This function returns number of public suffix exceptions maintained by @psl.
1403
 *
1404
 * If the information is not available, the return value is -1 (since 0.19).
1405
 * This is the case with DAFSA blobs or if @psl is %NULL.
1406
 *
1407
 * Returns: Number of public suffix exceptions in PSL context or -1 if this information is not available.
1408
 *
1409
 * Since: 0.1
1410
 */
1411
int psl_suffix_exception_count(const psl_ctx_t *psl)
1412
0
{
1413
0
  if (psl == &builtin_psl)
1414
0
    return _psl_nexceptions;
1415
0
  else if (psl)
1416
0
    return psl->dafsa ? -1 : psl->nexceptions;
1417
0
  else
1418
0
    return -1;
1419
0
}
1420
1421
/**
1422
 * psl_suffix_wildcard_count:
1423
 * @psl: PSL context pointer
1424
 *
1425
 * This function returns number of public suffix wildcards maintained by @psl.
1426
 *
1427
 * If the information is not available, the return value is -1 (since 0.19).
1428
 * This is the case with DAFSA blobs or if @psl is %NULL.
1429
 *
1430
 * Returns: Number of public suffix wildcards in PSL context or -1 if this information is not available.
1431
 *
1432
 * Since: 0.10.0
1433
 */
1434
int psl_suffix_wildcard_count(const psl_ctx_t *psl)
1435
0
{
1436
0
  if (psl == &builtin_psl)
1437
0
    return _psl_nwildcards;
1438
0
  else if (psl)
1439
0
    return psl->dafsa ? -1 : psl->nwildcards;
1440
0
  else
1441
0
    return -1;
1442
0
}
1443
1444
/**
1445
 * psl_builtin_file_time:
1446
 *
1447
 * This function returns the mtime of the Public Suffix List file that has been built in.
1448
 *
1449
 * If the generation of built-in data has been disabled during compilation, 0 will be returned.
1450
 *
1451
 * Returns: time_t value or 0.
1452
 *
1453
 * Since: 0.1
1454
 */
1455
time_t psl_builtin_file_time(void)
1456
0
{
1457
0
  return _psl_file_time;
1458
0
}
1459
1460
/**
1461
 * psl_builtin_sha1sum:
1462
 *
1463
 * This function returns the SHA1 checksum of the Public Suffix List file that has been built in.
1464
 * The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
1465
 *
1466
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1467
 *
1468
 * Returns: String containing SHA1 checksum or an empty string.
1469
 *
1470
 * Since: 0.1
1471
 */
1472
const char *psl_builtin_sha1sum(void)
1473
0
{
1474
0
  return _psl_sha1_checksum;
1475
0
}
1476
1477
/**
1478
 * psl_builtin_filename:
1479
 *
1480
 * This function returns the file name of the Public Suffix List file that has been built in.
1481
 *
1482
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1483
 *
1484
 * Returns: String containing the PSL file name or an empty string.
1485
 *
1486
 * Since: 0.1
1487
 */
1488
const char *psl_builtin_filename(void)
1489
0
{
1490
0
  return _psl_filename;
1491
0
}
1492
1493
/**
1494
 * psl_builtin_outdated:
1495
 *
1496
 * This function checks if the built-in data is older than the file it has been created from.
1497
 * If it is, it might be a good idea for the application to reload the PSL.
1498
 * The mtime is taken as reference.
1499
 *
1500
 * If the PSL file does not exist, it is assumed that the built-in data is not outdated.
1501
 *
1502
 * Returns: 1 if the built-in is outdated, 0 otherwise.
1503
 *
1504
 * Since: 0.10.0
1505
 */
1506
int psl_builtin_outdated(void)
1507
0
{
1508
0
  struct stat st;
1509
1510
0
  if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
1511
0
    return 1;
1512
1513
0
  return 0;
1514
0
}
1515
1516
/**
1517
 * psl_dist_filename:
1518
 *
1519
 * This function returns the file name of the distribution/system PSL data file.
1520
 * This file will be considered by psl_latest().
1521
 *
1522
 * Return the filename that is set by ./configure --with-psl-distfile, or an empty string.
1523
 *
1524
 * Returns: String containing a PSL file name or an empty string.
1525
 *
1526
 * Since: 0.16
1527
 */
1528
const char *psl_dist_filename(void)
1529
0
{
1530
0
  return _psl_dist_filename;
1531
0
}
1532
1533
/**
1534
 * psl_get_version:
1535
 *
1536
 * Get libpsl version.
1537
 *
1538
 * Returns: String containing version of libpsl.
1539
 *
1540
 * Since: 0.2.5
1541
 **/
1542
const char *psl_get_version(void)
1543
0
{
1544
#ifdef WITH_LIBICU
1545
  return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
1546
#elif defined(WITH_LIBIDN2)
1547
0
  return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
1548
#elif defined(WITH_LIBIDN)
1549
  return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
1550
#else
1551
  return PACKAGE_VERSION " (no IDNA support)";
1552
#endif
1553
0
}
1554
1555
/**
1556
 * psl_check_version_number:
1557
 * @version: Version number (hex) to check against.
1558
 *
1559
 * Check the given version number is at minimum the current library version number.
1560
 * The version number must be a hexadecimal number like 0x000a01 (V0.10.1).
1561
 *
1562
 * Returns: Returns the library version number if the given version number is at least
1563
 * the version of the library, else return 0; If the argument is 0, the function returns
1564
 * the library version number without performing a check.
1565
 *
1566
 * Since: 0.11.0
1567
 **/
1568
int psl_check_version_number(int version)
1569
0
{
1570
0
  if (version) {
1571
0
    int major = version >> 16;
1572
0
    int minor = (version >> 8) & 0xFF;
1573
0
    int patch = version & 0xFF;
1574
1575
0
    if (major < PSL_VERSION_MAJOR
1576
0
      || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR)
1577
0
      || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH))
1578
0
    {
1579
0
      return 0;
1580
0
    }
1581
0
  }
1582
1583
0
  return PSL_VERSION_NUMBER;
1584
0
}
1585
1586
/*
 * Return whether hostname is an IP address (IPv4 or IPv6 textual form) or not.
 *
 * Returns 1 if hostname parses as an IP address, 0 otherwise.
 */
static int isip(const char *hostname)
{
#ifdef _WIN32
  WCHAR wName[INET6_ADDRSTRLEN+1];

  struct sockaddr_in  addr  = {0};
  struct sockaddr_in6 addr6 = {0};

  INT size  = sizeof(addr);
  INT size6 = sizeof(addr6);

  /* conversion fails e.g. if hostname does not fit into wName - cannot be an IP address then */
  if (!MultiByteToWideChar(CP_UTF8, 0, hostname, -1, wName, countof(wName)))
    return 0;

  return (WSAStringToAddressW(wName, AF_INET,  NULL, (struct sockaddr *)&addr,  &size) != SOCKET_ERROR) |
         (WSAStringToAddressW(wName, AF_INET6, NULL, (struct sockaddr *)&addr6, &size6) != SOCKET_ERROR);
#else
  struct in_addr addr;
  struct in6_addr addr6;

  /*
   * inet_pton() returns 1 on success, 0 if the string is not a valid address
   * and -1 on error (address family not supported). Only 1 means "is an IP
   * address", so compare explicitly instead of treating -1 as true.
   */
  return inet_pton(AF_INET, hostname, &addr) == 1 || inet_pton(AF_INET6, hostname, &addr6) == 1;
#endif
}
1610
1611
/**
1612
 * psl_is_cookie_domain_acceptable:
1613
 * @psl: PSL context pointer
1614
 * @hostname: The request hostname.
1615
 * @cookie_domain: The domain value from a cookie
1616
 *
1617
 * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
1618
 * @hostname.
1619
 *
1620
 * For international domain names both, @hostname and @cookie_domain, have to be either in UTF-8 (lowercase + NFKC)
1621
 * or in ASCII/ACE (punycode) format. Other encodings or mixing UTF-8 and punycode likely result in incorrect return values.
1622
 *
1623
 * Use helper function psl_str_to_utf8lower() for normalization of @hostname and @cookie_domain.
1624
 *
1625
 * Hint for Windows users:
1626
 * Please make sure the calling application has called WSAStartup() before calling psl_is_cookie_domain_acceptable().
1627
 *
1628
 * Examples:
1629
 * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
1630
 * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
1631
 *
1632
 * 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
1633
 *  but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
1634
 *
1635
 * Returns: 1 if acceptable, 0 if not acceptable.
1636
 *
1637
 * Since: 0.1
1638
 */
1639
int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
1640
0
{
1641
0
  const char *p;
1642
0
  size_t hostname_length, cookie_domain_length;
1643
1644
0
  if (!psl || !hostname || !cookie_domain)
1645
0
    return 0;
1646
1647
0
  while (*cookie_domain == '.')
1648
0
    cookie_domain++;
1649
1650
0
  if (!strcmp(hostname, cookie_domain))
1651
0
    return 1; /* an exact match is acceptable (and pretty common) */
1652
1653
0
  if (isip(hostname))
1654
0
    return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
1655
1656
0
  cookie_domain_length = strlen(cookie_domain);
1657
0
  hostname_length = strlen(hostname);
1658
1659
0
  if (cookie_domain_length >= hostname_length)
1660
0
    return 0; /* cookie_domain is too long */
1661
1662
0
  p = hostname + hostname_length - cookie_domain_length;
1663
0
  if (!strcmp(p, cookie_domain) && p[-1] == '.') {
1664
    /* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
1665
1666
0
    if (!(p = psl_unregistrable_domain(psl, hostname)))
1667
0
      return 1;
1668
1669
0
    if (cookie_domain_length > strlen(p))
1670
0
      return 1;
1671
0
  }
1672
1673
0
  return 0;
1674
0
}
1675
1676
/**
 * psl_free_string:
 * @str: pointer to lowercase string returned by psl_str_to_utf8lower()
 *
 * This function free()'s the memory allocated by psl_str_to_utf8lower() when
 * returning a lowercase string. Passing %NULL is allowed and does nothing.
 *
 * Since: 0.19
 */
void psl_free_string(char *str)
{
  /* free() accepts NULL, so no extra check is needed */
  free(str);
}
1690
1691
/**
1692
 * psl_str_to_utf8lower:
1693
 * @str: string to convert
1694
 * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
1695
 * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
1696
 * @lower: return value containing the converted string
1697
 *
1698
 * This helper function converts a string to UTF-8 lowercase + NFKC representation.
1699
 * Lowercase + NFKC UTF-8 is needed as input to the domain checking functions.
1700
 *
1701
 * @lower stays unchanged on error.
1702
 *
1703
 * When returning PSL_SUCCESS, the return value 'lower' must be freed after usage.
1704
 *
1705
 * Returns: psl_error_t value.
1706
 *   PSL_SUCCESS: Success
1707
 *   PSL_ERR_INVALID_ARG: @str is a %NULL value.
1708
 *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
1709
 *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode
1710
 *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
1711
 *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
1712
 *   PSL_ERR_NO_MEM: Failed to allocate memory
1713
 *
1714
 * Since: 0.4
1715
 */
1716
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding PSL_UNUSED, const char *locale PSL_UNUSED, char **lower)
1717
0
{
1718
0
  int ret = PSL_ERR_INVALID_ARG;
1719
1720
0
  if (!str)
1721
0
    return PSL_ERR_INVALID_ARG;
1722
1723
  /* shortcut to avoid costly conversion */
1724
0
  if (str_is_ascii(str)) {
1725
0
    if (lower) {
1726
0
      char *p, *tmp;
1727
1728
0
      if (!(tmp = strdup(str)))
1729
0
        return PSL_ERR_NO_MEM;
1730
1731
0
      *lower = tmp;
1732
1733
      /* convert ASCII string to lowercase */
1734
0
      for (p = *lower; *p; p++)
1735
0
        if (isupper(*p))
1736
0
          *p = tolower(*p);
1737
0
    }
1738
0
    return PSL_SUCCESS;
1739
0
  }
1740
1741
#ifdef WITH_LIBICU
1742
  do {
1743
  size_t str_length = strlen(str);
1744
  UErrorCode status = 0;
1745
  UChar *utf16_dst, *utf16_lower;
1746
  int32_t utf16_dst_length;
1747
  char *utf8_lower;
1748
  UConverter *uconv;
1749
1750
  if (str_length < 256) {
1751
    /* C89 allocation */
1752
    utf16_dst   = alloca(sizeof(UChar) * (str_length * 2 + 1));
1753
    utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
1754
    utf8_lower  = alloca(str_length * 6 + 1);
1755
  } else {
1756
    utf16_dst   = malloc(sizeof(UChar) * (str_length * 2 + 1));
1757
    utf16_lower = malloc(sizeof(UChar) * (str_length * 2 + 1));
1758
    utf8_lower  = malloc(str_length * 6 + 1);
1759
1760
    if (!utf16_dst || !utf16_lower || !utf8_lower) {
1761
      ret = PSL_ERR_NO_MEM;
1762
      goto out;
1763
    }
1764
  }
1765
1766
  uconv = ucnv_open(encoding, &status);
1767
  if (U_SUCCESS(status)) {
1768
    utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
1769
    ucnv_close(uconv);
1770
1771
    if (U_SUCCESS(status)) {
1772
      int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
1773
      if (U_SUCCESS(status)) {
1774
        u_strToUTF8(utf8_lower, str_length * 6 + 1, NULL, utf16_lower, utf16_lower_length, &status);
1775
        if (U_SUCCESS(status)) {
1776
          ret = PSL_SUCCESS;
1777
          if (lower) {
1778
            char *tmp = strdup(utf8_lower);
1779
1780
            if (tmp)
1781
              *lower = tmp;
1782
            else
1783
              ret = PSL_ERR_NO_MEM;
1784
          }
1785
        } else {
1786
          ret = PSL_ERR_TO_UTF8;
1787
          /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
1788
        }
1789
      } else {
1790
        ret = PSL_ERR_TO_LOWER;
1791
        /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
1792
      }
1793
    } else {
1794
      ret = PSL_ERR_TO_UTF16;
1795
      /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
1796
    }
1797
  } else {
1798
    ret = PSL_ERR_CONVERTER;
1799
    /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
1800
  }
1801
out:
1802
  if (str_length >= 256) {
1803
    free(utf16_dst);
1804
    free(utf16_lower);
1805
    free(utf8_lower);
1806
  }
1807
  } while (0);
1808
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1809
0
  do {
1810
    /* find out local charset encoding */
1811
0
    if (!encoding) {
1812
0
#ifdef HAVE_NL_LANGINFO
1813
0
      encoding = nl_langinfo(CODESET);
1814
#elif defined _WIN32
1815
      static char buf[16];
1816
      snprintf(buf, sizeof(buf), "CP%u", GetACP());
1817
      encoding = buf;
1818
#endif
1819
0
      if (!encoding || !*encoding)
1820
0
        encoding = "ASCII";
1821
0
    }
1822
1823
    /* convert to UTF-8 */
1824
0
    if (strcasecmp(encoding, "utf-8")) {
1825
0
      iconv_t cd = iconv_open("utf-8", encoding);
1826
1827
0
      if (cd != (iconv_t)-1) {
1828
0
        char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
1829
0
        size_t tmp_len = strlen(str) + 1;
1830
0
        size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
1831
0
        char *dst = malloc(dst_len + 1), *dst_tmp = dst;
1832
1833
0
        if (!dst) {
1834
0
          ret = PSL_ERR_NO_MEM;
1835
0
        }
1836
0
        else if (iconv(cd, (WINICONV_CONST char **)&tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
1837
0
          && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
1838
0
        {
1839
          /* start size for u8_tolower internal memory allocation.
1840
           * u8_tolower() does not terminate the result string. we have 0 byte included in above tmp_len
1841
           * and thus in len. */
1842
0
          size_t len = dst_len - dst_len_tmp;
1843
1844
0
          if ((tmp = (char *)u8_tolower((uint8_t *)dst, len, 0, UNINORM_NFKC, NULL, &len))) {
1845
0
            ret = PSL_SUCCESS;
1846
0
            if (lower) {
1847
0
              *lower = tmp;
1848
0
              tmp = NULL;
1849
0
            } else
1850
0
              free(tmp);
1851
0
          } else {
1852
0
            ret = PSL_ERR_TO_LOWER;
1853
            /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1854
0
          }
1855
0
        } else {
1856
0
          ret = PSL_ERR_TO_UTF8;
1857
          /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1858
0
        }
1859
1860
0
        free(dst);
1861
0
        iconv_close(cd);
1862
0
      } else {
1863
0
        ret = PSL_ERR_TO_UTF8;
1864
        /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1865
0
      }
1866
0
    } else {
1867
      /* we need a conversion to lowercase */
1868
0
      uint8_t *tmp;
1869
1870
      /* start size for u8_tolower internal memory allocation.
1871
       * u8_tolower() does not terminate the result string, so include terminating 0 byte in len. */
1872
0
      size_t len = u8_strlen((uint8_t *)str) + 1;
1873
1874
0
      if ((tmp = u8_tolower((uint8_t *)str, len, 0, UNINORM_NFKC, NULL, &len))) {
1875
0
        ret = PSL_SUCCESS;
1876
0
        if (lower) {
1877
0
          *lower = (char*)tmp;
1878
0
          tmp = NULL;
1879
0
        } else
1880
0
          free(tmp);
1881
0
      } else {
1882
0
        ret = PSL_ERR_TO_LOWER;
1883
        /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1884
0
      }
1885
0
    }
1886
1887
0
  } while (0);
1888
0
#endif
1889
1890
0
  return ret;
1891
0
}
1892
1893
/* if file is newer than the builtin data, insert it reverse sorted by mtime */
1894
static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
1895
0
{
1896
0
  struct stat st;
1897
0
  int it;
1898
1899
0
  if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
1900
    /* add file name and mtime to end of array */
1901
0
    psl_fname[n] = fname;
1902
0
    psl_mtime[n++] = st.st_mtime;
1903
1904
    /* move the new entry to it's correct position */
1905
0
    for (it = n - 2; it >= 0 && st.st_mtime > psl_mtime[it]; it--) {
1906
0
      psl_fname[it + 1] = psl_fname[it];
1907
0
      psl_mtime[it + 1] = psl_mtime[it];
1908
0
      psl_fname[it] = fname;
1909
0
      psl_mtime[it] = st.st_mtime;
1910
0
    }
1911
0
  }
1912
1913
0
  return n;
1914
0
}
1915
1916
/**
1917
 * psl_latest:
1918
 * @fname: Name of PSL file or %NULL
1919
 *
1920
 * This function loads the latest available PSL data from either
1921
 * - @fname (application specific filename, may be %NULL)
1922
 * - location specified during build-time (filename from ./configure --with-psl-distfile)
1923
 * - built-in PSL data (generated from ./configure --with-psl-file)
1924
 * - location of built-in data (filename from ./configure --with-psl-file)
1925
 *
1926
 * If none of the above is available, the function returns %NULL.
1927
 *
1928
 * To free the allocated resources, call psl_free().
1929
 *
1930
 * Returns: Pointer to a PSL context or %NULL on failure.
1931
 *
1932
 * Since: 0.16
1933
 */
1934
psl_ctx_t *psl_latest(const char *fname)
1935
0
{
1936
0
  psl_ctx_t *psl;
1937
0
  const char *psl_fname[3];
1938
0
  time_t psl_mtime[3];
1939
0
  int it, ntimes;
1940
1941
0
  psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
1942
1943
  /* create array of PSL files reverse sorted by mtime (latest first) */
1944
0
  ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
1945
0
  ntimes = insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
1946
0
  ntimes = insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
1947
1948
  /* load PSL data from the latest file, falling back to the second recent, ... */
1949
0
  for (psl = NULL, it = 0; it < ntimes; it++) {
1950
0
    if (psl_mtime[it] > _psl_file_time)
1951
0
      if ((psl = psl_load_file(psl_fname[it])))
1952
0
        break;
1953
0
  }
1954
1955
  /* if file loading failed or there is no file newer than the builtin data,
1956
   * then return the builtin data. */
1957
0
  return psl ? psl : (psl_ctx_t *) psl_builtin();
1958
0
}