Coverage Report

Created: 2024-03-08 06:32

/src/libpsl/src/psl.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright(c) 2014-2024 Tim Ruehsen
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20
 * DEALINGS IN THE SOFTWARE.
21
 *
22
 * This file is part of libpsl.
23
 *
24
 * Public Suffix List routines
25
 *
26
 * Changelog
27
 * 19.03.2014  Tim Ruehsen  created from libmget/cookie.c
28
 *
29
 */
30
31
#if HAVE_CONFIG_H
32
# include <config.h>
33
#endif
34
35
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
36
#       define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
37
#else
38
#       define GCC_VERSION_AT_LEAST(major, minor) 0
39
#endif
40
41
#include <sys/types.h>
42
#include <sys/stat.h>
43
44
#ifdef _WIN32
45
# include <winsock2.h>
46
# include <ws2tcpip.h>
47
#else
48
# include <sys/socket.h>
49
# include <netinet/in.h>
50
# include <unistd.h>
51
#endif
52
53
#if defined(_MSC_VER) && ! defined(ssize_t)
54
# include <basetsd.h>
55
typedef SSIZE_T ssize_t;
56
#endif
57
58
#include <stdio.h>
59
#include <stdlib.h>
60
#include <string.h>
61
#include <ctype.h>
62
#include <time.h>
63
#include <errno.h>
64
#include <limits.h> /* for UINT_MAX */
65
66
#ifdef HAVE_NL_LANGINFO
67
# include <langinfo.h>
68
#endif
69
70
#ifndef _WIN32
71
# include <arpa/inet.h>
72
#else
73
# include <malloc.h>
74
#endif
75
76
#ifdef WITH_LIBICU
77
# include <unicode/uversion.h>
78
# include <unicode/ustring.h>
79
# include <unicode/uidna.h>
80
# include <unicode/ucnv.h>
81
#elif defined(WITH_LIBIDN2)
82
# include <iconv.h>
83
# include <idn2.h>
84
# include <unicase.h>
85
# include <unistr.h>
86
#elif defined(WITH_LIBIDN)
87
# include <iconv.h>
88
# include <stringprep.h>
89
# include <idna.h>
90
# include <unicase.h>
91
# include <unistr.h>
92
#endif
93
94
#ifdef WINICONV_CONST
95
#  define ICONV_CONST WINICONV_CONST
96
#endif
97
#ifndef ICONV_CONST
98
#  define ICONV_CONST
99
#endif
100
101
102
#include <libpsl.h>
103
104
/**
105
 * SECTION:libpsl
106
 * @short_description: Public Suffix List library functions
107
 * @title: libpsl
108
 * @stability: Stable
109
 * @include: libpsl.h
110
 *
111
 * [Public Suffix List](https://publicsuffix.org/) library functions.
112
 *
113
 */
114
115
/* Number of elements of a real array (must not be used on a pointer). */
#define countof(a) (sizeof(a) / sizeof((a)[0]))

/*
 * Internal rule flags. They are kept in psl_entry_t.flags and are also
 * tested against the value returned by LookupStringInFixedSet().
 */
#define PRIV_PSL_FLAG_EXCEPTION (1 << 0)
#define PRIV_PSL_FLAG_WILDCARD  (1 << 1)
#define PRIV_PSL_FLAG_ICANN     (1 << 2) /* entry of ICANN section */
#define PRIV_PSL_FLAG_PRIVATE   (1 << 3) /* entry of PRIVATE section */
#define PRIV_PSL_FLAG_PLAIN     (1 << 4) /* just used for PSL syntax checking */
122
123
/* A single suffix rule. */
typedef struct {
  char label_buf[128];   /* storage for the rule text (filled by suffix_init()) */
  const char *label;     /* text used for comparisons; falls back to label_buf when NULL */
  unsigned short length; /* length of the rule text */
  unsigned char nlabels; /* number of labels */
  unsigned char flags;   /* PRIV_PSL_FLAG_* bits */
} psl_entry_t;
134
135
/* stripped down version libmget vector routines */
136
typedef struct {
137
  int
138
    (*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
139
  psl_entry_t
140
    **entry; /* pointer to array of pointers to elements */
141
  int
142
    max,     /* allocated elements */
143
    cur;     /* number of elements in use */
144
} psl_vector_t;
145
146
struct psl_ctx_st {
147
  psl_vector_t
148
    *suffixes;
149
  unsigned char
150
    *dafsa;
151
  size_t
152
    dafsa_size;
153
  int
154
    nsuffixes,
155
    nexceptions,
156
    nwildcards;
157
  unsigned
158
    utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */
159
};
160
161
/* include the PSL data generated by psl-make-dafsa */
162
#ifdef ENABLE_BUILTIN
163
#include "suffixes_dafsa.h"
164
#else
165
static const unsigned char kDafsa[] = "";
166
static time_t _psl_file_time = 0;
167
static int _psl_nsuffixes = 0;
168
static int _psl_nexceptions = 0;
169
static int _psl_nwildcards = 0;
170
static const char _psl_sha1_checksum[] = "";
171
static const char _psl_filename[] = "";
172
#endif
173
174
/* references to these PSLs will result in lookups to built-in data */
175
static const psl_ctx_t
176
  builtin_psl;
177
178
#ifdef PSL_DISTFILE
179
static const char _psl_dist_filename[] = PSL_DISTFILE;
180
#else
181
static const char _psl_dist_filename[] = "";
182
#endif
183
184
static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
185
0
{
186
0
  psl_vector_t *v;
187
188
0
  if (!(v = calloc(1, sizeof(psl_vector_t))))
189
0
    return NULL;
190
191
0
  if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
192
0
    free(v);
193
0
    return NULL;
194
0
  }
195
196
0
  v->max = max;
197
0
  v->cmp = cmp;
198
0
  return v;
199
0
}
200
201
/*
 * Free all entries of *v, the entry array and the vector itself.
 *
 * v: address of the vector pointer; NULL or a pointer to NULL is a no-op.
 *    On return *v is set to NULL so that the caller is not left with a
 *    dangling pointer (and a second call is harmless).
 */
static void vector_free(psl_vector_t **v)
{
  psl_vector_t *vec;

  if (!v || !(vec = *v))
    return;

  if (vec->entry) {
    int it;

    for (it = 0; it < vec->cur; it++)
      free(vec->entry[it]);

    free(vec->entry);
  }

  free(vec);
  *v = NULL;
}
215
216
static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
217
0
{
218
0
  if (pos < 0 || !v || pos >= v->cur) return NULL;
219
220
0
  return v->entry[pos];
221
0
}
222
223
/* the entries must be sorted by */
224
static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
225
0
{
226
0
  if (v) {
227
0
    int l, r, m;
228
0
    int res;
229
230
    /* binary search for element (exact match) */
231
0
    for (l = 0, r = v->cur - 1; l <= r;) {
232
0
      m = (l + r) / 2;
233
0
      if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
234
0
      else if (res < 0) r = m - 1;
235
0
      else return m;
236
0
    }
237
0
  }
238
239
0
  return -1; /* not found */
240
0
}
241
242
/*
 * Append a heap-allocated copy of *elem to the vector, growing the entry
 * array if it is full.
 *
 * The copy is a plain memcpy(): the copied 'label' pointer still refers to
 * whatever elem->label pointed to, so a caller that wants it to point into
 * the copy's own label_buf has to re-point it afterwards.
 *
 * Returns the index of the new entry, or -1 on error (NULL argument,
 * capacity overflow or out of memory). On error the vector is unchanged
 * apart from a possibly larger, still consistent, capacity.
 */
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
{
  psl_entry_t *copy;

  if (!v || !elem)
    return -1;

  if (v->cur >= v->max) {
    psl_entry_t **grown;
    int new_max;

    if (v->max > INT_MAX / 2)
      return -1; /* doubling would overflow */

    new_max = v->max > 0 ? v->max * 2 : 1;

    /* only commit the new capacity once realloc() has succeeded */
    if (!(grown = realloc(v->entry, (size_t) new_max * sizeof(*grown))))
      return -1;

    v->entry = grown;
    v->max = new_max;
  }

  if (!(copy = malloc(sizeof(*copy))))
    return -1;

  memcpy(copy, elem, sizeof(*copy));

  v->entry[v->cur] = copy;
  return v->cur++;
}
269
270
/*
 * Sort the entry pointers with the vector's comparison function.
 * Does nothing if 'v' is NULL or has no comparison function.
 */
static void vector_sort(psl_vector_t *v)
{
  /* the array elements are 'psl_entry_t *', so take the size from the array itself */
  if (v && v->cmp)
    qsort(v->entry, v->cur, sizeof(*v->entry), (int(*)(const void *, const void *))v->cmp);
}
275
276
/* by this kind of sorting, we can easily see if a domain matches or not */
277
static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
278
0
{
279
0
  int n;
280
281
0
  if ((n = s2->nlabels - s1->nlabels))
282
0
    return n; /* most labels first */
283
284
0
  if ((n = s1->length - s2->length))
285
0
    return n;  /* shorter rules first */
286
287
0
  return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf);
288
0
}
289
290
/* needed to sort array of pointers, given to qsort() */
291
static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
292
0
{
293
0
  return suffix_compare(*s1, *s2);
294
0
}
295
296
/*
 * Initialize 'suffix' from the rule text.
 *
 * suffix: entry to fill; 'label' is pointed at the entry's own label_buf.
 * rule:   rule text.
 * length: length of 'rule'; at most 'length' characters are copied, so the
 *         copy can never run past label_buf even if 'rule' is longer.
 *
 * 'flags' is not touched. Returns 0 on success, or -1 (with nlabels set to 0)
 * if the rule does not fit into label_buf.
 */
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
{
  size_t i;

  suffix->label = suffix->label_buf;

  if (length >= sizeof(suffix->label_buf) - 1) {
    suffix->nlabels = 0;
    return -1;
  }

  suffix->length = (unsigned short) length;
  suffix->nlabels = 1;

  /* copy the rule and count the labels (number of dots + 1) */
  for (i = 0; i < length && rule[i]; i++) {
    if (rule[i] == '.')
      suffix->nlabels++;
    suffix->label_buf[i] = rule[i];
  }
  suffix->label_buf[i] = 0;

  return 0;
}
322
323
#ifndef HAVE_STRDUP
324
/* Replacement for systems without strdup(): returns a malloc()ed copy of 's', or NULL if out of memory. */
static char *strdup(const char *s)
{
  size_t size = strlen(s) + 1; /* including the terminating 0 */
  char *copy = malloc(size);

  return copy ? memcpy(copy, s, size) : NULL;
}
331
#elif !HAVE_DECL_STRDUP
332
/*
333
 *  On Linux with
334
 *    CC=gcc CFLAGS="-Wall -Wextra -Wpedantic -std=c89" ./configure
335
 *  strdup isn't declared (warning: implicit declaration of function 'strdup').
336
 */
337
char *strdup(const char *);
338
#endif
339
340
#if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU)
341
/*
342
 * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII
343
 * representation of non-ASCII characters in labels as found in UTF-8 domain names.
344
 * This is because the current DAFSA format used may only hold character values [21..127].
345
 *
346
  Code copied from http://www.nicemice.net/idn/punycode-spec.gz on
347
  2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133
348
  labeled punycode-spec 1.0.3 (2006-Mar-24-Thu).  It is modified for
349
  libpsl by Tim Rühsen.  License on the original code:
350
351
  punycode-spec 1.0.3 (2006-Mar-23-Thu)
352
  http://www.nicemice.net/idn/
353
  Adam M. Costello
354
  http://www.nicemice.net/amc/
355
356
  B. Disclaimer and license
357
358
    Regarding this entire document or any portion of it (including
359
    the pseudocode and C code), the author makes no guarantees and
360
    is not responsible for any damage resulting from its use.  The
361
    author grants irrevocable permission to anyone to use, modify,
362
    and distribute it in any way that does not diminish the rights
363
    of anyone else to use, modify, and distribute it, provided that
364
    redistributed derivative works do not contain misleading author or
365
    version information.  Derivative works need not be licensed under
366
    similar terms.
367
368
  C. Punycode sample implementation
369
370
  punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
371
  http://www.nicemice.net/idn/
372
  Adam M. Costello
373
  http://www.nicemice.net/amc/
374
375
  This is ANSI C code (C89) implementing Punycode 1.0.x.
376
 */
377
/* Result codes of punycode_encode(). */
enum punycode_status {
  punycode_success = 0,    /* Conversion succeeded.                   */
  punycode_bad_input = 1,  /* Input is invalid.                       */
  punycode_big_output = 2, /* Output would exceed the space provided. */
  punycode_overflow = 3    /* Wider integers needed to process input. */
};

/*
 * punycode_uint is the unsigned type used for code points: PUNYCODE_UINT if
 * the build defines it, else unsigned int if that can hold (1 << 26) - 1,
 * else unsigned long.
 */
#ifdef PUNYCODE_UINT
  typedef PUNYCODE_UINT punycode_uint;
#elif UINT_MAX >= (1 << 26) - 1
  typedef unsigned int punycode_uint;
#else
  typedef unsigned long punycode_uint;
#endif

/*** Bootstring parameters for Punycode ***/
enum {
  base = 36,
  tmin = 1,
  tmax = 26,
  skew = 38,
  damp = 700,
  initial_bias = 72,
  initial_n = 0x80,
  delimiter = 0x2D
};
397
398
/*
 * Map a digit value to its ASCII character:
 *  0..25 map to 'a'..'z' (97..122), 26..35 map to '0'..'9' (48..57).
 */
static char encode_digit(punycode_uint d)
{
  if (d < 26)
    return d + 97; /* 'a' + d */

  return d + 22;   /* '0' + (d - 26) */
}

/* non-zero if bcp is one of 65..90 ('A'..'Z') */
#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)

/* largest value a punycode_uint can hold */
static const punycode_uint maxint = -1;
406
407
/*
 * Bias adaptation of the Punycode algorithm: computes the new bias from the
 * delta just encoded, the number of code points handled so far and whether
 * this is the first adaptation.
 */
static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime)
{
  punycode_uint k = 0;

  if (firsttime)
    delta /= damp;
  else
    delta >>= 1; /* same as delta / 2 */

  delta += delta / numpoints;

  while (delta > ((base - tmin) * tmax) / 2) {
    delta /= base - tmin;
    k += base;
  }

  return k + (base - tmin + 1) * delta / (delta + skew);
}
421
422
static enum punycode_status punycode_encode(
423
  size_t input_length_orig,
424
  const punycode_uint input[],
425
  size_t *output_length,
426
  char output[])
427
{
428
  punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t;
429
  size_t out, max_out;
430
431
  /* The Punycode spec assumes that the input length is the same type */
432
  /* of integer as a code point, so we need to convert the size_t to  */
433
  /* a punycode_uint, which could overflow.                           */
434
435
  if (input_length_orig > maxint)
436
    return punycode_overflow;
437
438
  input_length = (punycode_uint) input_length_orig;
439
440
  /* Initialize the state: */
441
442
  n = initial_n;
443
  delta = 0;
444
  out = 0;
445
  max_out = *output_length;
446
  bias = initial_bias;
447
448
  /* Handle the basic code points: */
449
  for (j = 0; j < input_length; ++j) {
450
    if (input[j] < 0x80) {
451
      if (max_out - out < 2)
452
        return punycode_big_output;
453
      output[out++] = (char) input[j];
454
    }
455
    /* else if (input[j] < n) return punycode_bad_input; */
456
    /* (not needed for Punycode with unsigned code points) */
457
  }
458
459
  h = b = (punycode_uint) out;
460
  /* cannot overflow because out <= input_length <= maxint */
461
462
  /* h is the number of code points that have been handled, b is the  */
463
  /* number of basic code points, and out is the number of ASCII code */
464
  /* points that have been output.                                    */
465
466
  if (b > 0)
467
    output[out++] = delimiter;
468
469
  /* Main encoding loop: */
470
471
  while (h < input_length) {
472
    /* All non-basic code points < n have been     */
473
    /* handled already.  Find the next larger one: */
474
475
    for (m = maxint, j = 0; j < input_length; ++j) {
476
      /* if (basic(input[j])) continue; */
477
      /* (not needed for Punycode) */
478
      if (input[j] >= n && input[j] < m)
479
        m = input[j];
480
    }
481
482
    /* Increase delta enough to advance the decoder's    */
483
    /* <n,i> state to <m,0>, but guard against overflow: */
484
485
    if (m - n > (maxint - delta) / (h + 1))
486
      return punycode_overflow;
487
    delta += (m - n) * (h + 1);
488
    n = m;
489
490
    for (j = 0; j < input_length; ++j) {
491
      /* Punycode does not need to check whether input[j] is basic: */
492
      if (input[j] < n /* || basic(input[j]) */) {
493
        if (++delta == 0)
494
          return punycode_overflow;
495
      }
496
497
      if (input[j] == n) {
498
        /* Represent delta as a generalized variable-length integer: */
499
500
        for (q = delta, k = base;; k += base) {
501
          if (out >= max_out)
502
            return punycode_big_output;
503
          t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
504
            k >= bias + tmax ? tmax : k - bias;
505
          if (q < t)
506
            break;
507
          output[out++] = encode_digit(t + (q - t) % (base - t));
508
          q = (q - t) / (base - t);
509
        }
510
511
        output[out++] = encode_digit(q);
512
        bias = adapt(delta, h + 1, h == b);
513
        delta = 0;
514
        ++h;
515
      }
516
    }
517
518
    ++delta, ++n;
519
  }
520
521
  *output_length = out;
522
  return punycode_success;
523
}
524
525
/*
 * Decode UTF-8 into an array of code points.
 *
 * in/inlen:  UTF-8 input and its length in bytes.
 * out:       receives the decoded code points.
 * outlen:    capacity of 'out' in elements; one slot is kept in reserve, so
 *            at most outlen - 1 code points are written. Decoding stops
 *            silently once that limit is reached.
 *
 * Only the lead byte pattern and the 10xxxxxx form of the continuation bytes
 * are checked.
 *
 * Returns the number of code points written, or -1 if 'outlen' is 0 or the
 * input is malformed or truncated.
 */
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
{
  size_t n = 0;
  const unsigned char *s = (const unsigned char *) in;
  const unsigned char *e = s + inlen;

  if (!outlen)
    return -1;

  outlen--;

  while (n < outlen) {
    size_t inleft = e - s;

    if (inleft >= 1 && (*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */
      out[n++] = *s;
      s++;
    } else if (inleft >= 2 && (*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ {
      if ((s[1] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((*s & 0x1F) << 6) | (s[1] & 0x3F);
      s += 2;
    } else if (inleft >= 3 && (*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ {
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
        return -1;
      out[n++] = ((*s & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
      s += 3;
    } else if (inleft >= 4 && (*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ {
      if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
        return -1;
      /* each of the three continuation bytes contributes its own 6 bits */
      out[n++] = ((punycode_uint) (*s & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
      s += 4;
    } else if (!inleft) {
      break;
    } else
      return -1;
  }

  return n;
}
565
566
/* Returns 1 if none of the first 'n' bytes of 's' has the high bit set (also for n == 0), else 0. */
static int mem_is_ascii(const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *) s;
  size_t i;

  for (i = 0; i < n; i++) {
    if (bytes[i] & 0x80)
      return 0;
  }

  return 1;
}
574
575
static int domain_to_punycode(const char *domain, char *out, size_t outsize)
576
{
577
  size_t outlen = 0, labellen;
578
  punycode_uint input[256];
579
  const char *label, *e;
580
581
  for (e = label = domain; e;) {
582
    e = strchr(label, '.');
583
    labellen = e ? (size_t) (e - label) : strlen(label);
584
585
    if (mem_is_ascii(label, labellen)) {
586
      if (outlen + labellen + (e != NULL) >= outsize)
587
        return 1;
588
589
      memcpy(out + outlen, label, labellen);
590
      outlen += labellen;
591
    } else {
592
      ssize_t inputlen = 0;
593
594
      if (outlen + labellen + (e != NULL) + 4 >= outsize)
595
        return 1;
596
597
      if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0)
598
        return 1;
599
600
      memcpy(out + outlen, "xn--", 4);
601
      outlen += 4;
602
603
      labellen = outsize - outlen - (e != NULL) - 1; // -1 to leave space for the trailing \0
604
      if (punycode_encode(inputlen, input, &labellen, out + outlen))
605
        return 1;
606
      outlen += labellen;
607
    }
608
609
    if (e) {
610
      label = e + 1;
611
      out[outlen++] = '.';
612
    }
613
    out[outlen] = 0;
614
  }
615
616
  return 0;
617
}
618
#endif
619
620
/* Returns 1 if 'c' is a space, tab, carriage return or newline, else 0 (locale independent). */
static int isspace_ascii(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
624
625
/* Returns 1 if the 0-terminated string contains only bytes below 128, else 0. */
static int str_is_ascii(const char *s)
{
  const unsigned char *p;

  for (p = (const unsigned char *) s; *p; p++) {
    if (*p >= 128)
      return 0;
  }

  return 1;
}
631
632
#if defined(WITH_LIBIDN)
633
/*
634
 * Work around a libidn <= 1.30 vulnerability.
635
 *
636
 * The function checks for a valid UTF-8 character sequence before
637
 * passing it to idna_to_ascii_8z().
638
 *
639
 * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
640
 * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
641
 * [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
642
 */
643
/*
 * Structural check of a 0-terminated UTF-8 string: every lead byte must be
 * followed by the number of 10xxxxxx continuation bytes it announces.
 * Checking stops at the first mismatch (which includes the terminating 0),
 * so no byte behind the end of the string is read.
 *
 * Returns 1 if the string passes, 0 otherwise.
 */
static int utf8_is_valid(const char *utf8)
{
  const unsigned char *s = (const unsigned char *) utf8;
  int trail, i;

  while (*s) {
    if ((*s & 0x80) == 0)         /* 0xxxxxxx ASCII char */
      trail = 0;
    else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
      trail = 1;
    else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
      trail = 2;
    else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      trail = 3;
    else
      return 0;

    for (i = 1; i <= trail; i++) {
      if ((s[i] & 0xC0) != 0x80)
        return 0;
    }

    s += trail + 1;
  }

  return 1;
}
668
#endif
669
670
/* Opaque handle passed to the psl_idna_*() functions. */
typedef void *psl_idna_t;

/*
 * Open a conversion handle. With libicu this is the object returned by
 * uidna_openUTS46(); in all other configurations it is NULL.
 */
static psl_idna_t *psl_idna_open(void)
{
#if defined(WITH_LIBICU)
  UErrorCode status = 0;
  return (void *)uidna_openUTS46(UIDNA_USE_STD3_RULES | UIDNA_NONTRANSITIONAL_TO_ASCII, &status);
#else
  return NULL;
#endif
}
680
681
/* Release a handle obtained from psl_idna_open(); a no-op unless built with libicu. */
static void psl_idna_close(psl_idna_t *idna)
{
#if defined(WITH_LIBICU)
  if (idna)
    uidna_close((UIDNA *)idna);
#else
  (void) idna;
#endif
}
690
691
/*
 * Convert the UTF-8 domain name 'utf8' to its ASCII (punycode) form using
 * the IDNA backend selected at build time (libicu, libidn2, libidn or the
 * internal punycode encoder).
 *
 * idna:  handle from psl_idna_open(); only used by the libicu backend.
 * utf8:  0-terminated UTF-8 input.
 * ascii: on success receives an allocated string that the caller has to free().
 *
 * Returns 0 on success, -1 on failure.
 */
static int psl_idna_toASCII(psl_idna_t *idna, const char *utf8, char **ascii)
{
  int ret = -1;

#if defined(WITH_LIBICU)
  /* IDNA2008 UTS#46 punycode conversion; without a handle nothing is converted */
  if (idna) {
    char lookupname_buf[128] = "", *lookupname = lookupname_buf;
    UErrorCode status = 0;
    UIDNAInfo info = UIDNA_INFO_INITIALIZER;
    UChar utf16_dst[128], utf16_src_buf[128];
    UChar *utf16_src = utf16_src_buf;
    int32_t utf16_src_length, bytes_written;
    int32_t utf16_dst_length;

    /* UTF-8 -> UTF-16, stack buffer first */
    u_strFromUTF8(utf16_src, countof(utf16_src_buf), &utf16_src_length, utf8, -1, &status);
    if (!U_SUCCESS(status))
      goto cleanup; /* UTF-8 to UTF-16 conversion failed */

    if (utf16_src_length >= (int) countof(utf16_src_buf)) {
      /* the stack buffer has no room for the terminator, convert again into heap memory */
      utf16_src = malloc((utf16_src_length + 1) * sizeof(UChar));
      if (!utf16_src)
        goto cleanup;

      u_strFromUTF8(utf16_src, utf16_src_length, NULL, utf8, -1, &status);
      if (!U_SUCCESS(status))
        goto cleanup; /* UTF-8 to UTF-16 conversion failed */

      utf16_src[utf16_src_length] = 0; /* u_strFromUTF8() doesn't 0-terminate if dest is filled up */
    }

    utf16_dst_length = uidna_nameToASCII((UIDNA *)idna, utf16_src, utf16_src_length, utf16_dst, countof(utf16_dst), &info, &status);
    if (!U_SUCCESS(status))
      goto cleanup; /* to ASCII conversion failed */

    /* UTF-16 -> UTF-8, stack buffer first */
    u_strToUTF8(lookupname, sizeof(lookupname_buf), &bytes_written, utf16_dst, utf16_dst_length, &status);
    if (!U_SUCCESS(status))
      goto cleanup; /* UTF-16 to UTF-8 conversion failed */

    if (bytes_written >= (int) sizeof(lookupname_buf)) {
      lookupname = malloc(bytes_written + 1);
      if (!lookupname)
        goto cleanup;

      u_strToUTF8(lookupname, bytes_written, NULL, utf16_dst, utf16_dst_length, &status);
      if (!U_SUCCESS(status))
        goto cleanup; /* UTF-16 to UTF-8 conversion failed */

      lookupname[bytes_written] = 0; /* u_strToUTF8() doesn't 0-terminate if dest is filled up */
    } else {
      /* the result sits in the stack buffer, hand out a heap copy */
      if (!(lookupname = strdup(lookupname)))
        goto cleanup;
    }

    if (ascii) {
      /* ownership moves to the caller */
      *ascii = lookupname;
      lookupname = NULL;
    }

    ret = 0;

cleanup:
    if (lookupname != lookupname_buf)
      free(lookupname);
    if (utf16_src != utf16_src_buf)
      free(utf16_src);
  }
#elif defined(WITH_LIBIDN2)
#if IDN2_VERSION_NUMBER >= 0x00140000
  (void) idna;

  /* The lookup is requested with TR46 non-transitional processing.
   * IDN2_NFC_INPUT (NFC conversion before the toASCII step) is passed in
   * addition, just for the unlikely case that the linked library does not
   * match the headers used when building and doesn't support TR46. */
  if (idn2_lookup_u8((uint8_t *)utf8, (uint8_t **)ascii, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL) == IDN2_OK)
    ret = 0;
#else
  uint8_t *lower;
  size_t len = u8_strlen((uint8_t *)utf8) + 1;

  /* we need a conversion to lowercase */
  if (!(lower = u8_tolower((uint8_t *)utf8, len, 0, UNINORM_NFKC, NULL, &len)))
    return -1;

  if (idn2_lookup_u8(lower, (uint8_t **)ascii, 0) == IDN2_OK)
    ret = 0;

  free(lower);
#endif
#elif defined(WITH_LIBIDN)
  (void) idna;

  /* only sequences that pass utf8_is_valid() are handed to libidn */
  if (!utf8_is_valid(utf8))
    return -1;

  /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */
  if (idna_to_ascii_8z(utf8, ascii, IDNA_USE_STD3_ASCII_RULES) == IDNA_SUCCESS)
    ret = 0;
#else
  char lookupname[128];

  (void) idna;

  /* no IDNA library: use the internal punycode encoder and return a heap copy */
  if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0
    && ascii && (*ascii = strdup(lookupname)))
    ret = 0;
#endif

  return ret;
}
818
819
/*
 * If the rule text of 'e' is not pure ASCII, convert it with
 * psl_idna_toASCII() and, when the result differs from the original text,
 * add a second entry with the converted text and the same flags to 'v'.
 * Failures of the conversion or of the insertion are ignored.
 */
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
{
  char *ace;
  psl_entry_t ace_entry, *stored;

  if (str_is_ascii(e->label_buf))
    return;

  if (psl_idna_toASCII(idna, e->label_buf, &ace) != 0)
    return;

  /* identical text needs no second entry */
  if (strcmp(e->label_buf, ace) != 0
    && suffix_init(&ace_entry, ace, strlen(ace)) == 0)
  {
    ace_entry.flags = e->flags;

    stored = vector_get(v, vector_add(v, &ace_entry));
    if (stored)
      stored->label = stored->label_buf; /* set label to changed address */
  }

  free(ace);
}
841
842
/* prototypes */
843
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
844
int GetUtfMode(const unsigned char *graph, size_t length);
845
846
static int is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
847
0
{
848
0
  psl_entry_t suffix;
849
0
  const char *p;
850
0
  char *punycode = NULL;
851
0
  int need_conversion = 0;
852
853
  /* this function should be called without leading dots, just make sure */
854
0
  if (*domain == '.')
855
0
    domain++;
856
857
0
  suffix.nlabels = 1;
858
859
0
  for (p = domain; *p; p++) {
860
0
    if (*p == '.') {
861
0
      if (suffix.nlabels == 255) /* weird input, avoid 8bit overflow */
862
0
        return 0;
863
0
      suffix.nlabels++;
864
0
    }
865
0
    else if (*((unsigned char *)p) >= 128)
866
0
      need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
867
0
  }
868
869
0
  if (suffix.nlabels == 1) {
870
    /* TLD, this is the prevailing '*' match. If type excludes the '*' rule, continue.
871
     */
872
0
    if (!(type & PSL_TYPE_NO_STAR_RULE))
873
0
      return 1;
874
0
  }
875
876
0
  type &= ~PSL_TYPE_NO_STAR_RULE;
877
878
0
  if (psl->utf8 || psl == &builtin_psl)
879
0
    need_conversion = 0;
880
881
0
  if (need_conversion) {
882
0
    psl_idna_t *idna = psl_idna_open();
883
884
0
    if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
885
0
      suffix.label = punycode;
886
0
      suffix.length = strlen(punycode);
887
0
    } else {
888
      /* fallback */
889
890
0
      suffix.label = domain;
891
0
      suffix.length = p - suffix.label;
892
0
    }
893
894
0
    psl_idna_close(idna);
895
0
  } else {
896
0
    suffix.label = domain;
897
0
    suffix.length = p - suffix.label;
898
0
  }
899
900
0
  if (psl == &builtin_psl || psl->dafsa) {
901
0
    size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
902
0
    const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
903
0
    int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
904
0
    if (rc != -1) {
905
      /* check for correct rule type */
906
0
      if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
907
0
        goto suffix_no;
908
0
      else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
909
0
        goto suffix_no;
910
911
0
      if (rc & PRIV_PSL_FLAG_EXCEPTION)
912
0
        goto suffix_no;
913
914
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
915
      /* definitely a match, no matter if the found rule is a wildcard or not */
916
0
      goto suffix_yes;
917
0
    }
918
0
    if ((suffix.label = strchr(suffix.label, '.'))) {
919
0
      suffix.label++;
920
0
      suffix.length = strlen(suffix.label);
921
0
      suffix.nlabels--;
922
923
0
      rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
924
0
      if (rc != -1) {
925
        /* check for correct rule type */
926
0
        if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
927
0
          goto suffix_no;
928
0
        else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
929
0
          goto suffix_no;
930
931
0
        if (rc & PRIV_PSL_FLAG_WILDCARD)
932
0
          goto suffix_yes;
933
0
      }
934
0
    }
935
0
  } else {
936
0
    psl_entry_t *rule = vector_get(psl->suffixes, 0);
937
938
0
    if (!rule || rule->nlabels < suffix.nlabels - 1)
939
0
      goto suffix_no;
940
941
0
    rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
942
943
0
    if (rule) {
944
      /* check for correct rule type */
945
0
      if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
946
0
        goto suffix_no;
947
0
      else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
948
0
        goto suffix_no;
949
950
0
      if (rule->flags & PRIV_PSL_FLAG_EXCEPTION)
951
0
        goto suffix_no;
952
953
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
954
      /* definitely a match, no matter if the found rule is a wildcard or not */
955
0
      goto suffix_yes;
956
0
    }
957
958
0
    if ((suffix.label = strchr(suffix.label, '.'))) {
959
0
      suffix.label++;
960
0
      suffix.length = strlen(suffix.label);
961
0
      suffix.nlabels--;
962
963
0
      rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
964
965
0
      if (rule) {
966
        /* check for correct rule type */
967
0
        if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
968
0
          goto suffix_no;
969
0
        else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
970
0
          goto suffix_no;
971
972
0
        if (rule->flags & PRIV_PSL_FLAG_WILDCARD)
973
0
          goto suffix_yes;
974
0
      }
975
0
    }
976
0
  }
977
978
0
suffix_no:
979
0
  if (punycode)
980
0
    free(punycode);
981
0
  return 0;
982
983
0
suffix_yes:
984
0
  if (punycode)
985
0
    free(punycode);
986
0
  return 1;
987
0
}
988
989
/**
990
 * psl_is_public_suffix:
991
 * @psl: PSL context
992
 * @domain: Domain string
993
 *
994
 * This function checks if @domain is a public suffix by the means of the
995
 * [Mozilla Public Suffix List](https://publicsuffix.org).
996
 *
997
 * For cookie domain checking see psl_is_cookie_domain_acceptable().
998
 *
999
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1000
 * Other encodings likely result in incorrect return values.
1001
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
1002
 *
1003
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1004
 * psl_builtin().
1005
 *
1006
 * Returns: 1 if domain is a public suffix, 0 if not.
1007
 *
1008
 * Since: 0.1
1009
 */
1010
int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
1011
0
{
1012
0
  if (!psl || !domain)
1013
0
    return 1;
1014
1015
0
  return is_public_suffix(psl, domain, PSL_TYPE_ANY);
1016
0
}
1017
1018
/**
1019
 * psl_is_public_suffix2:
1020
 * @psl: PSL context
1021
 * @domain: Domain string
1022
 * @type: Domain type
1023
 *
1024
 * This function checks if @domain is a public suffix by the means of the
1025
 * [Mozilla Public Suffix List](https://publicsuffix.org).
1026
 *
1027
 * @type specifies the PSL section where to perform the lookup. Valid values are
1028
 * %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN, %PSL_TYPE_NO_STAR_RULE, and %PSL_TYPE_ANY.
1029
 *
1030
 * %PSL_TYPE_NO_STAR_RULE switches off the 'prevailing star rule' (see
1031
 * [List](https://publicsuffix.org/list) under 'Algorithm' 2.).
1032
 * Applying the flag means that TLDs not explicitly listed in the PSL are *not* treated as public suffixes.
1033
 *
1034
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1035
 * Other encodings likely result in incorrect return values.
1036
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
1037
 *
1038
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1039
 * psl_builtin().
1040
 *
1041
 * Returns: 1 if domain is a public suffix, 0 if not.
1042
 *
1043
 * Since: 0.1
1044
 */
1045
int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
{
  /* A missing context or domain is reported as "public suffix" (1). */
  return (psl && domain) ? is_public_suffix(psl, domain, type) : 1;
}
1052
1053
/**
1054
 * psl_unregistrable_domain:
1055
 * @psl: PSL context
1056
 * @domain: Domain string
1057
 *
1058
 * This function finds the longest public suffix part of @domain by the means
1059
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1060
 *
1061
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1062
 * Other encodings likely result in incorrect return values.
1063
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
1064
 *
1065
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1066
 * psl_builtin().
1067
 *
1068
 * Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
1069
 * does not contain a public suffix (or if @psl is %NULL).
1070
 *
1071
 * Since: 0.1
1072
 */
1073
const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
{
  int nlabels = 0;
  const char *p;

  if (!psl || !domain)
    return NULL;

  /*
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
   *
   * The scan runs backwards from the terminating 0 byte. 'p' is decremented
   * only while it is still above 'domain', so no pointer before the start of
   * the string is ever formed (that would be undefined behavior, and happened
   * immediately for an empty @domain).
   */
  for (p = domain + strlen(domain); p > domain; ) {
    p--;
    if (*p == '.' && ++nlabels > 8) {
      domain = p + 1; /* continue with the rightmost labels only */
      break;
    }
  }

  /*
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
   */

  while (!is_public_suffix(psl, domain, 0)) {
    if ((domain = strchr(domain, '.')))
      domain++; /* strip the leftmost label and retry */
    else
      break; /* prevent endless loop if is_public_suffix() is broken. */
  }

  /* NULL if no label sequence of @domain was accepted by is_public_suffix() */
  return domain;
}
1106
1107
/**
1108
 * psl_registrable_domain:
1109
 * @psl: PSL context
1110
 * @domain: Domain string
1111
 *
1112
 * This function finds the shortest private suffix part of @domain by the means
1113
 * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1114
 *
1115
 * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1116
 * Other encodings likely result in incorrect return values.
1117
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
1118
 *
1119
 * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1120
 * psl_builtin().
1121
 *
1122
 * Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
1123
 * does not contain a private suffix (or if @psl is %NULL).
1124
 *
1125
 * Since: 0.1
1126
 */
1127
const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
{
  const char *p, *regdom = NULL;
  int nlabels = 0;

  /* a domain starting with a dot is rejected as well */
  if (!psl || !domain || *domain == '.')
    return NULL;

  /*
   * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
   * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
   *
   * The scan runs backwards from the terminating 0 byte. 'p' is decremented
   * only while it is still above 'domain', so no pointer before the start of
   * the string is ever formed (that would be undefined behavior, and happened
   * immediately for an empty @domain).
   */
  for (p = domain + strlen(domain); p > domain; ) {
    p--;
    if (*p == '.' && ++nlabels > 8) {
      domain = p + 1; /* continue with the rightmost labels only */
      break;
    }
  }

  /*
   *  We check from left to right to catch special PSL entries like 'forgot.his.name':
   *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
   */

  while (!is_public_suffix(psl, domain, 0)) {
    if ((p = strchr(domain, '.'))) {
      regdom = domain; /* remember the last candidate that was not a public suffix */
      domain = p + 1;
    } else
      break; /* prevent endless loop if is_public_suffix() is broken. */
  }

  /* stays NULL if the loop never stripped a label */
  return regdom;
}
1161
1162
/**
1163
 * psl_load_file:
1164
 * @fname: Name of PSL file
1165
 *
1166
 * This function loads the public suffixes file named @fname.
1167
 * To free the allocated resources, call psl_free().
1168
 *
1169
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1170
 *
1171
 * Returns: Pointer to a PSL context or %NULL on failure.
1172
 *
1173
 * Since: 0.1
1174
 */
1175
psl_ctx_t *psl_load_file(const char *fname)
{
  FILE *stream;
  psl_ctx_t *ctx;

  if (!fname)
    return NULL;

  /* binary mode, the stream is handed to psl_load_fp() for parsing */
  stream = fopen(fname, "rb");
  if (!stream)
    return NULL;

  ctx = psl_load_fp(stream);
  fclose(stream);

  return ctx;
}
1190
1191
/**
1192
 * psl_load_fp:
1193
 * @fp: %FILE pointer
1194
 *
1195
 * This function loads the public suffixes from a %FILE pointer.
1196
 * To free the allocated resources, call psl_free().
1197
 *
1198
 * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1199
 *
1200
 * Returns: Pointer to a PSL context or %NULL on failure.
1201
 *
1202
 * Since: 0.1
1203
 */
1204
psl_ctx_t *psl_load_fp(FILE *fp)
1205
0
{
1206
0
  psl_ctx_t *psl;
1207
0
  psl_entry_t suffix, *suffixp;
1208
0
  char buf[256], *linep, *p;
1209
0
  int type = 0, is_dafsa;
1210
0
  psl_idna_t *idna;
1211
1212
0
  if (!fp)
1213
0
    return NULL;
1214
1215
0
  if (!(psl = calloc(1, sizeof(psl_ctx_t))))
1216
0
    return NULL;
1217
1218
  /* read first line to allow ASCII / DAFSA detection */
1219
0
  if (!(linep = fgets(buf, sizeof(buf) - 1, fp)))
1220
0
    goto fail;
1221
1222
0
  is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11);
1223
1224
0
  if (is_dafsa) {
1225
0
    void *m;
1226
0
    size_t size = 65536, n, len = 0;
1227
0
    int version = atoi(buf + 11);
1228
1229
0
    if (version != 0)
1230
0
      goto fail;
1231
1232
0
    if (!(psl->dafsa = malloc(size)))
1233
0
      goto fail;
1234
1235
0
    memcpy(psl->dafsa, buf, len);
1236
1237
0
    while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
1238
0
      len += n;
1239
0
      if (len >= size) {
1240
0
        if (!(m = realloc(psl->dafsa, size *= 2)))
1241
0
          goto fail;
1242
0
        psl->dafsa = m;
1243
0
      }
1244
0
    }
1245
1246
    /* release unused memory */
1247
0
    if ((m = realloc(psl->dafsa, len)))
1248
0
      psl->dafsa = m;
1249
0
    else if (!len)
1250
0
      psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
1251
1252
0
    psl->dafsa_size = len;
1253
0
    psl->utf8 = !!GetUtfMode(psl->dafsa, len);
1254
1255
0
    return psl;
1256
0
  }
1257
1258
0
  idna = psl_idna_open();
1259
1260
  /*
1261
   *  as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
1262
   *  as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
1263
   *  as of 07.10.2018, the list at https://publicsuffix.org/list/ contains ~8600 rules and 8 exceptions.
1264
   */
1265
0
  psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
1266
0
  psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
1267
1268
0
  do {
1269
0
    while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
1270
0
    if (!*linep) continue; /* skip empty lines */
1271
1272
0
    if (*linep == '/' && linep[1] == '/') {
1273
0
      if (!type) {
1274
0
        if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
1275
0
          type = PRIV_PSL_FLAG_ICANN;
1276
0
        else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
1277
0
          type = PRIV_PSL_FLAG_PRIVATE;
1278
0
      }
1279
0
      else if (type == PRIV_PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
1280
0
        type = 0;
1281
0
      else if (type == PRIV_PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
1282
0
        type = 0;
1283
1284
0
      continue; /* skip comments */
1285
0
    }
1286
1287
    /* parse suffix rule */
1288
0
    for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
1289
0
    *linep = 0;
1290
1291
0
    if (*p == '!') {
1292
0
      p++;
1293
0
      suffix.flags = PRIV_PSL_FLAG_EXCEPTION | type;
1294
0
      psl->nexceptions++;
1295
0
    } else if (*p == '*') {
1296
0
      if (*++p != '.') {
1297
        /* fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", p - 1); */
1298
0
        continue;
1299
0
      }
1300
0
      p++;
1301
      /* wildcard *.foo.bar implicitly make foo.bar a public suffix */
1302
0
      suffix.flags = PRIV_PSL_FLAG_WILDCARD | PRIV_PSL_FLAG_PLAIN | type;
1303
0
      psl->nwildcards++;
1304
0
      psl->nsuffixes++;
1305
0
    } else {
1306
0
      suffix.flags = PRIV_PSL_FLAG_PLAIN | type;
1307
0
      psl->nsuffixes++;
1308
0
    }
1309
1310
0
    if (suffix_init(&suffix, p, linep - p) == 0) {
1311
0
      int index;
1312
1313
0
      if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
1314
        /* Found existing entry:
1315
         * Combination of exception and plain rule is ambiguous
1316
         * !foo.bar
1317
         * foo.bar
1318
         *
1319
         * Allowed:
1320
         * !foo.bar + *.foo.bar
1321
         * foo.bar + *.foo.bar
1322
         *
1323
         * We do not check here, let's do it later.
1324
         */
1325
1326
0
        suffixp = vector_get(psl->suffixes, index);
1327
0
        suffixp->flags |= suffix.flags;
1328
0
      } else {
1329
        /* New entry */
1330
0
        suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
1331
0
      }
1332
1333
0
      if (suffixp) {
1334
0
        suffixp->label = suffixp->label_buf; /* set label to changed address */
1335
0
        add_punycode_if_needed(idna, psl->suffixes, suffixp);
1336
0
      }
1337
0
    }
1338
0
  } while ((linep = fgets(buf, sizeof(buf), fp)));
1339
1340
0
  vector_sort(psl->suffixes);
1341
1342
0
  psl_idna_close(idna);
1343
1344
0
  return psl;
1345
1346
0
fail:
1347
0
  psl_free(psl);
1348
0
  return NULL;
1349
0
}
1350
1351
/**
1352
 * psl_free:
1353
 * @psl: PSL context pointer
1354
 *
1355
 * This function frees the PSL context that has been retrieved via
1356
 * psl_load_fp() or psl_load_file().
1357
 *
1358
 * Since: 0.1
1359
 */
1360
void psl_free(psl_ctx_t *psl)
{
  /* nothing to do for NULL; the built-in context is never freed */
  if (!psl || psl == &builtin_psl)
    return;

  vector_free(&psl->suffixes);
  free(psl->dafsa);
  free(psl);
}
1368
1369
/**
1370
 * psl_builtin:
1371
 *
1372
 * This function returns the PSL context that has been generated and built in at compile-time.
1373
 * You don't have to free the returned context explicitly.
1374
 *
1375
 * The builtin data also contains punycode entries, one for each international domain name.
1376
 *
1377
 * If the generation of built-in data has been disabled during compilation, %NULL will be returned.
1378
 * When using the builtin psl context, you can provide UTF-8 (lowercase + NFKC) or ASCII/ACE (punycode)
1379
 * representations of domains to functions like psl_is_public_suffix().
1380
 *
1381
 * Returns: Pointer to the built in PSL data or %NULL if this data is not available.
1382
 *
1383
 * Since: 0.1
1384
 */
1385
const psl_ctx_t *psl_builtin(void)
{
#ifndef ENABLE_BUILTIN
  /* no built-in data compiled in */
  return NULL;
#else
  return &builtin_psl;
#endif
}
1393
1394
/**
1395
 * psl_suffix_count:
1396
 * @psl: PSL context pointer
1397
 *
1398
 * This function returns number of public suffixes maintained by @psl.
1399
 * The number of exceptions within the Public Suffix List are not included.
1400
 *
1401
 * If the information is not available, the return value is -1 (since 0.19).
1402
 * This is the case with DAFSA blobs or if @psl is %NULL.
1403
 *
1404
 * Returns: Number of public suffixes entries in PSL context or -1 if this information is not available.
1405
 *
1406
 * Since: 0.1
1407
 */
1408
int psl_suffix_count(const psl_ctx_t *psl)
1409
0
{
1410
0
  if (psl == &builtin_psl)
1411
0
    return _psl_nsuffixes;
1412
0
  else if (psl)
1413
0
    return psl->dafsa ? -1 : psl->nsuffixes;
1414
0
  else
1415
0
    return -1;
1416
0
}
1417
1418
/**
1419
 * psl_suffix_exception_count:
1420
 * @psl: PSL context pointer
1421
 *
1422
 * This function returns number of public suffix exceptions maintained by @psl.
1423
 *
1424
 * If the information is not available, the return value is -1 (since 0.19).
1425
 * This is the case with DAFSA blobs or if @psl is %NULL.
1426
 *
1427
 * Returns: Number of public suffix exceptions in PSL context or -1 if this information is not available.
1428
 *
1429
 * Since: 0.1
1430
 */
1431
int psl_suffix_exception_count(const psl_ctx_t *psl)
1432
0
{
1433
0
  if (psl == &builtin_psl)
1434
0
    return _psl_nexceptions;
1435
0
  else if (psl)
1436
0
    return psl->dafsa ? -1 : psl->nexceptions;
1437
0
  else
1438
0
    return -1;
1439
0
}
1440
1441
/**
1442
 * psl_suffix_wildcard_count:
1443
 * @psl: PSL context pointer
1444
 *
1445
 * This function returns number of public suffix wildcards maintained by @psl.
1446
 *
1447
 * If the information is not available, the return value is -1 (since 0.19).
1448
 * This is the case with DAFSA blobs or if @psl is %NULL.
1449
 *
1450
 * Returns: Number of public suffix wildcards in PSL context or -1 if this information is not available.
1451
 *
1452
 * Since: 0.10.0
1453
 */
1454
int psl_suffix_wildcard_count(const psl_ctx_t *psl)
1455
0
{
1456
0
  if (psl == &builtin_psl)
1457
0
    return _psl_nwildcards;
1458
0
  else if (psl)
1459
0
    return psl->dafsa ? -1 : psl->nwildcards;
1460
0
  else
1461
0
    return -1;
1462
0
}
1463
1464
/**
1465
 * psl_builtin_file_time:
1466
 *
1467
 * This function returns the mtime of the Public Suffix List file that has been built in.
1468
 *
1469
 * If the generation of built-in data has been disabled during compilation, 0 will be returned.
1470
 *
1471
 * Returns: time_t value or 0.
1472
 *
1473
 * Since: 0.1
1474
 */
1475
time_t psl_builtin_file_time(void)
1476
0
{
1477
0
  return _psl_file_time;
1478
0
}
1479
1480
/**
1481
 * psl_builtin_sha1sum:
1482
 *
1483
 * This function returns the SHA1 checksum of the Public Suffix List file that has been built in.
1484
 * The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
1485
 *
1486
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1487
 *
1488
 * Returns: String containing SHA1 checksum or an empty string.
1489
 *
1490
 * Since: 0.1
1491
 */
1492
const char *psl_builtin_sha1sum(void)
1493
0
{
1494
0
  return _psl_sha1_checksum;
1495
0
}
1496
1497
/**
1498
 * psl_builtin_filename:
1499
 *
1500
 * This function returns the file name of the Public Suffix List file that has been built in.
1501
 *
1502
 * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1503
 *
1504
 * Returns: String containing the PSL file name or an empty string.
1505
 *
1506
 * Since: 0.1
1507
 */
1508
const char *psl_builtin_filename(void)
1509
0
{
1510
0
  return _psl_filename;
1511
0
}
1512
1513
/**
1514
 * psl_builtin_outdated:
1515
 *
1516
 * This function checks if the built-in data is older than the file it has been created from.
1517
 * If it is, it might be a good idea for the application to reload the PSL.
1518
 * The mtime is taken as reference.
1519
 *
1520
 * If the PSL file does not exist, it is assumed that the built-in data is not outdated.
1521
 *
1522
 * Returns: 1 if the built-in is outdated, 0 otherwise.
1523
 *
1524
 * Since: 0.10.0
1525
 */
1526
int psl_builtin_outdated(void)
1527
0
{
1528
0
  struct stat st;
1529
1530
0
  if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
1531
0
    return 1;
1532
1533
0
  return 0;
1534
0
}
1535
1536
/**
1537
 * psl_dist_filename:
1538
 *
1539
 * This function returns the file name of the distribution/system PSL data file.
1540
 * This file will be considered by psl_latest().
1541
 *
1542
 * Return the filename that is set by ./configure --with-psl-distfile, or an empty string.
1543
 *
1544
 * Returns: String containing a PSL file name or an empty string.
1545
 *
1546
 * Since: 0.16
1547
 */
1548
const char *psl_dist_filename(void)
1549
0
{
1550
0
  return _psl_dist_filename;
1551
0
}
1552
1553
/**
1554
 * psl_get_version:
1555
 *
1556
 * Get libpsl version.
1557
 *
1558
 * Returns: String containing version of libpsl.
1559
 *
1560
 * Since: 0.2.5
1561
 **/
1562
const char *psl_get_version(void)
1563
0
{
1564
#ifdef WITH_LIBICU
1565
  return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
1566
#elif defined(WITH_LIBIDN2)
1567
0
  return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
1568
#elif defined(WITH_LIBIDN)
1569
  return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
1570
#else
1571
  return PACKAGE_VERSION " (no IDNA support)";
1572
#endif
1573
0
}
1574
1575
/**
1576
 * psl_check_version_number:
1577
 * @version: Version number (hex) to check against.
1578
 *
1579
 * Check the given version number is at minimum the current library version number.
1580
 * The version number must be a hexadecimal number like 0x000a01 (V0.10.1).
1581
 *
1582
 * Returns: Returns the library version number if the given version number is at least
1583
 * the version of the library, else return 0; If the argument is 0, the function returns
1584
 * the library version number without performing a check.
1585
 *
1586
 * Since: 0.11.0
1587
 **/
1588
int psl_check_version_number(int version)
1589
0
{
1590
0
  if (version) {
1591
0
    int major = version >> 16;
1592
0
    int minor = (version >> 8) & 0xFF;
1593
0
    int patch = version & 0xFF;
1594
1595
0
    if (major < PSL_VERSION_MAJOR
1596
0
      || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR)
1597
0
      || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH))
1598
0
    {
1599
0
      return 0;
1600
0
    }
1601
0
  }
1602
1603
0
  return PSL_VERSION_NUMBER;
1604
0
}
1605
1606
/* return whether hostname is an IP address or not */
1607
static int isip(const char *hostname)
{
#ifdef _WIN32
  WCHAR wName[INET6_ADDRSTRLEN+1];

  struct sockaddr_in  addr  = {0};
  struct sockaddr_in6 addr6 = {0};

  INT size  = sizeof(addr);
  INT size6 = sizeof(addr6);

  /* a hostname that cannot be converted to a wide string is not an IP address */
  if (!MultiByteToWideChar(CP_UTF8, 0, hostname, -1, wName, countof(wName)))
    return 0;

  return (WSAStringToAddressW(wName, AF_INET,  NULL, (struct sockaddr *)&addr,  &size) != SOCKET_ERROR) |
         (WSAStringToAddressW(wName, AF_INET6, NULL, (struct sockaddr *)&addr6, &size6) != SOCKET_ERROR);
#else
  struct in_addr addr;
  struct in6_addr addr6;

  /*
   * inet_pton() returns 1 on success, 0 for an unparsable string and -1 on
   * error (e.g. unsupported address family). Only 1 means "is an IP address";
   * treating -1 as true would classify every hostname as an IP address.
   */
  return inet_pton(AF_INET, hostname, &addr) == 1
    || inet_pton(AF_INET6, hostname, &addr6) == 1;
#endif
}
1630
1631
/**
1632
 * psl_is_cookie_domain_acceptable:
1633
 * @psl: PSL context pointer
1634
 * @hostname: The request hostname.
1635
 * @cookie_domain: The domain value from a cookie
1636
 *
1637
 * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
1638
 * @hostname.
1639
 *
1640
 * For international domain names both, @hostname and @cookie_domain, have to be either in UTF-8 (lowercase + NFKC)
1641
 * or in ASCII/ACE (punycode) format. Other encodings or mixing UTF-8 and punycode likely result in incorrect return values.
1642
 *
1643
 * Use helper function psl_str_to_utf8lower() for normalization of @hostname and @cookie_domain.
1644
 *
1645
 * Hint for Windows users:
1646
 * Please make sure the calling application has called WSAStartup() before calling psl_is_cookie_domain_acceptable().
1647
 *
1648
 * Examples:
1649
 * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
1650
 * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
1651
 *
1652
 * 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
1653
 *  but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
1654
 *
1655
 * Returns: 1 if acceptable, 0 if not acceptable.
1656
 *
1657
 * Since: 0.1
1658
 */
1659
int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
{
  const char *tail, *suffix;
  size_t host_len, cookie_len;

  if (!psl || !hostname || !cookie_domain)
    return 0;

  /* leading dots of the cookie domain are ignored */
  for (; *cookie_domain == '.'; cookie_domain++)
    ;

  /* an exact match is acceptable (and pretty common) */
  if (strcmp(hostname, cookie_domain) == 0)
    return 1;

  /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */
  if (isip(hostname))
    return 0;

  cookie_len = strlen(cookie_domain);
  host_len = strlen(hostname);

  /* cookie_domain is too long to be a proper tail of hostname */
  if (cookie_len >= host_len)
    return 0;

  /* cookie_domain must be the tail of hostname and start at a label boundary;
   * tail[-1] is valid because host_len > cookie_len */
  tail = hostname + (host_len - cookie_len);
  if (tail[-1] != '.' || strcmp(tail, cookie_domain) != 0)
    return 0;

  /* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
  suffix = psl_unregistrable_domain(psl, hostname);
  if (!suffix)
    return 1;

  return cookie_len > strlen(suffix) ? 1 : 0;
}
1695
1696
/**
1697
 * psl_free_string:
1698
 * @str: pointer to lowercase string returned by psl_str_to_utf8lower()
1699
 *
1700
 * This function free()'s the memory allocated by psl_str_to_utf8lower() when
1701
 * returning a lowercase string
1702
 *
1703
 * Since: 0.19
1704
 */
1705
void psl_free_string(char *str)
{
  /* free(NULL) is a no-op, so no NULL guard is needed */
  free(str);
}
1710
1711
#if defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1712
/* Avoid using strcasecmp() or _stricmp() */
1713
0
/* Return 1 if 's' is exactly "utf-8" (ASCII case-insensitive), else 0. */
static int isUTF8(const char *s) {
  /* The && chain stops at the first mismatch, so bytes behind the
   * terminating 0 of a shorter string are never read.
   * The previous check 's[4] == 0' accepted only "utf-" and never "utf-8". */
  return (s[0] == 'u' || s[0] == 'U')
    && (s[1] == 't' || s[1] == 'T')
    && (s[2] == 'f' || s[2] == 'F')
    && s[3] == '-' && s[4] == '8' && s[5] == 0;
}
1719
#endif
1720
1721
/**
1722
 * psl_str_to_utf8lower:
1723
 * @str: string to convert
1724
 * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
1725
 * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
1726
 * @lower: return value containing the converted string
1727
 *
1728
 * This helper function converts a string to UTF-8 lowercase + NFKC representation.
1729
 * Lowercase + NFKC UTF-8 is needed as input to the domain checking functions.
1730
 *
1731
 * @lower stays unchanged on error.
1732
 *
1733
 * When returning PSL_SUCCESS, the return value 'lower' must be freed after usage.
1734
 *
1735
 * Returns: psl_error_t value.
1736
 *   PSL_SUCCESS: Success
1737
 *   PSL_ERR_INVALID_ARG: @str is a %NULL value.
1738
 *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
1739
 *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode
1740
 *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
1741
 *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
1742
 *   PSL_ERR_NO_MEM: Failed to allocate memory
1743
 *
1744
 * Since: 0.4
1745
 */
1746
psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding, const char *locale, char **lower)
1747
0
{
1748
0
  int ret = PSL_ERR_INVALID_ARG;
1749
1750
0
  (void) encoding;
1751
0
  (void) locale;
1752
1753
0
  if (!str)
1754
0
    return PSL_ERR_INVALID_ARG;
1755
1756
  /* shortcut to avoid costly conversion */
1757
0
  if (str_is_ascii(str)) {
1758
0
    if (lower) {
1759
0
      char *p, *tmp;
1760
1761
0
      if (!(tmp = strdup(str)))
1762
0
        return PSL_ERR_NO_MEM;
1763
1764
0
      *lower = tmp;
1765
1766
      /* convert ASCII string to lowercase */
1767
0
      for (p = *lower; *p; p++)
1768
0
        if (isupper(*p))
1769
0
          *p = tolower(*p);
1770
0
    }
1771
0
    return PSL_SUCCESS;
1772
0
  }
1773
1774
#ifdef WITH_LIBICU
1775
#define STACK_STRLENGTH 256
1776
  do {
1777
  UErrorCode status = 0;
1778
  UChar *utf16_dst, *utf16_lower;
1779
  char *utf8_lower;
1780
  int32_t utf16_dst_length, utf16_dst_size, utf16_lower_size, utf8_lower_size;
1781
  UConverter *uconv;
1782
  UChar utf16_dst_buf[STACK_STRLENGTH * 2 + 1];
1783
  UChar utf16_lower_buf[STACK_STRLENGTH * 2 + 1];
1784
  char utf8_lower_buf[STACK_STRLENGTH * 6 + 1];
1785
  size_t str_length = strlen(str);
1786
1787
  if (str_length <= STACK_STRLENGTH) {
1788
    utf16_dst_size = countof(utf16_dst_buf);
1789
    utf16_lower_size = countof(utf16_lower_buf);
1790
    utf8_lower_size = countof(utf8_lower_buf);
1791
    utf16_dst   = utf16_dst_buf;
1792
    utf16_lower = utf16_lower_buf;
1793
    utf8_lower  = utf8_lower_buf;
1794
  } else {
1795
    utf16_dst_size = utf16_lower_size = str_length * 2 + 1;
1796
    utf8_lower_size = str_length * 6 + 1;
1797
    utf16_dst   = malloc(sizeof(UChar) * utf16_dst_size);
1798
    utf16_lower = malloc(sizeof(UChar) * utf16_lower_size);
1799
    utf8_lower  = malloc(sizeof(char) * utf8_lower_size);
1800
1801
    if (!utf16_dst || !utf16_lower || !utf8_lower) {
1802
      ret = PSL_ERR_NO_MEM;
1803
      goto out;
1804
    }
1805
  }
1806
1807
  uconv = ucnv_open(encoding, &status);
1808
  if (U_SUCCESS(status)) {
1809
    utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, utf16_dst_size, str, str_length, &status);
1810
    ucnv_close(uconv);
1811
1812
    if (U_SUCCESS(status)) {
1813
      int32_t utf16_lower_length = u_strToLower(utf16_lower, utf16_lower_size, utf16_dst, utf16_dst_length, locale, &status);
1814
      if (U_SUCCESS(status)) {
1815
        u_strToUTF8(utf8_lower, utf8_lower_size, NULL, utf16_lower, utf16_lower_length, &status);
1816
        if (U_SUCCESS(status)) {
1817
          ret = PSL_SUCCESS;
1818
          if (lower) {
1819
            char *tmp = strdup(utf8_lower);
1820
1821
            if (tmp)
1822
              *lower = tmp;
1823
            else
1824
              ret = PSL_ERR_NO_MEM;
1825
          }
1826
        } else {
1827
          ret = PSL_ERR_TO_UTF8;
1828
          /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
1829
        }
1830
      } else {
1831
        ret = PSL_ERR_TO_LOWER;
1832
        /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
1833
      }
1834
    } else {
1835
      ret = PSL_ERR_TO_UTF16;
1836
      /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
1837
    }
1838
  } else {
1839
    ret = PSL_ERR_CONVERTER;
1840
    /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
1841
  }
1842
out:
1843
  if (utf16_dst != utf16_dst_buf)
1844
    free(utf16_dst);
1845
  if (utf16_lower != utf16_lower_buf)
1846
    free(utf16_lower);
1847
  if (utf8_lower != utf8_lower_buf)
1848
    free(utf8_lower);
1849
1850
  } while (0);
1851
#elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1852
0
  do {
1853
    /* find out local charset encoding */
1854
0
    if (!encoding) {
1855
0
#ifdef HAVE_NL_LANGINFO
1856
0
      encoding = nl_langinfo(CODESET);
1857
#elif defined _WIN32
1858
      static char buf[16];
1859
      snprintf(buf, sizeof(buf), "CP%u", GetACP());
1860
      encoding = buf;
1861
#endif
1862
0
      if (!encoding || !*encoding)
1863
0
        encoding = "ASCII";
1864
0
    }
1865
1866
    /* convert to UTF-8 */
1867
0
    if (!isUTF8(encoding)) {
1868
0
      iconv_t cd = iconv_open("utf-8", encoding);
1869
1870
0
      if (cd != (iconv_t)-1) {
1871
0
        char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
1872
0
        size_t tmp_len = strlen(str) + 1;
1873
0
        size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
1874
0
        char *dst = malloc(dst_len + 1), *dst_tmp = dst;
1875
1876
0
        if (!dst) {
1877
0
          ret = PSL_ERR_NO_MEM;
1878
0
        }
1879
0
        else if (iconv(cd, (ICONV_CONST char **)&tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
1880
0
          && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
1881
0
        {
1882
          /* start size for u8_tolower internal memory allocation.
1883
           * u8_tolower() does not terminate the result string. we have 0 byte included in above tmp_len
1884
           * and thus in len. */
1885
0
          size_t len = dst_len - dst_len_tmp;
1886
1887
0
          if ((tmp = (char *)u8_tolower((uint8_t *)dst, len, 0, UNINORM_NFKC, NULL, &len))) {
1888
0
            ret = PSL_SUCCESS;
1889
0
            if (lower) {
1890
0
              *lower = tmp;
1891
0
              tmp = NULL;
1892
0
            } else
1893
0
              free(tmp);
1894
0
          } else {
1895
0
            ret = PSL_ERR_TO_LOWER;
1896
            /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1897
0
          }
1898
0
        } else {
1899
0
          ret = PSL_ERR_TO_UTF8;
1900
          /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1901
0
        }
1902
1903
0
        free(dst);
1904
0
        iconv_close(cd);
1905
0
      } else {
1906
0
        ret = PSL_ERR_TO_UTF8;
1907
        /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1908
0
      }
1909
0
    } else {
1910
      /* we need a conversion to lowercase */
1911
0
      uint8_t *tmp;
1912
1913
      /* start size for u8_tolower internal memory allocation.
1914
       * u8_tolower() does not terminate the result string, so include terminating 0 byte in len. */
1915
0
      size_t len = u8_strlen((uint8_t *)str) + 1;
1916
1917
0
      if ((tmp = u8_tolower((uint8_t *)str, len, 0, UNINORM_NFKC, NULL, &len))) {
1918
0
        ret = PSL_SUCCESS;
1919
0
        if (lower) {
1920
0
          *lower = (char*)tmp;
1921
0
          tmp = NULL;
1922
0
        } else
1923
0
          free(tmp);
1924
0
      } else {
1925
0
        ret = PSL_ERR_TO_LOWER;
1926
        /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1927
0
      }
1928
0
    }
1929
1930
0
  } while (0);
1931
0
#endif
1932
1933
0
  return ret;
1934
0
}
1935
1936
/* if file is newer than the builtin data, insert it reverse sorted by mtime */
1937
static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
1938
0
{
1939
0
  struct stat st;
1940
0
  int it;
1941
1942
0
  if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
1943
    /* add file name and mtime to end of array */
1944
0
    psl_fname[n] = fname;
1945
0
    psl_mtime[n++] = st.st_mtime;
1946
1947
    /* move the new entry to it's correct position */
1948
0
    for (it = n - 2; it >= 0 && st.st_mtime > psl_mtime[it]; it--) {
1949
0
      psl_fname[it + 1] = psl_fname[it];
1950
0
      psl_mtime[it + 1] = psl_mtime[it];
1951
0
      psl_fname[it] = fname;
1952
0
      psl_mtime[it] = st.st_mtime;
1953
0
    }
1954
0
  }
1955
1956
0
  return n;
1957
0
}
1958
1959
/**
1960
 * psl_latest:
1961
 * @fname: Name of PSL file or %NULL
1962
 *
1963
 * This function loads the the latest available PSL data from either
1964
 * - @fname (application specific filename, may be %NULL)
1965
 * - location specified during built-time (filename from ./configure --with-psl-distfile)
1966
 * - built-in PSL data (generated from ./configure --with-psl-file)
1967
 * - location of built-in data (filename from ./configure --with-psl-file)
1968
 *
1969
 * If none of the above is available, the function returns %NULL.
1970
 *
1971
 * To free the allocated resources, call psl_free().
1972
 *
1973
 * Returns: Pointer to a PSL context or %NULL on failure.
1974
 *
1975
 * Since: 0.16
1976
 */
1977
psl_ctx_t *psl_latest(const char *fname)
1978
0
{
1979
0
  psl_ctx_t *psl;
1980
0
  const char *psl_fname[3];
1981
0
  time_t psl_mtime[3];
1982
0
  int it, ntimes;
1983
1984
0
  psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
1985
1986
  /* create array of PSL files reverse sorted by mtime (latest first) */
1987
0
  ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
1988
0
  ntimes = insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
1989
0
  ntimes = insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
1990
1991
  /* load PSL data from the latest file, falling back to the second recent, ... */
1992
0
  for (psl = NULL, it = 0; it < ntimes; it++) {
1993
0
    if (psl_mtime[it] > _psl_file_time)
1994
0
      if ((psl = psl_load_file(psl_fname[it])))
1995
0
        break;
1996
0
  }
1997
1998
  /* if file loading failed or there is no file newer than the builtin data,
1999
   * then return the builtin data. */
2000
0
  return psl ? psl : (psl_ctx_t *) psl_builtin();
2001
0
}