| Line | Count | Source (jump to first uncovered line) | 
| 1 |  | /* | 
| 2 |  |  * Copyright(c) 2014-2022 Tim Ruehsen | 
| 3 |  |  * | 
| 4 |  |  * Permission is hereby granted, free of charge, to any person obtaining a | 
| 5 |  |  * copy of this software and associated documentation files (the "Software"), | 
| 6 |  |  * to deal in the Software without restriction, including without limitation | 
| 7 |  |  * the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
| 8 |  |  * and/or sell copies of the Software, and to permit persons to whom the | 
| 9 |  |  * Software is furnished to do so, subject to the following conditions: | 
| 10 |  |  * | 
| 11 |  |  * The above copyright notice and this permission notice shall be included in | 
| 12 |  |  * all copies or substantial portions of the Software. | 
| 13 |  |  * | 
| 14 |  |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
| 15 |  |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
| 16 |  |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | 
| 17 |  |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
| 18 |  |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | 
| 19 |  |  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | 
| 20 |  |  * DEALINGS IN THE SOFTWARE. | 
| 21 |  |  * | 
| 22 |  |  * This file is part of libpsl. | 
| 23 |  |  * | 
| 24 |  |  * Public Suffix List routines | 
| 25 |  |  * | 
| 26 |  |  * Changelog | 
| 27 |  |  * 19.03.2014  Tim Ruehsen  created from libmget/cookie.c | 
| 28 |  |  * | 
| 29 |  |  */ | 
| 30 |  |  | 
| 31 |  | #if HAVE_CONFIG_H | 
| 32 |  | # include <config.h> | 
| 33 |  | #endif | 
| 34 |  |  | 
/*
 * GCC_VERSION_AT_LEAST(major, minor) evaluates to non-zero when the compiler
 * defines __GNUC__/__GNUC_MINOR__ and reports at least version major.minor.
 * For all other compilers it is the constant 0.
 */
#if !defined(__GNUC__) || !defined(__GNUC_MINOR__)
#  define GCC_VERSION_AT_LEAST(major, minor) 0
#else
#  define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#endif

/*
 * PSL_UNUSED marks a declaration as intentionally unused.
 * It expands to nothing when the attribute is not available.
 */
#if !GCC_VERSION_AT_LEAST(2,95)
#  define PSL_UNUSED
#else
#  define PSL_UNUSED __attribute__ ((unused))
#endif
| 46 |  |  | 
| 47 |  | #include <sys/types.h> | 
| 48 |  | #include <sys/stat.h> | 
| 49 |  |  | 
| 50 |  | #ifdef _WIN32 | 
| 51 |  | # include <winsock2.h> | 
| 52 |  | # include <ws2tcpip.h> | 
| 53 |  | #else | 
| 54 |  | # include <sys/socket.h> | 
| 55 |  | # include <netinet/in.h> | 
| 56 |  | # include <unistd.h> | 
| 57 |  | #endif | 
| 58 |  |  | 
| 59 |  | #if defined(_MSC_VER) && ! defined(ssize_t) | 
| 60 |  | # include <basetsd.h> | 
| 61 |  | typedef SSIZE_T ssize_t; | 
| 62 |  | #endif | 
| 63 |  |  | 
| 64 |  | #include <stdio.h> | 
| 65 |  | #include <stdlib.h> | 
| 66 |  | #include <string.h> | 
| 67 |  | #ifdef HAVE_STRINGS_H | 
| 68 |  | # include <strings.h> | 
| 69 |  | #endif | 
| 70 |  | #include <ctype.h> | 
| 71 |  | #include <time.h> | 
| 72 |  | #include <errno.h> | 
| 73 |  | #include <limits.h> /* for UINT_MAX */ | 
| 74 |  |  | 
| 75 |  | #ifdef HAVE_NL_LANGINFO | 
| 76 |  | # include <langinfo.h> | 
| 77 |  | #endif | 
| 78 |  |  | 
| 79 |  | #ifndef _WIN32 | 
| 80 |  | # include <arpa/inet.h> | 
| 81 |  | #endif | 
| 82 |  |  | 
| 83 |  | #ifdef HAVE_ALLOCA_H | 
| 84 |  | # include <alloca.h> | 
| 85 |  | #endif | 
| 86 |  |  | 
| 87 |  | #ifdef WITH_LIBICU | 
| 88 |  | # include <unicode/uversion.h> | 
| 89 |  | # include <unicode/ustring.h> | 
| 90 |  | # include <unicode/uidna.h> | 
| 91 |  | # include <unicode/ucnv.h> | 
| 92 |  | #elif defined(WITH_LIBIDN2) | 
| 93 |  | # include <iconv.h> | 
| 94 |  | # include <idn2.h> | 
| 95 |  | # include <unicase.h> | 
| 96 |  | # include <unistr.h> | 
| 97 |  | #elif defined(WITH_LIBIDN) | 
| 98 |  | # include <iconv.h> | 
| 99 |  | # include <stringprep.h> | 
| 100 |  | # include <idna.h> | 
| 101 |  | # include <unicase.h> | 
| 102 |  | # include <unistr.h> | 
| 103 |  | #endif | 
| 104 |  |  | 
| 105 |  | #ifndef WINICONV_CONST | 
| 106 |  | #  define WINICONV_CONST | 
| 107 |  | #endif | 
| 108 |  |  | 
| 109 |  | #include <libpsl.h> | 
| 110 |  |  | 
| 111 |  | /** | 
| 112 |  |  * SECTION:libpsl | 
| 113 |  |  * @short_description: Public Suffix List library functions | 
| 114 |  |  * @title: libpsl | 
| 115 |  |  * @stability: Stable | 
| 116 |  |  * @include: libpsl.h | 
| 117 |  |  * | 
| 118 |  |  * [Public Suffix List](https://publicsuffix.org/) library functions. | 
| 119 |  |  * | 
| 120 |  |  */ | 
| 121 |  |  | 
/* number of elements of a real array (must not be used on a pointer) */
#define countof(a) (sizeof(a)/sizeof(*(a)))

/* internal flag bits, each one occupies its own bit so they can be OR-ed */
#define PRIV_PSL_FLAG_EXCEPTION (1 << 0)
#define PRIV_PSL_FLAG_WILDCARD  (1 << 1)
#define PRIV_PSL_FLAG_ICANN     (1 << 2) /* entry of ICANN section */
#define PRIV_PSL_FLAG_PRIVATE   (1 << 3) /* entry of PRIVATE section */
#define PRIV_PSL_FLAG_PLAIN     (1 << 4) /* just used for PSL syntax checking */
| 129 |  |  | 
/* a single suffix rule as kept in a psl_vector_t */
typedef struct {
  char label_buf[128];    /* storage for the rule text, filled by suffix_init() */
  const char *label;      /* rule text; suffix_init() points it at label_buf */
  unsigned short length;  /* length of the rule text as given to suffix_init() */
  unsigned char nlabels;  /* number of labels */
  unsigned char flags;    /* flag bits of the rule */
} psl_entry_t;
| 141 |  |  | 
| 142 |  | /* stripped down version libmget vector routines */ | 
| 143 |  | typedef struct { | 
| 144 |  |   int | 
| 145 |  |     (*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */ | 
| 146 |  |   psl_entry_t | 
| 147 |  |     **entry; /* pointer to array of pointers to elements */ | 
| 148 |  |   int | 
| 149 |  |     max,     /* allocated elements */ | 
| 150 |  |     cur;     /* number of elements in use */ | 
| 151 |  | } psl_vector_t; | 
| 152 |  |  | 
| 153 |  | struct psl_ctx_st { | 
| 154 |  |   psl_vector_t | 
| 155 |  |     *suffixes; | 
| 156 |  |   unsigned char | 
| 157 |  |     *dafsa; | 
| 158 |  |   size_t | 
| 159 |  |     dafsa_size; | 
| 160 |  |   int | 
| 161 |  |     nsuffixes, | 
| 162 |  |     nexceptions, | 
| 163 |  |     nwildcards; | 
| 164 |  |   unsigned | 
| 165 |  |     utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */ | 
| 166 |  | }; | 
| 167 |  |  | 
| 168 |  | /* include the PSL data generated by psl-make-dafsa */ | 
| 169 |  | #ifdef ENABLE_BUILTIN | 
| 170 |  | #include "suffixes_dafsa.h" | 
| 171 |  | #else | 
| 172 |  | static const unsigned char kDafsa[] = ""; | 
| 173 |  | static time_t _psl_file_time = 0; | 
| 174 |  | static int _psl_nsuffixes = 0; | 
| 175 |  | static int _psl_nexceptions = 0; | 
| 176 |  | static int _psl_nwildcards = 0; | 
| 177 |  | static const char _psl_sha1_checksum[] = ""; | 
| 178 |  | static const char _psl_filename[] = ""; | 
| 179 |  | #endif | 
| 180 |  |  | 
| 181 |  | /* references to these PSLs will result in lookups to built-in data */ | 
| 182 |  | static const psl_ctx_t | 
| 183 |  |   builtin_psl; | 
| 184 |  |  | 
| 185 |  | #ifdef PSL_DISTFILE | 
| 186 |  | static const char _psl_dist_filename[] = PSL_DISTFILE; | 
| 187 |  | #else | 
| 188 |  | static const char _psl_dist_filename[] = ""; | 
| 189 |  | #endif | 
| 190 |  |  | 
| 191 |  | static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **)) | 
| 192 | 0 | { | 
| 193 | 0 |   psl_vector_t *v; | 
| 194 |  | 
 | 
| 195 | 0 |   if (!(v = calloc(1, sizeof(psl_vector_t)))) | 
| 196 | 0 |     return NULL; | 
| 197 |  |  | 
| 198 | 0 |   if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) { | 
| 199 | 0 |     free(v); | 
| 200 | 0 |     return NULL; | 
| 201 | 0 |   } | 
| 202 |  |  | 
| 203 | 0 |   v->max = max; | 
| 204 | 0 |   v->cmp = cmp; | 
| 205 | 0 |   return v; | 
| 206 | 0 | } | 
| 207 |  |  | 
| 208 |  | static void vector_free(psl_vector_t **v) | 
| 209 | 0 | { | 
| 210 | 0 |   if (v && *v) { | 
| 211 | 0 |     if ((*v)->entry) { | 
| 212 | 0 |       int it; | 
| 213 |  | 
 | 
| 214 | 0 |       for (it = 0; it < (*v)->cur; it++) | 
| 215 | 0 |         free((*v)->entry[it]); | 
| 216 |  | 
 | 
| 217 | 0 |       free((*v)->entry); | 
| 218 | 0 |     } | 
| 219 | 0 |     free(*v); | 
| 220 | 0 |   } | 
| 221 | 0 | } | 
| 222 |  |  | 
| 223 |  | static psl_entry_t *vector_get(const psl_vector_t *v, int pos) | 
| 224 | 0 | { | 
| 225 | 0 |   if (pos < 0 || !v || pos >= v->cur) return NULL; | 
| 226 |  |  | 
| 227 | 0 |   return v->entry[pos]; | 
| 228 | 0 | } | 
| 229 |  |  | 
| 230 |  | /* the entries must be sorted by */ | 
| 231 |  | static int vector_find(const psl_vector_t *v, const psl_entry_t *elem) | 
| 232 | 0 | { | 
| 233 | 0 |   if (v) { | 
| 234 | 0 |     int l, r, m; | 
| 235 | 0 |     int res; | 
| 236 |  |  | 
| 237 |  |     /* binary search for element (exact match) */ | 
| 238 | 0 |     for (l = 0, r = v->cur - 1; l <= r;) { | 
| 239 | 0 |       m = (l + r) / 2; | 
| 240 | 0 |       if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1; | 
| 241 | 0 |       else if (res < 0) r = m - 1; | 
| 242 | 0 |       else return m; | 
| 243 | 0 |     } | 
| 244 | 0 |   } | 
| 245 |  |  | 
| 246 | 0 |   return -1; /* not found */ | 
| 247 | 0 | } | 
| 248 |  |  | 
| 249 |  | static int vector_add(psl_vector_t *v, const psl_entry_t *elem) | 
| 250 | 0 | { | 
| 251 | 0 |   if (v) { | 
| 252 | 0 |     void *elemp; | 
| 253 |  | 
 | 
| 254 | 0 |     if (!(elemp = malloc(sizeof(psl_entry_t)))) | 
| 255 | 0 |       return -1; | 
| 256 |  |  | 
| 257 | 0 |     memcpy(elemp, elem, sizeof(psl_entry_t)); | 
| 258 |  | 
 | 
| 259 | 0 |     if (v->max == v->cur) { | 
| 260 | 0 |       void *m = realloc(v->entry, (v->max *= 2) * sizeof(psl_entry_t *)); | 
| 261 |  | 
 | 
| 262 | 0 |       if (m) | 
| 263 | 0 |         v->entry = m; | 
| 264 | 0 |       else { | 
| 265 | 0 |         free(elemp); | 
| 266 | 0 |         return -1; | 
| 267 | 0 |       } | 
| 268 | 0 |     } | 
| 269 |  |  | 
| 270 | 0 |     v->entry[v->cur++] = elemp; | 
| 271 | 0 |     return v->cur - 1; | 
| 272 | 0 |   } | 
| 273 |  |  | 
| 274 | 0 |   return -1; | 
| 275 | 0 | } | 
| 276 |  |  | 
| 277 |  | static void vector_sort(psl_vector_t *v) | 
| 278 | 0 | { | 
| 279 | 0 |   if (v && v->cmp) | 
| 280 | 0 |     qsort(v->entry, v->cur, sizeof(psl_vector_t **), (int(*)(const void *, const void *))v->cmp); | 
| 281 | 0 | } | 
| 282 |  |  | 
| 283 |  | /* by this kind of sorting, we can easily see if a domain matches or not */ | 
| 284 |  | static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2) | 
| 285 | 0 | { | 
| 286 | 0 |   int n; | 
| 287 |  | 
 | 
| 288 | 0 |   if ((n = s2->nlabels - s1->nlabels)) | 
| 289 | 0 |     return n; /* most labels first */ | 
| 290 |  |  | 
| 291 | 0 |   if ((n = s1->length - s2->length)) | 
| 292 | 0 |     return n;  /* shorter rules first */ | 
| 293 |  |  | 
| 294 | 0 |   return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf); | 
| 295 | 0 | } | 
| 296 |  |  | 
| 297 |  | /* needed to sort array of pointers, given to qsort() */ | 
| 298 |  | static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2) | 
| 299 | 0 | { | 
| 300 | 0 |   return suffix_compare(*s1, *s2); | 
| 301 | 0 | } | 
| 302 |  |  | 
| 303 |  | static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length) | 
| 304 | 0 | { | 
| 305 | 0 |   const char *src; | 
| 306 | 0 |   char *dst; | 
| 307 |  | 
 | 
| 308 | 0 |   suffix->label = suffix->label_buf; | 
| 309 |  | 
 | 
| 310 | 0 |   if (length >= sizeof(suffix->label_buf) - 1) { | 
| 311 | 0 |     suffix->nlabels = 0; | 
| 312 |  |     /* fprintf(stderr, "Suffix rule too long (%zd, ignored): %s\n", length, rule); */ | 
| 313 | 0 |     return -1; | 
| 314 | 0 |   } | 
| 315 |  |  | 
| 316 | 0 |   suffix->length = (unsigned char)length; | 
| 317 |  | 
 | 
| 318 | 0 |   suffix->nlabels = 1; | 
| 319 |  | 
 | 
| 320 | 0 |   for (dst = suffix->label_buf, src = rule; *src;) { | 
| 321 | 0 |     if (*src == '.') | 
| 322 | 0 |       suffix->nlabels++; | 
| 323 | 0 |     *dst++ = *src++; | 
| 324 | 0 |   } | 
| 325 | 0 |   *dst = 0; | 
| 326 |  | 
 | 
| 327 | 0 |   return 0; | 
| 328 | 0 | } | 
| 329 |  |  | 
| 330 |  | #if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU) | 
| 331 |  | /* | 
| 332 |  |  * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII | 
| 333 |  |  * representation of non-ASCII characters in labels as found in UTF-8 domain names. | 
| 334 |  |  * This is because the current DAFSA format used may only hold character values [21..127]. | 
| 335 |  |  * | 
| 336 |  |   Code copied from http://www.nicemice.net/idn/punycode-spec.gz on | 
| 337 |  |   2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133 | 
| 338 |  |   labeled punycode-spec 1.0.3 (2006-Mar-24-Thu).  It is modified for | 
| 339 |  |   libpsl by Tim Rühsen.  License on the original code: | 
| 340 |  |  | 
| 341 |  |   punycode-spec 1.0.3 (2006-Mar-23-Thu) | 
| 342 |  |   http://www.nicemice.net/idn/ | 
| 343 |  |   Adam M. Costello | 
| 344 |  |   http://www.nicemice.net/amc/ | 
| 345 |  |  | 
| 346 |  |   B. Disclaimer and license | 
| 347 |  |  | 
| 348 |  |     Regarding this entire document or any portion of it (including | 
| 349 |  |     the pseudocode and C code), the author makes no guarantees and | 
| 350 |  |     is not responsible for any damage resulting from its use.  The | 
| 351 |  |     author grants irrevocable permission to anyone to use, modify, | 
| 352 |  |     and distribute it in any way that does not diminish the rights | 
| 353 |  |     of anyone else to use, modify, and distribute it, provided that | 
| 354 |  |     redistributed derivative works do not contain misleading author or | 
| 355 |  |     version information.  Derivative works need not be licensed under | 
| 356 |  |     similar terms. | 
| 357 |  |  | 
| 358 |  |   C. Punycode sample implementation | 
| 359 |  |  | 
| 360 |  |   punycode-sample.c 2.0.0 (2004-Mar-21-Sun) | 
| 361 |  |   http://www.nicemice.net/idn/ | 
| 362 |  |   Adam M. Costello | 
| 363 |  |   http://www.nicemice.net/amc/ | 
| 364 |  |  | 
| 365 |  |   This is ANSI C code (C89) implementing Punycode 1.0.x. | 
| 366 |  |  */ | 
/* result codes of the punycode routines */
enum punycode_status {
  punycode_success = 0,
  punycode_bad_input = 1,  /* Input is invalid.                       */
  punycode_big_output = 2, /* Output would exceed the space provided. */
  punycode_overflow = 3    /* Wider integers needed to process input. */
};

/*
 * Unsigned integer type for code points.
 * PUNYCODE_UINT overrides the choice; otherwise 'unsigned int' is used when
 * UINT_MAX is at least 2^26 - 1, else 'unsigned long'.
 */
#if defined(PUNYCODE_UINT)
  typedef PUNYCODE_UINT punycode_uint;
#elif UINT_MAX >= (1 << 26) - 1
  typedef unsigned int punycode_uint;
#else
  typedef unsigned long punycode_uint;
#endif

/*** Bootstring parameters for Punycode ***/
enum {
  base = 36,
  tmin = 1,
  tmax = 26,
  skew = 38,
  damp = 700,
  initial_bias = 72,
  initial_n = 0x80,
  delimiter = 0x2D
};
| 387 |  |  | 
| 388 |  | static char encode_digit(punycode_uint d) | 
| 389 |  | { | 
| 390 |  |   return d + 22 + 75 * (d < 26); | 
| 391 |  |   /*  0..25 map to ASCII a..z or A..Z */ | 
| 392 |  |   /* 26..35 map to ASCII 0..9         */ | 
| 393 |  | } | 
| 394 |  | #define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26) | 
| 395 |  | static const punycode_uint maxint = -1; | 
| 396 |  |  | 
| 397 |  | static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime) | 
| 398 |  | { | 
| 399 |  |   punycode_uint k; | 
| 400 |  |  | 
| 401 |  |   delta = firsttime ? delta / damp : delta >> 1; | 
| 402 |  |   /* delta >> 1 is a faster way of doing delta / 2 */ | 
| 403 |  |   delta += delta / numpoints; | 
| 404 |  |  | 
| 405 |  |   for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) { | 
| 406 |  |     delta /= base - tmin; | 
| 407 |  |   } | 
| 408 |  |  | 
| 409 |  |   return k + (base - tmin + 1) * delta / (delta + skew); | 
| 410 |  | } | 
| 411 |  |  | 
| 412 |  | static enum punycode_status punycode_encode( | 
| 413 |  |   size_t input_length_orig, | 
| 414 |  |   const punycode_uint input[], | 
| 415 |  |   size_t *output_length, | 
| 416 |  |   char output[]) | 
| 417 |  | { | 
| 418 |  |   punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t; | 
| 419 |  |   size_t out, max_out; | 
| 420 |  |  | 
| 421 |  |   /* The Punycode spec assumes that the input length is the same type */ | 
| 422 |  |   /* of integer as a code point, so we need to convert the size_t to  */ | 
| 423 |  |   /* a punycode_uint, which could overflow.                           */ | 
| 424 |  |  | 
| 425 |  |   if (input_length_orig > maxint) | 
| 426 |  |     return punycode_overflow; | 
| 427 |  |  | 
| 428 |  |   input_length = (punycode_uint) input_length_orig; | 
| 429 |  |  | 
| 430 |  |   /* Initialize the state: */ | 
| 431 |  |  | 
| 432 |  |   n = initial_n; | 
| 433 |  |   delta = 0; | 
| 434 |  |   out = 0; | 
| 435 |  |   max_out = *output_length; | 
| 436 |  |   bias = initial_bias; | 
| 437 |  |  | 
| 438 |  |   /* Handle the basic code points: */ | 
| 439 |  |   for (j = 0; j < input_length; ++j) { | 
| 440 |  |     if (input[j] < 0x80) { | 
| 441 |  |       if (max_out - out < 2) | 
| 442 |  |         return punycode_big_output; | 
| 443 |  |       output[out++] = (char) input[j]; | 
| 444 |  |     } | 
| 445 |  |     /* else if (input[j] < n) return punycode_bad_input; */ | 
| 446 |  |     /* (not needed for Punycode with unsigned code points) */ | 
| 447 |  |   } | 
| 448 |  |  | 
| 449 |  |   h = b = (punycode_uint) out; | 
| 450 |  |   /* cannot overflow because out <= input_length <= maxint */ | 
| 451 |  |  | 
| 452 |  |   /* h is the number of code points that have been handled, b is the  */ | 
| 453 |  |   /* number of basic code points, and out is the number of ASCII code */ | 
| 454 |  |   /* points that have been output.                                    */ | 
| 455 |  |  | 
| 456 |  |   if (b > 0) | 
| 457 |  |     output[out++] = delimiter; | 
| 458 |  |  | 
| 459 |  |   /* Main encoding loop: */ | 
| 460 |  |  | 
| 461 |  |   while (h < input_length) { | 
| 462 |  |     /* All non-basic code points < n have been     */ | 
| 463 |  |     /* handled already.  Find the next larger one: */ | 
| 464 |  |  | 
| 465 |  |     for (m = maxint, j = 0; j < input_length; ++j) { | 
| 466 |  |       /* if (basic(input[j])) continue; */ | 
| 467 |  |       /* (not needed for Punycode) */ | 
| 468 |  |       if (input[j] >= n && input[j] < m) | 
| 469 |  |         m = input[j]; | 
| 470 |  |     } | 
| 471 |  |  | 
| 472 |  |     /* Increase delta enough to advance the decoder's    */ | 
| 473 |  |     /* <n,i> state to <m,0>, but guard against overflow: */ | 
| 474 |  |  | 
| 475 |  |     if (m - n > (maxint - delta) / (h + 1)) | 
| 476 |  |       return punycode_overflow; | 
| 477 |  |     delta += (m - n) * (h + 1); | 
| 478 |  |     n = m; | 
| 479 |  |  | 
| 480 |  |     for (j = 0; j < input_length; ++j) { | 
| 481 |  |       /* Punycode does not need to check whether input[j] is basic: */ | 
| 482 |  |       if (input[j] < n /* || basic(input[j]) */) { | 
| 483 |  |         if (++delta == 0) | 
| 484 |  |           return punycode_overflow; | 
| 485 |  |       } | 
| 486 |  |  | 
| 487 |  |       if (input[j] == n) { | 
| 488 |  |         /* Represent delta as a generalized variable-length integer: */ | 
| 489 |  |  | 
| 490 |  |         for (q = delta, k = base;; k += base) { | 
| 491 |  |           if (out >= max_out) | 
| 492 |  |             return punycode_big_output; | 
| 493 |  |           t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */ | 
| 494 |  |             k >= bias + tmax ? tmax : k - bias; | 
| 495 |  |           if (q < t) | 
| 496 |  |             break; | 
| 497 |  |           output[out++] = encode_digit(t + (q - t) % (base - t)); | 
| 498 |  |           q = (q - t) / (base - t); | 
| 499 |  |         } | 
| 500 |  |  | 
| 501 |  |         output[out++] = encode_digit(q); | 
| 502 |  |         bias = adapt(delta, h + 1, h == b); | 
| 503 |  |         delta = 0; | 
| 504 |  |         ++h; | 
| 505 |  |       } | 
| 506 |  |     } | 
| 507 |  |  | 
| 508 |  |     ++delta, ++n; | 
| 509 |  |   } | 
| 510 |  |  | 
| 511 |  |   *output_length = out; | 
| 512 |  |   return punycode_success; | 
| 513 |  | } | 
| 514 |  |  | 
| 515 |  | static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen) | 
| 516 |  | { | 
| 517 |  |   size_t n = 0; | 
| 518 |  |   const unsigned char *s = (void *)in; | 
| 519 |  |   const unsigned char *e = (void *)(in + inlen); | 
| 520 |  |  | 
| 521 |  |   if (!outlen) | 
| 522 |  |     return -1; | 
| 523 |  |  | 
| 524 |  |   outlen--; | 
| 525 |  |  | 
| 526 |  |   while (n < outlen) { | 
| 527 |  |     size_t inleft = e - s; | 
| 528 |  |  | 
| 529 |  |     if (inleft >= 1 && (*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */ | 
| 530 |  |       out[n++] = *s; | 
| 531 |  |       s++; | 
| 532 |  |     } else if (inleft >= 2 && (*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */ { | 
| 533 |  |       if ((s[1] & 0xC0) != 0x80) | 
| 534 |  |         return -1; | 
| 535 |  |       out[n++] = ((*s & 0x1F) << 6) | (s[1] & 0x3F); | 
| 536 |  |       s += 2; | 
| 537 |  |     } else if (inleft >= 3 && (*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */ { | 
| 538 |  |       if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80) | 
| 539 |  |         return -1; | 
| 540 |  |       out[n++] = ((*s & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); | 
| 541 |  |       s += 3; | 
| 542 |  |     } else if (inleft >= 4 && (*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ { | 
| 543 |  |       if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80) | 
| 544 |  |         return -1; | 
| 545 |  |       out[n++] = ((*s & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F); | 
| 546 |  |       s += 4; | 
| 547 |  |     } else if (!inleft) { | 
| 548 |  |       break; | 
| 549 |  |     } else | 
| 550 |  |       return -1; | 
| 551 |  |   } | 
| 552 |  |  | 
| 553 |  |   return n; | 
| 554 |  | } | 
| 555 |  |  | 
/*
 * Return 1 if none of the first 'n' bytes at 's' has a value >= 128,
 * else 0. An empty range (n == 0) counts as ASCII.
 */
static int mem_is_ascii(const char *s, size_t n)
{
  const unsigned char *bytes = (const unsigned char *) s;
  size_t i;

  for (i = 0; i < n; i++) {
    if (bytes[i] & 0x80)
      return 0;
  }

  return 1;
}
| 564 |  |  | 
| 565 |  | static int domain_to_punycode(const char *domain, char *out, size_t outsize) | 
| 566 |  | { | 
| 567 |  |   size_t outlen = 0, labellen; | 
| 568 |  |   punycode_uint input[256]; | 
| 569 |  |   const char *label, *e; | 
| 570 |  |  | 
| 571 |  |   for (e = label = domain; e;) { | 
| 572 |  |     e = strchr(label, '.'); | 
| 573 |  |     labellen = e ? (size_t) (e - label) : strlen(label); | 
| 574 |  |  | 
| 575 |  |     if (mem_is_ascii(label, labellen)) { | 
| 576 |  |       if (outlen + labellen + (e != NULL) >= outsize) | 
| 577 |  |         return 1; | 
| 578 |  |  | 
| 579 |  |       memcpy(out + outlen, label, labellen); | 
| 580 |  |       outlen += labellen; | 
| 581 |  |     } else { | 
| 582 |  |       ssize_t inputlen = 0; | 
| 583 |  |  | 
| 584 |  |       if (outlen + labellen + (e != NULL) + 4 >= outsize) | 
| 585 |  |         return 1; | 
| 586 |  |  | 
| 587 |  |       if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0) | 
| 588 |  |         return 1; | 
| 589 |  |  | 
| 590 |  |       memcpy(out + outlen, "xn--", 4); | 
| 591 |  |       outlen += 4; | 
| 592 |  |  | 
| 593 |  |       labellen = outsize - outlen - (e != NULL) - 1; // -1 to leave space for the trailing \0 | 
| 594 |  |       if (punycode_encode(inputlen, input, &labellen, out + outlen)) | 
| 595 |  |         return 1; | 
| 596 |  |       outlen += labellen; | 
| 597 |  |     } | 
| 598 |  |  | 
| 599 |  |     if (e) { | 
| 600 |  |       label = e + 1; | 
| 601 |  |       out[outlen++] = '.'; | 
| 602 |  |     } | 
| 603 |  |     out[outlen] = 0; | 
| 604 |  |   } | 
| 605 |  |  | 
| 606 |  |   return 0; | 
| 607 |  | } | 
| 608 |  | #endif | 
| 609 |  |  | 
/* Return 1 if 'c' is a space, tab, carriage return or newline, else 0. */
static int isspace_ascii(const char c)
{
  switch (c) {
  case ' ':
  case '\t':
  case '\r':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
| 614 |  |  | 
/*
 * Return 1 if the NUL-terminated string 's' contains no byte >= 128,
 * else 0. The empty string counts as ASCII.
 */
static int str_is_ascii(const char *s)
{
  const unsigned char *p;

  for (p = (const unsigned char *) s; *p; p++) {
    if (*p >= 128)
      return 0;
  }

  return 1;
}
| 621 |  |  | 
| 622 |  | #if defined(WITH_LIBIDN) | 
| 623 |  | /* | 
| 624 |  |  * Work around a libidn <= 1.30 vulnerability. | 
| 625 |  |  * | 
| 626 |  |  * The function checks for a valid UTF-8 character sequence before | 
| 627 |  |  * passing it to idna_to_ascii_8z(). | 
| 628 |  |  * | 
| 629 |  |  * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html | 
| 630 |  |  * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html | 
| 631 |  |  * [3] https://curl.haxx.se/mail/lib-2015-06/0143.html | 
| 632 |  |  */ | 
/*
 * Check the NUL-terminated string 'utf8' for well-formed UTF-8 sequences:
 * every lead byte must be followed by the right number of continuation
 * bytes (10xxxxxx). Only these bit patterns are checked.
 *
 * Returns 1 if the whole string passes the check, else 0.
 */
static int utf8_is_valid(const char *utf8)
{
  const unsigned char *s = (const unsigned char *) utf8;

  while (*s) {
    int ncont, i;

    if ((*s & 0x80) == 0)         /* 0xxxxxxx ASCII char */
      ncont = 0;
    else if ((*s & 0xE0) == 0xC0) /* 110xxxxx 10xxxxxx */
      ncont = 1;
    else if ((*s & 0xF0) == 0xE0) /* 1110xxxx 10xxxxxx 10xxxxxx */
      ncont = 2;
    else if ((*s & 0xF8) == 0xF0) /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
      ncont = 3;
    else
      return 0;

    /* a terminating NUL is no continuation byte, so we never read past it */
    for (i = 1; i <= ncont; i++) {
      if ((s[i] & 0xC0) != 0x80)
        return 0;
    }

    s += ncont + 1;
  }

  return 1;
}
| 658 |  | #endif | 
| 659 |  |  | 
/* opaque handle type for the IDNA backend */
typedef void *psl_idna_t;

/*
 * Open a handle for IDNA conversions.
 * With libicu this is a UTS#46 instance, for all other configurations no
 * handle is needed and NULL is returned.
 */
static psl_idna_t *psl_idna_open(void)
{
#if defined(WITH_LIBICU)
  UErrorCode status = 0;

  return (void *)uidna_openUTS46(UIDNA_USE_STD3_RULES | UIDNA_NONTRANSITIONAL_TO_ASCII, &status);
#else
  return NULL;
#endif
}
| 670 |  |  | 
| 671 |  | static void psl_idna_close(psl_idna_t *idna PSL_UNUSED) | 
| 672 | 0 | { | 
| 673 |  | #if defined(WITH_LIBICU) | 
| 674 |  |   if (idna) | 
| 675 |  |     uidna_close((UIDNA *)idna); | 
| 676 |  | #endif | 
| 677 | 0 | } | 
| 678 |  |  | 
| 679 |  | static int psl_idna_toASCII(psl_idna_t *idna PSL_UNUSED, const char *utf8, char **ascii) | 
| 680 | 0 | { | 
| 681 | 0 |   int ret = -1; | 
| 682 |  | 
 | 
| 683 |  | #if defined(WITH_LIBICU) | 
| 684 |  |   /* IDNA2008 UTS#46 punycode conversion */ | 
| 685 |  |   if (idna) { | 
| 686 |  |     char lookupname_buf[128] = "", *lookupname = lookupname_buf; | 
| 687 |  |     UErrorCode status = 0; | 
| 688 |  |     UIDNAInfo info = UIDNA_INFO_INITIALIZER; | 
| 689 |  |     UChar utf16_dst[128], utf16_src_buf[128]; | 
| 690 |  |     UChar *utf16_src = utf16_src_buf; | 
| 691 |  |     int32_t utf16_src_length, bytes_written; | 
| 692 |  |     int32_t utf16_dst_length; | 
| 693 |  |  | 
| 694 |  |     u_strFromUTF8(utf16_src, countof(utf16_src_buf), &utf16_src_length, utf8, -1, &status); | 
| 695 |  |     if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */ | 
| 696 |  |  | 
| 697 |  |     if (utf16_src_length >= (int) countof(utf16_src_buf)) { | 
| 698 |  |       utf16_src = malloc((utf16_src_length + 1) * sizeof(UChar)); | 
| 699 |  |       if (!utf16_src) goto cleanup; | 
| 700 |  |  | 
| 701 |  |       u_strFromUTF8(utf16_src, utf16_src_length, NULL, utf8, -1, &status); | 
| 702 |  |       if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */ | 
| 703 |  |  | 
| 704 |  |       utf16_src[utf16_src_length] = 0; /* u_strFromUTF8() doesn't 0-terminate if dest is filled up */ | 
| 705 |  |     } | 
| 706 |  |  | 
| 707 |  |     utf16_dst_length = uidna_nameToASCII((UIDNA *)idna, utf16_src, utf16_src_length, utf16_dst, countof(utf16_dst), &info, &status); | 
| 708 |  |     if (!U_SUCCESS(status)) goto cleanup; /* to ASCII conversion failed */ | 
| 709 |  |  | 
| 710 |  |     u_strToUTF8(lookupname, sizeof(lookupname_buf), &bytes_written, utf16_dst, utf16_dst_length, &status); | 
| 711 |  |     if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */ | 
| 712 |  |  | 
| 713 |  |     if (bytes_written >= (int) sizeof(lookupname_buf)) { | 
| 714 |  |       lookupname = malloc(bytes_written + 1); | 
| 715 |  |       if (!lookupname) goto cleanup; | 
| 716 |  |  | 
| 717 |  |       u_strToUTF8(lookupname, bytes_written, NULL, utf16_dst, utf16_dst_length, &status); | 
| 718 |  |       if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */ | 
| 719 |  |  | 
| 720 |  |       lookupname[bytes_written] = 0; /* u_strToUTF8() doesn't 0-terminate if dest is filled up */ | 
| 721 |  |     } else { | 
| 722 |  |       if (!(lookupname = strdup(lookupname))) | 
| 723 |  |         goto cleanup; | 
| 724 |  |     } | 
| 725 |  |  | 
| 726 |  |     if (ascii) { | 
| 727 |  |       *ascii = lookupname; | 
| 728 |  |       lookupname = NULL; | 
| 729 |  |     } | 
| 730 |  |  | 
| 731 |  |     ret = 0; | 
| 732 |  |  | 
| 733 |  | cleanup: | 
| 734 |  |     if (lookupname != lookupname_buf) | 
| 735 |  |       free(lookupname); | 
| 736 |  |     if (utf16_src != utf16_src_buf) | 
| 737 |  |       free(utf16_src); | 
| 738 |  |   } | 
| 739 |  | #elif defined(WITH_LIBIDN2) | 
| 740 | 0 | #if IDN2_VERSION_NUMBER >= 0x00140000 | 
| 741 | 0 |   int rc; | 
| 742 |  |  | 
| 743 |  |   /* IDN2_TRANSITIONAL automatically converts to lowercase | 
| 744 |  |    * IDN2_NFC_INPUT converts to NFC before toASCII conversion | 
| 745 |  |    * Since IDN2_TRANSITIONAL implicitly does NFC conversion, we don't need | 
| 746 |  |    * the additional IDN2_NFC_INPUT. But just for the unlikely case that the linked | 
| 747 |  |    * library is not matching the headers when building and it doesn't support TR46, | 
| 748 |  |    * we provide IDN2_NFC_INPUT. */ | 
| 749 |  | 
 | 
| 750 | 0 |   if ((rc = idn2_lookup_u8((uint8_t *)utf8, (uint8_t **)ascii, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL)) == IDN2_OK) | 
| 751 | 0 |     ret = 0; | 
| 752 |  |   /* else | 
| 753 |  |     fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */ | 
| 754 |  | #else | 
| 755 |  |   int rc; | 
| 756 |  |   uint8_t *lower; | 
| 757 |  |   size_t len = u8_strlen((uint8_t *)utf8) + 1; | 
| 758 |  |  | 
| 759 |  |   /* we need a conversion to lowercase */ | 
| 760 |  |   if (!(lower = u8_tolower((uint8_t *)utf8, len, 0, UNINORM_NFKC, NULL, &len))) { | 
| 761 |  |     /* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", utf8, errno); */ | 
| 762 |  |     return -1; | 
| 763 |  |   } | 
| 764 |  |  | 
| 765 |  |   if ((rc = idn2_lookup_u8(lower, (uint8_t **)ascii, 0)) == IDN2_OK) { | 
| 766 |  |     ret = 0; | 
| 767 |  |   } /* else | 
| 768 |  |     fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */ | 
| 769 |  |  | 
| 770 |  |   free(lower); | 
| 771 |  | #endif | 
| 772 |  | #elif defined(WITH_LIBIDN) | 
| 773 |  |   int rc; | 
| 774 |  |  | 
| 775 |  |   if (!utf8_is_valid(utf8)) { | 
| 776 |  |     /* fprintf(stderr, "Invalid UTF-8 sequence not converted: '%s'\n", utf8); */ | 
| 777 |  |     return -1; | 
| 778 |  |   } | 
| 779 |  |  | 
| 780 |  |   /* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */ | 
| 781 |  |  | 
| 782 |  |   if ((rc = idna_to_ascii_8z(utf8, ascii, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) { | 
| 783 |  |     ret = 0; | 
| 784 |  |   } /* else | 
| 785 |  |     fprintf(stderr, "toASCII failed (%d): %s\n", rc, idna_strerror(rc)); */ | 
| 786 |  | #else | 
| 787 |  |   char lookupname[128]; | 
| 788 |  |  | 
| 789 |  |   if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) { | 
| 790 |  |     if (ascii) | 
| 791 |  |       if ((*ascii = strdup(lookupname))) | 
| 792 |  |         ret = 0; | 
| 793 |  |   } | 
| 794 |  | #endif | 
| 795 |  | 
 | 
| 796 | 0 |   return ret; | 
| 797 | 0 | } | 
| 798 |  |  | 
| 799 |  | static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e) | 
| 800 | 0 | { | 
| 801 | 0 |   char *lookupname; | 
| 802 |  | 
 | 
| 803 | 0 |   if (str_is_ascii(e->label_buf)) | 
| 804 | 0 |     return; | 
| 805 |  |  | 
| 806 | 0 |   if (psl_idna_toASCII(idna, e->label_buf, &lookupname) == 0) { | 
| 807 | 0 |     if (strcmp(e->label_buf, lookupname)) { | 
| 808 | 0 |       psl_entry_t suffix, *suffixp; | 
| 809 |  |  | 
| 810 |  |       /* fprintf(stderr, "toASCII '%s' -> '%s'\n", e->label_buf, lookupname); */ | 
| 811 | 0 |       if (suffix_init(&suffix, lookupname, strlen(lookupname)) == 0) { | 
| 812 | 0 |         suffix.flags = e->flags; | 
| 813 | 0 |         if ((suffixp = vector_get(v, vector_add(v, &suffix)))) | 
| 814 | 0 |           suffixp->label = suffixp->label_buf; /* set label to changed address */ | 
| 815 | 0 |       } | 
| 816 | 0 |     } /* else ignore */ | 
| 817 |  | 
 | 
| 818 | 0 |     free(lookupname); | 
| 819 | 0 |   } | 
| 820 | 0 | } | 
| 821 |  |  | 
| 822 |  | /* prototypes of the DAFSA helpers used below; is_public_suffix() treats a LookupStringInFixedSet() result of -1 as "not found" and any other value as PRIV_PSL_FLAG_* bits */ | 
| 823 |  | int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length); | 
| 824 |  | int GetUtfMode(const unsigned char *graph, size_t length); | 
| 825 |  |  | 
| 826 |  | static int is_public_suffix(const psl_ctx_t *psl, const char *domain, int type) | 
| 827 | 0 | { | 
| 828 | 0 |   psl_entry_t suffix; | 
| 829 | 0 |   const char *p; | 
| 830 | 0 |   char *punycode = NULL; | 
| 831 | 0 |   int need_conversion = 0; | 
| 832 |  |  | 
| 833 |  |   /* this function should be called without leading dots, just make sure */ | 
| 834 | 0 |   if (*domain == '.') | 
| 835 | 0 |     domain++; | 
| 836 |  | 
 | 
| 837 | 0 |   suffix.nlabels = 1; | 
| 838 |  | 
 | 
| 839 | 0 |   for (p = domain; *p; p++) { | 
| 840 | 0 |     if (*p == '.') { | 
| 841 | 0 |       if (suffix.nlabels == 255) // weird input, avoid 8bit overflow | 
| 842 | 0 |         return 0; | 
| 843 | 0 |       suffix.nlabels++; | 
| 844 | 0 |     } | 
| 845 | 0 |     else if (*((unsigned char *)p) >= 128) | 
| 846 | 0 |       need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */ | 
| 847 | 0 |   } | 
| 848 |  |  | 
| 849 | 0 |   if (suffix.nlabels == 1) { | 
| 850 |  |     /* TLD, this is the prevailing '*' match. If type excludes the '*' rule, continue. | 
| 851 |  |      */ | 
| 852 | 0 |     if (!(type & PSL_TYPE_NO_STAR_RULE)) | 
| 853 | 0 |       return 1; | 
| 854 | 0 |   } | 
| 855 |  |  | 
| 856 | 0 |   type &= ~PSL_TYPE_NO_STAR_RULE; | 
| 857 |  | 
 | 
| 858 | 0 |   if (psl->utf8 || psl == &builtin_psl) | 
| 859 | 0 |     need_conversion = 0; | 
| 860 |  | 
 | 
| 861 | 0 |   if (need_conversion) { | 
| 862 | 0 |     psl_idna_t *idna = psl_idna_open(); | 
| 863 |  | 
 | 
| 864 | 0 |     if (psl_idna_toASCII(idna, domain, &punycode) == 0) { | 
| 865 | 0 |       suffix.label = punycode; | 
| 866 | 0 |       suffix.length = strlen(punycode); | 
| 867 | 0 |     } else { | 
| 868 |  |       /* fallback */ | 
| 869 |  | 
 | 
| 870 | 0 |       suffix.label = domain; | 
| 871 | 0 |       suffix.length = p - suffix.label; | 
| 872 | 0 |     } | 
| 873 |  | 
 | 
| 874 | 0 |     psl_idna_close(idna); | 
| 875 | 0 |   } else { | 
| 876 | 0 |     suffix.label = domain; | 
| 877 | 0 |     suffix.length = p - suffix.label; | 
| 878 | 0 |   } | 
| 879 |  | 
 | 
| 880 | 0 |   if (psl == &builtin_psl || psl->dafsa) { | 
| 881 | 0 |     size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size; | 
| 882 | 0 |     const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa; | 
| 883 | 0 |     int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length); | 
| 884 | 0 |     if (rc != -1) { | 
| 885 |  |       /* check for correct rule type */ | 
| 886 | 0 |       if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN)) | 
| 887 | 0 |         goto suffix_no; | 
| 888 | 0 |       else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE)) | 
| 889 | 0 |         goto suffix_no; | 
| 890 |  |  | 
| 891 | 0 |       if (rc & PRIV_PSL_FLAG_EXCEPTION) | 
| 892 | 0 |         goto suffix_no; | 
| 893 |  |  | 
| 894 |  |       /* wildcard *.foo.bar implicitly make foo.bar a public suffix */ | 
| 895 |  |       /* definitely a match, no matter if the found rule is a wildcard or not */ | 
| 896 | 0 |       goto suffix_yes; | 
| 897 | 0 |     } | 
| 898 | 0 |     if ((suffix.label = strchr(suffix.label, '.'))) { | 
| 899 | 0 |       suffix.label++; | 
| 900 | 0 |       suffix.length = strlen(suffix.label); | 
| 901 | 0 |       suffix.nlabels--; | 
| 902 |  | 
 | 
| 903 | 0 |       rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length); | 
| 904 | 0 |       if (rc != -1) { | 
| 905 |  |         /* check for correct rule type */ | 
| 906 | 0 |         if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN)) | 
| 907 | 0 |           goto suffix_no; | 
| 908 | 0 |         else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE)) | 
| 909 | 0 |           goto suffix_no; | 
| 910 |  |  | 
| 911 | 0 |         if (rc & PRIV_PSL_FLAG_WILDCARD) | 
| 912 | 0 |           goto suffix_yes; | 
| 913 | 0 |       } | 
| 914 | 0 |     } | 
| 915 | 0 |   } else { | 
| 916 | 0 |     psl_entry_t *rule = vector_get(psl->suffixes, 0); | 
| 917 |  | 
 | 
| 918 | 0 |     if (!rule || rule->nlabels < suffix.nlabels - 1) | 
| 919 | 0 |       goto suffix_no; | 
| 920 |  |  | 
| 921 | 0 |     rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix)); | 
| 922 |  | 
 | 
| 923 | 0 |     if (rule) { | 
| 924 |  |       /* check for correct rule type */ | 
| 925 | 0 |       if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN)) | 
| 926 | 0 |         goto suffix_no; | 
| 927 | 0 |       else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE)) | 
| 928 | 0 |         goto suffix_no; | 
| 929 |  |  | 
| 930 | 0 |       if (rule->flags & PRIV_PSL_FLAG_EXCEPTION) | 
| 931 | 0 |         goto suffix_no; | 
| 932 |  |  | 
| 933 |  |       /* wildcard *.foo.bar implicitly make foo.bar a public suffix */ | 
| 934 |  |       /* definitely a match, no matter if the found rule is a wildcard or not */ | 
| 935 | 0 |       goto suffix_yes; | 
| 936 | 0 |     } | 
| 937 |  |  | 
| 938 | 0 |     if ((suffix.label = strchr(suffix.label, '.'))) { | 
| 939 | 0 |       suffix.label++; | 
| 940 | 0 |       suffix.length = strlen(suffix.label); | 
| 941 | 0 |       suffix.nlabels--; | 
| 942 |  | 
 | 
| 943 | 0 |       rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix)); | 
| 944 |  | 
 | 
| 945 | 0 |       if (rule) { | 
| 946 |  |         /* check for correct rule type */ | 
| 947 | 0 |         if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN)) | 
| 948 | 0 |           goto suffix_no; | 
| 949 | 0 |         else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE)) | 
| 950 | 0 |           goto suffix_no; | 
| 951 |  |  | 
| 952 | 0 |         if (rule->flags & PRIV_PSL_FLAG_WILDCARD) | 
| 953 | 0 |           goto suffix_yes; | 
| 954 | 0 |       } | 
| 955 | 0 |     } | 
| 956 | 0 |   } | 
| 957 |  |  | 
| 958 | 0 | suffix_no: | 
| 959 | 0 |   if (punycode) | 
| 960 | 0 |     free(punycode); | 
| 961 | 0 |   return 0; | 
| 962 |  |  | 
| 963 | 0 | suffix_yes: | 
| 964 | 0 |   if (punycode) | 
| 965 | 0 |     free(punycode); | 
| 966 | 0 |   return 1; | 
| 967 | 0 | } | 
| 968 |  |  | 
| 969 |  | /** | 
| 970 |  |  * psl_is_public_suffix: | 
| 971 |  |  * @psl: PSL context | 
| 972 |  |  * @domain: Domain string | 
| 973 |  |  * | 
| 974 |  |  * This function checks if @domain is a public suffix by the means of the | 
| 975 |  |  * [Mozilla Public Suffix List](https://publicsuffix.org). | 
| 976 |  |  * | 
| 977 |  |  * For cookie domain checking see psl_is_cookie_domain_acceptable(). | 
| 978 |  |  * | 
| 979 |  |  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode). | 
| 980 |  |  * Other encodings likely result in incorrect return values. | 
| 981 |  |  * Use helper function psl_str_to_utf8lower() for normalization of @domain. | 
| 982 |  |  * | 
| 983 |  |  * @psl is a context returned by either psl_load_file(), psl_load_fp() or | 
| 984 |  |  * psl_builtin(). | 
| 985 |  |  * | 
| 986 |  |  * Returns: 1 if domain is a public suffix, 0 if not. | 
| 987 |  |  * | 
| 988 |  |  * Since: 0.1 | 
| 989 |  |  */ | 
| 990 |  | int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain) | 
| 991 | 0 | { | 
| 992 | 0 |   if (!psl || !domain) | 
| 993 | 0 |     return 1; | 
| 994 |  |  | 
| 995 | 0 |   return is_public_suffix(psl, domain, PSL_TYPE_ANY); | 
| 996 | 0 | } | 
| 997 |  |  | 
| 998 |  | /** | 
| 999 |  |  * psl_is_public_suffix2: | 
| 1000 |  |  * @psl: PSL context | 
| 1001 |  |  * @domain: Domain string | 
| 1002 |  |  * @type: Domain type | 
| 1003 |  |  * | 
| 1004 |  |  * This function checks if @domain is a public suffix by the means of the | 
| 1005 |  |  * [Mozilla Public Suffix List](https://publicsuffix.org). | 
| 1006 |  |  * | 
| 1007 |  |  * @type specifies the PSL section where to perform the lookup. Valid values are | 
| 1008 |  |  * %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN, %PSL_TYPE_NO_STAR_RULE, and %PSL_TYPE_ANY. | 
| 1009 |  |  * | 
| 1010 |  |  * %PSL_TYPE_NO_STAR_RULE switches off the 'prevailing star rule' (see | 
| 1011 |  |  * [List](https://publicsuffix.org/list) under 'Algorithm' 2.). | 
| 1012 |  |  * Applying the flag means that TLDs not explicitly listed in the PSL are *not* treated as public suffixes. | 
| 1013 |  |  * | 
| 1014 |  |  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode). | 
| 1015 |  |  * Other encodings likely result in incorrect return values. | 
| 1016 |  |  * Use helper function psl_str_to_utf8lower() for normalization of @domain. | 
| 1017 |  |  * | 
| 1018 |  |  * @psl is a context returned by either psl_load_file(), psl_load_fp() or | 
| 1019 |  |  * psl_builtin(). | 
| 1020 |  |  * | 
| 1021 |  |  * Returns: 1 if domain is a public suffix, 0 if not. | 
| 1022 |  |  * | 
| 1023 |  |  * Since: 0.1 | 
| 1024 |  |  */ | 
| 1025 |  | int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type) | 
| 1026 | 0 | { | 
| 1027 | 0 |   if (!psl || !domain) | 
| 1028 | 0 |     return 1; | 
| 1029 |  |  | 
| 1030 | 0 |   return is_public_suffix(psl, domain, type); | 
| 1031 | 0 | } | 
| 1032 |  |  | 
| 1033 |  | /** | 
| 1034 |  |  * psl_unregistrable_domain: | 
| 1035 |  |  * @psl: PSL context | 
| 1036 |  |  * @domain: Domain string | 
| 1037 |  |  * | 
| 1038 |  |  * This function finds the longest public suffix part of @domain by the means | 
| 1039 |  |  * of the [Mozilla Public Suffix List](https://publicsuffix.org). | 
| 1040 |  |  * | 
| 1041 |  |  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode). | 
| 1042 |  |  * Other encodings likely result in incorrect return values. | 
| 1043 |  |  * Use helper function psl_str_to_utf8lower() for normalization of @domain. | 
| 1044 |  |  * | 
| 1045 |  |  * @psl is a context returned by either psl_load_file(), psl_load_fp() or | 
| 1046 |  |  * psl_builtin(). | 
| 1047 |  |  * | 
| 1048 |  |  * Returns: Pointer to longest public suffix part of @domain or %NULL if @domain | 
| 1049 |  |  * does not contain a public suffix (or if @psl is %NULL). | 
| 1050 |  |  * | 
| 1051 |  |  * Since: 0.1 | 
| 1052 |  |  */ | 
| 1053 |  | const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain) | 
| 1054 | 0 | { | 
| 1055 | 0 |   int nlabels = 0; | 
| 1056 | 0 |   const char *p; | 
| 1057 |  | 
 | 
| 1058 | 0 |   if (!psl || !domain) | 
| 1059 | 0 |     return NULL; | 
| 1060 |  |  | 
| 1061 |  |   /* | 
| 1062 |  |    * In the main loop we introduce a O(N^2) behavior to avoid code duplication. | 
| 1063 |  |    * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right. | 
| 1064 |  |    */ | 
| 1065 | 0 |   for (p = domain + strlen(domain) - 1; p >= domain; p--) { | 
| 1066 | 0 |     if (*p == '.' && ++nlabels > 8) { | 
| 1067 | 0 |       domain = p + 1; | 
| 1068 | 0 |       break; | 
| 1069 | 0 |     } | 
| 1070 | 0 |   } | 
| 1071 |  |  | 
| 1072 |  |   /* | 
| 1073 |  |    *  We check from left to right to catch special PSL entries like 'forgot.his.name': | 
| 1074 |  |    *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not. | 
| 1075 |  |    */ | 
| 1076 |  | 
 | 
| 1077 | 0 |   while (!is_public_suffix(psl, domain, 0)) { | 
| 1078 | 0 |     if ((domain = strchr(domain, '.'))) | 
| 1079 | 0 |       domain++; | 
| 1080 | 0 |     else | 
| 1081 | 0 |       break; /* prevent endless loop if is_public_suffix() is broken. */ | 
| 1082 | 0 |   } | 
| 1083 |  | 
 | 
| 1084 | 0 |   return domain; | 
| 1085 | 0 | } | 
| 1086 |  |  | 
| 1087 |  | /** | 
| 1088 |  |  * psl_registrable_domain: | 
| 1089 |  |  * @psl: PSL context | 
| 1090 |  |  * @domain: Domain string | 
| 1091 |  |  * | 
| 1092 |  |  * This function finds the shortest private suffix part of @domain by the means | 
| 1093 |  |  * of the [Mozilla Public Suffix List](https://publicsuffix.org). | 
| 1094 |  |  * | 
| 1095 |  |  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode). | 
| 1096 |  |  * Other encodings likely result in incorrect return values. | 
| 1097 |  |  * Use helper function psl_str_to_utf8lower() for normalization of @domain. | 
| 1098 |  |  * | 
| 1099 |  |  * @psl is a context returned by either psl_load_file(), psl_load_fp() or | 
| 1100 |  |  * psl_builtin(). | 
| 1101 |  |  * | 
| 1102 |  |  * Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain | 
| 1103 |  |  * does not contain a private suffix (or if @psl is %NULL). | 
| 1104 |  |  * | 
| 1105 |  |  * Since: 0.1 | 
| 1106 |  |  */ | 
| 1107 |  | const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain) | 
| 1108 | 0 | { | 
| 1109 | 0 |   const char *p, *regdom = NULL; | 
| 1110 | 0 |   int nlabels = 0; | 
| 1111 |  | 
 | 
| 1112 | 0 |   if (!psl || !domain || *domain == '.') | 
| 1113 | 0 |     return NULL; | 
| 1114 |  |  | 
| 1115 |  |   /* | 
| 1116 |  |    * In the main loop we introduce a O(N^2) behavior to avoid code duplication. | 
| 1117 |  |    * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right. | 
| 1118 |  |    */ | 
| 1119 | 0 |   for (p = domain + strlen(domain) - 1; p >= domain; p--) { | 
| 1120 | 0 |     if (*p == '.' && ++nlabels > 8) { | 
| 1121 | 0 |       domain = p + 1; | 
| 1122 | 0 |       break; | 
| 1123 | 0 |     } | 
| 1124 | 0 |   } | 
| 1125 |  |  | 
| 1126 |  |   /* | 
| 1127 |  |    *  We check from left to right to catch special PSL entries like 'forgot.his.name': | 
| 1128 |  |    *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not. | 
| 1129 |  |    */ | 
| 1130 |  | 
 | 
| 1131 | 0 |   while (!is_public_suffix(psl, domain, 0)) { | 
| 1132 | 0 |     if ((p = strchr(domain, '.'))) { | 
| 1133 | 0 |       regdom = domain; | 
| 1134 | 0 |       domain = p + 1; | 
| 1135 | 0 |     } else | 
| 1136 | 0 |       break; /* prevent endless loop if is_public_suffix() is broken. */ | 
| 1137 | 0 |   } | 
| 1138 |  | 
 | 
| 1139 | 0 |   return regdom; | 
| 1140 | 0 | } | 
| 1141 |  |  | 
| 1142 |  | /** | 
| 1143 |  |  * psl_load_file: | 
| 1144 |  |  * @fname: Name of PSL file | 
| 1145 |  |  * | 
| 1146 |  |  * This function loads the public suffixes file named @fname. | 
| 1147 |  |  * To free the allocated resources, call psl_free(). | 
| 1148 |  |  * | 
| 1149 |  |  * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international. | 
| 1150 |  |  * | 
| 1151 |  |  * Returns: Pointer to a PSL context or %NULL on failure. | 
| 1152 |  |  * | 
| 1153 |  |  * Since: 0.1 | 
| 1154 |  |  */ | 
| 1155 |  | psl_ctx_t *psl_load_file(const char *fname) | 
| 1156 | 0 | { | 
| 1157 | 0 |   FILE *fp; | 
| 1158 | 0 |   psl_ctx_t *psl = NULL; | 
| 1159 |  | 
 | 
| 1160 | 0 |   if (!fname) | 
| 1161 | 0 |     return NULL; | 
| 1162 |  |  | 
| 1163 | 0 |   if ((fp = fopen(fname, "rb"))) { | 
| 1164 | 0 |     psl = psl_load_fp(fp); | 
| 1165 | 0 |     fclose(fp); | 
| 1166 | 0 |   } | 
| 1167 |  | 
 | 
| 1168 | 0 |   return psl; | 
| 1169 | 0 | } | 
| 1170 |  |  | 
| 1171 |  | /** | 
| 1172 |  |  * psl_load_fp: | 
| 1173 |  |  * @fp: %FILE pointer | 
| 1174 |  |  * | 
| 1175 |  |  * This function loads the public suffixes from a %FILE pointer. | 
| 1176 |  |  * To free the allocated resources, call psl_free(). | 
| 1177 |  |  * | 
| 1178 |  |  * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international. | 
| 1179 |  |  * | 
| 1180 |  |  * Returns: Pointer to a PSL context or %NULL on failure. | 
| 1181 |  |  * | 
| 1182 |  |  * Since: 0.1 | 
| 1183 |  |  */ | 
| 1184 |  | psl_ctx_t *psl_load_fp(FILE *fp) | 
| 1185 | 0 | { | 
| 1186 | 0 |   psl_ctx_t *psl; | 
| 1187 | 0 |   psl_entry_t suffix, *suffixp; | 
| 1188 | 0 |   char buf[256], *linep, *p; | 
| 1189 | 0 |   int type = 0, is_dafsa; | 
| 1190 | 0 |   psl_idna_t *idna; | 
| 1191 |  | 
 | 
| 1192 | 0 |   if (!fp) | 
| 1193 | 0 |     return NULL; | 
| 1194 |  |  | 
| 1195 | 0 |   if (!(psl = calloc(1, sizeof(psl_ctx_t)))) | 
| 1196 | 0 |     return NULL; | 
| 1197 |  |  | 
| 1198 |  |   /* read first line to allow ASCII / DAFSA detection */ | 
| 1199 | 0 |   if (!(linep = fgets(buf, sizeof(buf) - 1, fp))) | 
| 1200 | 0 |     goto fail; | 
| 1201 |  |  | 
| 1202 | 0 |   is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11); | 
| 1203 |  | 
 | 
| 1204 | 0 |   if (is_dafsa) { | 
| 1205 | 0 |     void *m; | 
| 1206 | 0 |     size_t size = 65536, n, len = 0; | 
| 1207 | 0 |     int version = atoi(buf + 11); | 
| 1208 |  | 
 | 
| 1209 | 0 |     if (version != 0) | 
| 1210 | 0 |       goto fail; | 
| 1211 |  |  | 
| 1212 | 0 |     if (!(psl->dafsa = malloc(size))) | 
| 1213 | 0 |       goto fail; | 
| 1214 |  |  | 
| 1215 | 0 |     memcpy(psl->dafsa, buf, len); | 
| 1216 |  | 
 | 
| 1217 | 0 |     while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) { | 
| 1218 | 0 |       len += n; | 
| 1219 | 0 |       if (len >= size) { | 
| 1220 | 0 |         if (!(m = realloc(psl->dafsa, size *= 2))) | 
| 1221 | 0 |           goto fail; | 
| 1222 | 0 |         psl->dafsa = m; | 
| 1223 | 0 |       } | 
| 1224 | 0 |     } | 
| 1225 |  |  | 
| 1226 |  |     /* release unused memory */ | 
| 1227 | 0 |     if ((m = realloc(psl->dafsa, len))) | 
| 1228 | 0 |       psl->dafsa = m; | 
| 1229 | 0 |     else if (!len) | 
| 1230 | 0 |       psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */ | 
| 1231 |  | 
 | 
| 1232 | 0 |     psl->dafsa_size = len; | 
| 1233 | 0 |     psl->utf8 = !!GetUtfMode(psl->dafsa, len); | 
| 1234 |  | 
 | 
| 1235 | 0 |     return psl; | 
| 1236 | 0 |   } | 
| 1237 |  |  | 
| 1238 | 0 |   idna = psl_idna_open(); | 
| 1239 |  |  | 
| 1240 |  |   /* | 
| 1241 |  |    *  as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions. | 
| 1242 |  |    *  as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions. | 
| 1243 |  |    *  as of 07.10.2018, the list at https://publicsuffix.org/list/ contains ~8600 rules and 8 exceptions. | 
| 1244 |  |    */ | 
| 1245 | 0 |   psl->suffixes = vector_alloc(8*1024, suffix_compare_array); | 
| 1246 | 0 |   psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */ | 
| 1247 |  | 
 | 
| 1248 | 0 |   do { | 
| 1249 | 0 |     while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */ | 
| 1250 | 0 |     if (!*linep) continue; /* skip empty lines */ | 
| 1251 |  |  | 
| 1252 | 0 |     if (*linep == '/' && linep[1] == '/') { | 
| 1253 | 0 |       if (!type) { | 
| 1254 | 0 |         if (strstr(linep + 2, "===BEGIN ICANN DOMAINS===")) | 
| 1255 | 0 |           type = PRIV_PSL_FLAG_ICANN; | 
| 1256 | 0 |         else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS===")) | 
| 1257 | 0 |           type = PRIV_PSL_FLAG_PRIVATE; | 
| 1258 | 0 |       } | 
| 1259 | 0 |       else if (type == PRIV_PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS===")) | 
| 1260 | 0 |         type = 0; | 
| 1261 | 0 |       else if (type == PRIV_PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS===")) | 
| 1262 | 0 |         type = 0; | 
| 1263 |  | 
 | 
| 1264 | 0 |       continue; /* skip comments */ | 
| 1265 | 0 |     } | 
| 1266 |  |  | 
| 1267 |  |     /* parse suffix rule */ | 
| 1268 | 0 |     for (p = linep; *linep && !isspace_ascii(*linep);) linep++; | 
| 1269 | 0 |     *linep = 0; | 
| 1270 |  | 
 | 
| 1271 | 0 |     if (*p == '!') { | 
| 1272 | 0 |       p++; | 
| 1273 | 0 |       suffix.flags = PRIV_PSL_FLAG_EXCEPTION | type; | 
| 1274 | 0 |       psl->nexceptions++; | 
| 1275 | 0 |     } else if (*p == '*') { | 
| 1276 | 0 |       if (*++p != '.') { | 
| 1277 |  |         /* fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", p - 1); */ | 
| 1278 | 0 |         continue; | 
| 1279 | 0 |       } | 
| 1280 | 0 |       p++; | 
| 1281 |  |       /* wildcard *.foo.bar implicitly make foo.bar a public suffix */ | 
| 1282 | 0 |       suffix.flags = PRIV_PSL_FLAG_WILDCARD | PRIV_PSL_FLAG_PLAIN | type; | 
| 1283 | 0 |       psl->nwildcards++; | 
| 1284 | 0 |       psl->nsuffixes++; | 
| 1285 | 0 |     } else { | 
| 1286 | 0 |       suffix.flags = PRIV_PSL_FLAG_PLAIN | type; | 
| 1287 | 0 |       psl->nsuffixes++; | 
| 1288 | 0 |     } | 
| 1289 |  |  | 
| 1290 | 0 |     if (suffix_init(&suffix, p, linep - p) == 0) { | 
| 1291 | 0 |       int index; | 
| 1292 |  | 
 | 
| 1293 | 0 |       if ((index = vector_find(psl->suffixes, &suffix)) >= 0) { | 
| 1294 |  |         /* Found existing entry: | 
| 1295 |  |          * Combination of exception and plain rule is ambiguous | 
| 1296 |  |          * !foo.bar | 
| 1297 |  |          * foo.bar | 
| 1298 |  |          * | 
| 1299 |  |          * Allowed: | 
| 1300 |  |          * !foo.bar + *.foo.bar | 
| 1301 |  |          * foo.bar + *.foo.bar | 
| 1302 |  |          * | 
| 1303 |  |          * We do not check here, let's do it later. | 
| 1304 |  |          */ | 
| 1305 |  | 
 | 
| 1306 | 0 |         suffixp = vector_get(psl->suffixes, index); | 
| 1307 | 0 |         suffixp->flags |= suffix.flags; | 
| 1308 | 0 |       } else { | 
| 1309 |  |         /* New entry */ | 
| 1310 | 0 |         suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix)); | 
| 1311 | 0 |       } | 
| 1312 |  | 
 | 
| 1313 | 0 |       if (suffixp) { | 
| 1314 | 0 |         suffixp->label = suffixp->label_buf; /* set label to changed address */ | 
| 1315 | 0 |         add_punycode_if_needed(idna, psl->suffixes, suffixp); | 
| 1316 | 0 |       } | 
| 1317 | 0 |     } | 
| 1318 | 0 |   } while ((linep = fgets(buf, sizeof(buf), fp))); | 
| 1319 |  |  | 
| 1320 | 0 |   vector_sort(psl->suffixes); | 
| 1321 |  | 
 | 
| 1322 | 0 |   psl_idna_close(idna); | 
| 1323 |  | 
 | 
| 1324 | 0 |   return psl; | 
| 1325 |  |  | 
| 1326 | 0 | fail: | 
| 1327 | 0 |   psl_free(psl); | 
| 1328 | 0 |   return NULL; | 
| 1329 | 0 | } | 
| 1330 |  |  | 
| 1331 |  | /** | 
| 1332 |  |  * psl_free: | 
| 1333 |  |  * @psl: PSL context pointer | 
| 1334 |  |  * | 
| 1335 |  |  * This function frees the PSL context that has been retrieved via | 
| 1336 |  |  * psl_load_fp() or psl_load_file(). | 
| 1337 |  |  * | 
| 1338 |  |  * Since: 0.1 | 
| 1339 |  |  */ | 
| 1340 |  | void psl_free(psl_ctx_t *psl) | 
| 1341 | 0 | { | 
| 1342 | 0 |   if (psl && psl != &builtin_psl) { | 
| 1343 | 0 |     vector_free(&psl->suffixes); | 
| 1344 | 0 |     free(psl->dafsa); | 
| 1345 | 0 |     free(psl); | 
| 1346 | 0 |   } | 
| 1347 | 0 | } | 
| 1348 |  |  | 
| 1349 |  | /** | 
| 1350 |  |  * psl_builtin: | 
| 1351 |  |  * | 
| 1352 |  |  * This function returns the PSL context that has been generated and built in at compile-time. | 
| 1353 |  |  * You don't have to free the returned context explicitly. | 
| 1354 |  |  * | 
| 1355 |  |  * The builtin data also contains punycode entries, one for each international domain name. | 
| 1356 |  |  * | 
| 1357 |  |  * If the generation of built-in data has been disabled during compilation, %NULL will be returned. | 
| 1358 |  |  * When using the builtin psl context, you can provide UTF-8 (lowercase + NFKC) or ASCII/ACE (punycode) | 
| 1359 |  |  * representations of domains to functions like psl_is_public_suffix(). | 
| 1360 |  |  * | 
| 1361 |  |  * Returns: Pointer to the built in PSL data or %NULL if this data is not available. | 
| 1362 |  |  * | 
| 1363 |  |  * Since: 0.1 | 
| 1364 |  |  */ | 
| 1365 |  | const psl_ctx_t *psl_builtin(void) | 
| 1366 | 0 | { | 
| 1367 | 0 | #ifdef ENABLE_BUILTIN | 
| 1368 | 0 |   return &builtin_psl; | 
| 1369 |  | #else | 
| 1370 |  |   return NULL; | 
| 1371 |  | #endif | 
| 1372 | 0 | } | 
| 1373 |  |  | 
| 1374 |  | /** | 
| 1375 |  |  * psl_suffix_count: | 
| 1376 |  |  * @psl: PSL context pointer | 
| 1377 |  |  * | 
| 1378 |  |  * This function returns number of public suffixes maintained by @psl. | 
| 1379 |  |  * The number of exceptions within the Public Suffix List are not included. | 
| 1380 |  |  * | 
| 1381 |  |  * If the information is not available, the return value is -1 (since 0.19). | 
| 1382 |  |  * This is the case with DAFSA blobs or if @psl is %NULL. | 
| 1383 |  |  * | 
| 1384 |  |  * Returns: Number of public suffixes entries in PSL context or -1 if this information is not available. | 
| 1385 |  |  * | 
| 1386 |  |  * Since: 0.1 | 
| 1387 |  |  */ | 
| 1388 |  | int psl_suffix_count(const psl_ctx_t *psl) | 
| 1389 | 0 | { | 
| 1390 | 0 |   if (psl == &builtin_psl) | 
| 1391 | 0 |     return _psl_nsuffixes; | 
| 1392 | 0 |   else if (psl) | 
| 1393 | 0 |     return psl->dafsa ? -1 : psl->nsuffixes; | 
| 1394 | 0 |   else | 
| 1395 | 0 |     return -1; | 
| 1396 | 0 | } | 
| 1397 |  |  | 
| 1398 |  | /** | 
| 1399 |  |  * psl_suffix_exception_count: | 
| 1400 |  |  * @psl: PSL context pointer | 
| 1401 |  |  * | 
| 1402 |  |  * This function returns number of public suffix exceptions maintained by @psl. | 
| 1403 |  |  * | 
| 1404 |  |  * If the information is not available, the return value is -1 (since 0.19). | 
| 1405 |  |  * This is the case with DAFSA blobs or if @psl is %NULL. | 
| 1406 |  |  * | 
| 1407 |  |  * Returns: Number of public suffix exceptions in PSL context or -1 if this information is not available. | 
| 1408 |  |  * | 
| 1409 |  |  * Since: 0.1 | 
| 1410 |  |  */ | 
| 1411 |  | int psl_suffix_exception_count(const psl_ctx_t *psl) | 
| 1412 | 0 | { | 
| 1413 | 0 |   if (psl == &builtin_psl) | 
| 1414 | 0 |     return _psl_nexceptions; | 
| 1415 | 0 |   else if (psl) | 
| 1416 | 0 |     return psl->dafsa ? -1 : psl->nexceptions; | 
| 1417 | 0 |   else | 
| 1418 | 0 |     return -1; | 
| 1419 | 0 | } | 
| 1420 |  |  | 
| 1421 |  | /** | 
| 1422 |  |  * psl_suffix_wildcard_count: | 
| 1423 |  |  * @psl: PSL context pointer | 
| 1424 |  |  * | 
| 1425 |  |  * This function returns number of public suffix wildcards maintained by @psl. | 
| 1426 |  |  * | 
| 1427 |  |  * If the information is not available, the return value is -1 (since 0.19). | 
| 1428 |  |  * This is the case with DAFSA blobs or if @psl is %NULL. | 
| 1429 |  |  * | 
| 1430 |  |  * Returns: Number of public suffix wildcards in PSL context or -1 if this information is not available. | 
| 1431 |  |  * | 
| 1432 |  |  * Since: 0.10.0 | 
| 1433 |  |  */ | 
| 1434 |  | int psl_suffix_wildcard_count(const psl_ctx_t *psl) | 
| 1435 | 0 | { | 
| 1436 | 0 |   if (psl == &builtin_psl) | 
| 1437 | 0 |     return _psl_nwildcards; | 
| 1438 | 0 |   else if (psl) | 
| 1439 | 0 |     return psl->dafsa ? -1 : psl->nwildcards; | 
| 1440 | 0 |   else | 
| 1441 | 0 |     return -1; | 
| 1442 | 0 | } | 
| 1443 |  |  | 
| 1444 |  | /** | 
| 1445 |  |  * psl_builtin_file_time: | 
| 1446 |  |  * | 
| 1447 |  |  * This function returns the mtime of the Public Suffix List file that has been built in. | 
| 1448 |  |  * | 
| 1449 |  |  * If the generation of built-in data has been disabled during compilation, 0 will be returned. | 
| 1450 |  |  * | 
| 1451 |  |  * Returns: time_t value or 0. | 
| 1452 |  |  * | 
| 1453 |  |  * Since: 0.1 | 
| 1454 |  |  */ | 
| 1455 |  | time_t psl_builtin_file_time(void) | 
| 1456 | 0 | { | 
| 1457 | 0 |   return _psl_file_time; | 
| 1458 | 0 | } | 
| 1459 |  |  | 
| 1460 |  | /** | 
| 1461 |  |  * psl_builtin_sha1sum: | 
| 1462 |  |  * | 
| 1463 |  |  * This function returns the SHA1 checksum of the Public Suffix List file that has been built in. | 
| 1464 |  |  * The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8". | 
| 1465 |  |  * | 
| 1466 |  |  * If the generation of built-in data has been disabled during compilation, an empty string will be returned. | 
| 1467 |  |  * | 
| 1468 |  |  * Returns: String containing SHA1 checksum or an empty string. | 
| 1469 |  |  * | 
| 1470 |  |  * Since: 0.1 | 
| 1471 |  |  */ | 
| 1472 |  | const char *psl_builtin_sha1sum(void) | 
| 1473 | 0 | { | 
| 1474 | 0 |   return _psl_sha1_checksum; | 
| 1475 | 0 | } | 
| 1476 |  |  | 
| 1477 |  | /** | 
| 1478 |  |  * psl_builtin_filename: | 
| 1479 |  |  * | 
| 1480 |  |  * This function returns the file name of the Public Suffix List file that has been built in. | 
| 1481 |  |  * | 
| 1482 |  |  * If the generation of built-in data has been disabled during compilation, an empty string will be returned. | 
| 1483 |  |  * | 
| 1484 |  |  * Returns: String containing the PSL file name or an empty string. | 
| 1485 |  |  * | 
| 1486 |  |  * Since: 0.1 | 
| 1487 |  |  */ | 
| 1488 |  | const char *psl_builtin_filename(void) | 
| 1489 | 0 | { | 
| 1490 | 0 |   return _psl_filename; | 
| 1491 | 0 | } | 
| 1492 |  |  | 
| 1493 |  | /** | 
| 1494 |  |  * psl_builtin_outdated: | 
| 1495 |  |  * | 
| 1496 |  |  * This function checks if the built-in data is older than the file it has been created from. | 
| 1497 |  |  * If it is, it might be a good idea for the application to reload the PSL. | 
| 1498 |  |  * The mtime is taken as reference. | 
| 1499 |  |  * | 
| 1500 |  |  * If the PSL file does not exist, it is assumed that the built-in data is not outdated. | 
| 1501 |  |  * | 
| 1502 |  |  * Returns: 1 if the built-in is outdated, 0 otherwise. | 
| 1503 |  |  * | 
| 1504 |  |  * Since: 0.10.0 | 
| 1505 |  |  */ | 
| 1506 |  | int psl_builtin_outdated(void) | 
| 1507 | 0 | { | 
| 1508 | 0 |   struct stat st; | 
| 1509 |  | 
 | 
| 1510 | 0 |   if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time) | 
| 1511 | 0 |     return 1; | 
| 1512 |  |  | 
| 1513 | 0 |   return 0; | 
| 1514 | 0 | } | 
| 1515 |  |  | 
| 1516 |  | /** | 
| 1517 |  |  * psl_dist_filename: | 
| 1518 |  |  * | 
| 1519 |  |  * This function returns the file name of the distribution/system PSL data file. | 
| 1520 |  |  * This file will be considered by psl_latest(). | 
| 1521 |  |  * | 
| 1522 |  |  * Return the filename that is set by ./configure --with-psl-distfile, or an empty string. | 
| 1523 |  |  * | 
| 1524 |  |  * Returns: String containing a PSL file name or an empty string. | 
| 1525 |  |  * | 
| 1526 |  |  * Since: 0.16 | 
| 1527 |  |  */ | 
| 1528 |  | const char *psl_dist_filename(void) | 
| 1529 | 0 | { | 
| 1530 | 0 |   return _psl_dist_filename; | 
| 1531 | 0 | } | 
| 1532 |  |  | 
| 1533 |  | /** | 
| 1534 |  |  * psl_get_version: | 
| 1535 |  |  * | 
| 1536 |  |  * Get libpsl version. | 
| 1537 |  |  * | 
| 1538 |  |  * Returns: String containing version of libpsl. | 
| 1539 |  |  * | 
| 1540 |  |  * Since: 0.2.5 | 
| 1541 |  |  **/ | 
| 1542 |  | const char *psl_get_version(void) | 
| 1543 | 0 | { | 
| 1544 |  | #ifdef WITH_LIBICU | 
| 1545 |  |   return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")"; | 
| 1546 |  | #elif defined(WITH_LIBIDN2) | 
| 1547 | 0 |   return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")"; | 
| 1548 |  | #elif defined(WITH_LIBIDN) | 
| 1549 |  |   return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")"; | 
| 1550 |  | #else | 
| 1551 |  |   return PACKAGE_VERSION " (no IDNA support)"; | 
| 1552 |  | #endif | 
| 1553 | 0 | } | 
| 1554 |  |  | 
| 1555 |  | /** | 
| 1556 |  |  * psl_check_version_number: | 
| 1557 |  |  * @version: Version number (hex) to check against. | 
| 1558 |  |  * | 
| 1559 |  |  * Check the given version number is at minimum the current library version number. | 
| 1560 |  |  * The version number must be a hexadecimal number like 0x000a01 (V0.10.1). | 
| 1561 |  |  * | 
| 1562 |  |  * Returns: Returns the library version number if the given version number is at least | 
| 1563 |  |  * the version of the library, else return 0; If the argument is 0, the function returns | 
| 1564 |  |  * the library version number without performing a check. | 
| 1565 |  |  * | 
| 1566 |  |  * Since: 0.11.0 | 
| 1567 |  |  **/ | 
| 1568 |  | int psl_check_version_number(int version) | 
| 1569 | 0 | { | 
| 1570 | 0 |   if (version) { | 
| 1571 | 0 |     int major = version >> 16; | 
| 1572 | 0 |     int minor = (version >> 8) & 0xFF; | 
| 1573 | 0 |     int patch = version & 0xFF; | 
| 1574 |  | 
 | 
| 1575 | 0 |     if (major < PSL_VERSION_MAJOR | 
| 1576 | 0 |       || (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR) | 
| 1577 | 0 |       || (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH)) | 
| 1578 | 0 |     { | 
| 1579 | 0 |       return 0; | 
| 1580 | 0 |     } | 
| 1581 | 0 |   } | 
| 1582 |  |  | 
| 1583 | 0 |   return PSL_VERSION_NUMBER; | 
| 1584 | 0 | } | 
| 1585 |  |  | 
/*
 * Return whether hostname is a literal IPv4 or IPv6 address.
 *
 * Returns 1 if hostname parses as an address of either family, 0 otherwise.
 *
 * On Windows the parsing is done by WSAStringToAddressW(), after converting
 * hostname from UTF-8 to a wide-character string.
 */
static int isip(const char *hostname)
{
#ifdef _WIN32
  WCHAR wName[INET6_ADDRSTRLEN+1];

  struct sockaddr_in  addr  = {0};
  struct sockaddr_in6 addr6 = {0};

  INT size  = sizeof(addr);
  INT size6 = sizeof(addr6);

  /* conversion fails for names that do not fit into wName; such names are not treated as IP addresses */
  if (!MultiByteToWideChar(CP_UTF8, 0, hostname, -1, wName, countof(wName)))
    return 0;

  return (WSAStringToAddressW(wName, AF_INET,  NULL, (struct sockaddr *)&addr,  &size) != SOCKET_ERROR) ||
         (WSAStringToAddressW(wName, AF_INET6, NULL, (struct sockaddr *)&addr6, &size6) != SOCKET_ERROR);
#else
  struct in_addr addr;
  struct in6_addr addr6;

  /* inet_pton() returns 1 on success, 0 for an unparsable string and -1 for an
   * unsupported address family. Only 1 means "is an IP address"; testing for
   * non-zero would turn the -1 error into a false positive. */
  return inet_pton(AF_INET, hostname, &addr) == 1
    || inet_pton(AF_INET6, hostname, &addr6) == 1;
#endif
}
| 1610 |  |  | 
| 1611 |  | /** | 
| 1612 |  |  * psl_is_cookie_domain_acceptable: | 
| 1613 |  |  * @psl: PSL context pointer | 
| 1614 |  |  * @hostname: The request hostname. | 
| 1615 |  |  * @cookie_domain: The domain value from a cookie | 
| 1616 |  |  * | 
| 1617 |  |  * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request | 
| 1618 |  |  * @hostname. | 
| 1619 |  |  * | 
| 1620 |  |  * For international domain names both, @hostname and @cookie_domain, have to be either in UTF-8 (lowercase + NFKC) | 
| 1621 |  |  * or in ASCII/ACE (punycode) format. Other encodings or mixing UTF-8 and punycode likely result in incorrect return values. | 
| 1622 |  |  * | 
| 1623 |  |  * Use helper function psl_str_to_utf8lower() for normalization of @hostname and @cookie_domain. | 
| 1624 |  |  * | 
| 1625 |  |  * Hint for Windows users: | 
| 1626 |  |  * Please make sure the calling application has called WSAStartup() before calling psl_is_cookie_domain_acceptable(). | 
| 1627 |  |  * | 
| 1628 |  |  * Examples: | 
| 1629 |  |  * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com', | 
| 1630 |  |  * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix. | 
| 1631 |  |  * | 
| 1632 |  |  * 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name', | 
| 1633 |  |  *  but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix. | 
| 1634 |  |  * | 
| 1635 |  |  * Returns: 1 if acceptable, 0 if not acceptable. | 
| 1636 |  |  * | 
| 1637 |  |  * Since: 0.1 | 
| 1638 |  |  */ | 
| 1639 |  | int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain) | 
| 1640 | 0 | { | 
| 1641 | 0 |   const char *p; | 
| 1642 | 0 |   size_t hostname_length, cookie_domain_length; | 
| 1643 |  | 
 | 
| 1644 | 0 |   if (!psl || !hostname || !cookie_domain) | 
| 1645 | 0 |     return 0; | 
| 1646 |  |  | 
| 1647 | 0 |   while (*cookie_domain == '.') | 
| 1648 | 0 |     cookie_domain++; | 
| 1649 |  | 
 | 
| 1650 | 0 |   if (!strcmp(hostname, cookie_domain)) | 
| 1651 | 0 |     return 1; /* an exact match is acceptable (and pretty common) */ | 
| 1652 |  |  | 
| 1653 | 0 |   if (isip(hostname)) | 
| 1654 | 0 |     return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */ | 
| 1655 |  |  | 
| 1656 | 0 |   cookie_domain_length = strlen(cookie_domain); | 
| 1657 | 0 |   hostname_length = strlen(hostname); | 
| 1658 |  | 
 | 
| 1659 | 0 |   if (cookie_domain_length >= hostname_length) | 
| 1660 | 0 |     return 0; /* cookie_domain is too long */ | 
| 1661 |  |  | 
| 1662 | 0 |   p = hostname + hostname_length - cookie_domain_length; | 
| 1663 | 0 |   if (!strcmp(p, cookie_domain) && p[-1] == '.') { | 
| 1664 |  |     /* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */ | 
| 1665 |  | 
 | 
| 1666 | 0 |     if (!(p = psl_unregistrable_domain(psl, hostname))) | 
| 1667 | 0 |       return 1; | 
| 1668 |  |  | 
| 1669 | 0 |     if (cookie_domain_length > strlen(p)) | 
| 1670 | 0 |       return 1; | 
| 1671 | 0 |   } | 
| 1672 |  |  | 
| 1673 | 0 |   return 0; | 
| 1674 | 0 | } | 
| 1675 |  |  | 
/**
 * psl_free_string:
 * @str: pointer to lowercase string returned by psl_str_to_utf8lower(), or %NULL
 *
 * This function free()'s the memory allocated by psl_str_to_utf8lower() when
 * returning a lowercase string. Passing %NULL is allowed and does nothing.
 *
 * Since: 0.19
 */
void psl_free_string(char *str)
{
  /* free(NULL) is a no-op, so no explicit NULL check is needed */
  free(str);
}
| 1690 |  |  | 
| 1691 |  | /** | 
| 1692 |  |  * psl_str_to_utf8lower: | 
| 1693 |  |  * @str: string to convert | 
| 1694 |  |  * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL | 
| 1695 |  |  * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL | 
| 1696 |  |  * @lower: return value containing the converted string | 
| 1697 |  |  * | 
| 1698 |  |  * This helper function converts a string to UTF-8 lowercase + NFKC representation. | 
| 1699 |  |  * Lowercase + NFKC UTF-8 is needed as input to the domain checking functions. | 
| 1700 |  |  * | 
| 1701 |  |  * @lower stays unchanged on error. | 
| 1702 |  |  * | 
| 1703 |  |  * When returning PSL_SUCCESS, the return value 'lower' must be freed after usage. | 
| 1704 |  |  * | 
| 1705 |  |  * Returns: psl_error_t value. | 
| 1706 |  |  *   PSL_SUCCESS: Success | 
| 1707 |  |  *   PSL_ERR_INVALID_ARG: @str is a %NULL value. | 
| 1708 |  |  *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding | 
| 1709 |  |  *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode | 
| 1710 |  |  *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase | 
| 1711 |  |  *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8 | 
| 1712 |  |  *   PSL_ERR_NO_MEM: Failed to allocate memory | 
| 1713 |  |  * | 
| 1714 |  |  * Since: 0.4 | 
| 1715 |  |  */ | 
| 1716 |  | psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding PSL_UNUSED, const char *locale PSL_UNUSED, char **lower) | 
| 1717 | 0 | { | 
| 1718 | 0 |   int ret = PSL_ERR_INVALID_ARG; | 
| 1719 |  | 
 | 
| 1720 | 0 |   if (!str) | 
| 1721 | 0 |     return PSL_ERR_INVALID_ARG; | 
| 1722 |  |  | 
| 1723 |  |   /* shortcut to avoid costly conversion */ | 
| 1724 | 0 |   if (str_is_ascii(str)) { | 
| 1725 | 0 |     if (lower) { | 
| 1726 | 0 |       char *p, *tmp; | 
| 1727 |  | 
 | 
| 1728 | 0 |       if (!(tmp = strdup(str))) | 
| 1729 | 0 |         return PSL_ERR_NO_MEM; | 
| 1730 |  |  | 
| 1731 | 0 |       *lower = tmp; | 
| 1732 |  |  | 
| 1733 |  |       /* convert ASCII string to lowercase */ | 
| 1734 | 0 |       for (p = *lower; *p; p++) | 
| 1735 | 0 |         if (isupper(*p)) | 
| 1736 | 0 |           *p = tolower(*p); | 
| 1737 | 0 |     } | 
| 1738 | 0 |     return PSL_SUCCESS; | 
| 1739 | 0 |   } | 
| 1740 |  |  | 
| 1741 |  | #ifdef WITH_LIBICU | 
| 1742 |  |   do { | 
| 1743 |  |   size_t str_length = strlen(str); | 
| 1744 |  |   UErrorCode status = 0; | 
| 1745 |  |   UChar *utf16_dst, *utf16_lower; | 
| 1746 |  |   int32_t utf16_dst_length; | 
| 1747 |  |   char *utf8_lower; | 
| 1748 |  |   UConverter *uconv; | 
| 1749 |  |  | 
| 1750 |  |   if (str_length < 256) { | 
| 1751 |  |     /* C89 allocation */ | 
| 1752 |  |     utf16_dst   = alloca(sizeof(UChar) * (str_length * 2 + 1)); | 
| 1753 |  |     utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1)); | 
| 1754 |  |     utf8_lower  = alloca(str_length * 6 + 1); | 
| 1755 |  |   } else { | 
| 1756 |  |     utf16_dst   = malloc(sizeof(UChar) * (str_length * 2 + 1)); | 
| 1757 |  |     utf16_lower = malloc(sizeof(UChar) * (str_length * 2 + 1)); | 
| 1758 |  |     utf8_lower  = malloc(str_length * 6 + 1); | 
| 1759 |  |  | 
| 1760 |  |     if (!utf16_dst || !utf16_lower || !utf8_lower) { | 
| 1761 |  |       ret = PSL_ERR_NO_MEM; | 
| 1762 |  |       goto out; | 
| 1763 |  |     } | 
| 1764 |  |   } | 
| 1765 |  |  | 
| 1766 |  |   uconv = ucnv_open(encoding, &status); | 
| 1767 |  |   if (U_SUCCESS(status)) { | 
| 1768 |  |     utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status); | 
| 1769 |  |     ucnv_close(uconv); | 
| 1770 |  |  | 
| 1771 |  |     if (U_SUCCESS(status)) { | 
| 1772 |  |       int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status); | 
| 1773 |  |       if (U_SUCCESS(status)) { | 
| 1774 |  |         u_strToUTF8(utf8_lower, str_length * 6 + 1, NULL, utf16_lower, utf16_lower_length, &status); | 
| 1775 |  |         if (U_SUCCESS(status)) { | 
| 1776 |  |           ret = PSL_SUCCESS; | 
| 1777 |  |           if (lower) { | 
| 1778 |  |             char *tmp = strdup(utf8_lower); | 
| 1779 |  |  | 
| 1780 |  |             if (tmp) | 
| 1781 |  |               *lower = tmp; | 
| 1782 |  |             else | 
| 1783 |  |               ret = PSL_ERR_NO_MEM; | 
| 1784 |  |           } | 
| 1785 |  |         } else { | 
| 1786 |  |           ret = PSL_ERR_TO_UTF8; | 
| 1787 |  |           /* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */ | 
| 1788 |  |         } | 
| 1789 |  |       } else { | 
| 1790 |  |         ret = PSL_ERR_TO_LOWER; | 
| 1791 |  |         /* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */ | 
| 1792 |  |       } | 
| 1793 |  |     } else { | 
| 1794 |  |       ret = PSL_ERR_TO_UTF16; | 
| 1795 |  |       /* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */ | 
| 1796 |  |     } | 
| 1797 |  |   } else { | 
| 1798 |  |     ret = PSL_ERR_CONVERTER; | 
| 1799 |  |     /* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */ | 
| 1800 |  |   } | 
| 1801 |  | out: | 
| 1802 |  |   if (str_length >= 256) { | 
| 1803 |  |     free(utf16_dst); | 
| 1804 |  |     free(utf16_lower); | 
| 1805 |  |     free(utf8_lower); | 
| 1806 |  |   } | 
| 1807 |  |   } while (0); | 
| 1808 |  | #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN) | 
| 1809 | 0 |   do { | 
| 1810 |  |     /* find out local charset encoding */ | 
| 1811 | 0 |     if (!encoding) { | 
| 1812 | 0 | #ifdef HAVE_NL_LANGINFO | 
| 1813 | 0 |       encoding = nl_langinfo(CODESET); | 
| 1814 |  | #elif defined _WIN32 | 
| 1815 |  |       static char buf[16]; | 
| 1816 |  |       snprintf(buf, sizeof(buf), "CP%u", GetACP()); | 
| 1817 |  |       encoding = buf; | 
| 1818 |  | #endif | 
| 1819 | 0 |       if (!encoding || !*encoding) | 
| 1820 | 0 |         encoding = "ASCII"; | 
| 1821 | 0 |     } | 
| 1822 |  |  | 
| 1823 |  |     /* convert to UTF-8 */ | 
| 1824 | 0 |     if (strcasecmp(encoding, "utf-8")) { | 
| 1825 | 0 |       iconv_t cd = iconv_open("utf-8", encoding); | 
| 1826 |  | 
 | 
| 1827 | 0 |       if (cd != (iconv_t)-1) { | 
| 1828 | 0 |         char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */ | 
| 1829 | 0 |         size_t tmp_len = strlen(str) + 1; | 
| 1830 | 0 |         size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len; | 
| 1831 | 0 |         char *dst = malloc(dst_len + 1), *dst_tmp = dst; | 
| 1832 |  | 
 | 
| 1833 | 0 |         if (!dst) { | 
| 1834 | 0 |           ret = PSL_ERR_NO_MEM; | 
| 1835 | 0 |         } | 
| 1836 | 0 |         else if (iconv(cd, (WINICONV_CONST char **)&tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1 | 
| 1837 | 0 |           && iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1) | 
| 1838 | 0 |         { | 
| 1839 |  |           /* start size for u8_tolower internal memory allocation. | 
| 1840 |  |            * u8_tolower() does not terminate the result string. we have 0 byte included in above tmp_len | 
| 1841 |  |            * and thus in len. */ | 
| 1842 | 0 |           size_t len = dst_len - dst_len_tmp; | 
| 1843 |  | 
 | 
| 1844 | 0 |           if ((tmp = (char *)u8_tolower((uint8_t *)dst, len, 0, UNINORM_NFKC, NULL, &len))) { | 
| 1845 | 0 |             ret = PSL_SUCCESS; | 
| 1846 | 0 |             if (lower) { | 
| 1847 | 0 |               *lower = tmp; | 
| 1848 | 0 |               tmp = NULL; | 
| 1849 | 0 |             } else | 
| 1850 | 0 |               free(tmp); | 
| 1851 | 0 |           } else { | 
| 1852 | 0 |             ret = PSL_ERR_TO_LOWER; | 
| 1853 |  |             /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ | 
| 1854 | 0 |           } | 
| 1855 | 0 |         } else { | 
| 1856 | 0 |           ret = PSL_ERR_TO_UTF8; | 
| 1857 |  |           /* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ | 
| 1858 | 0 |         } | 
| 1859 |  | 
 | 
| 1860 | 0 |         free(dst); | 
| 1861 | 0 |         iconv_close(cd); | 
| 1862 | 0 |       } else { | 
| 1863 | 0 |         ret = PSL_ERR_TO_UTF8; | 
| 1864 |  |         /* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */ | 
| 1865 | 0 |       } | 
| 1866 | 0 |     } else { | 
| 1867 |  |       /* we need a conversion to lowercase */ | 
| 1868 | 0 |       uint8_t *tmp; | 
| 1869 |  |  | 
| 1870 |  |       /* start size for u8_tolower internal memory allocation. | 
| 1871 |  |        * u8_tolower() does not terminate the result string, so include terminating 0 byte in len. */ | 
| 1872 | 0 |       size_t len = u8_strlen((uint8_t *)str) + 1; | 
| 1873 |  | 
 | 
| 1874 | 0 |       if ((tmp = u8_tolower((uint8_t *)str, len, 0, UNINORM_NFKC, NULL, &len))) { | 
| 1875 | 0 |         ret = PSL_SUCCESS; | 
| 1876 | 0 |         if (lower) { | 
| 1877 | 0 |           *lower = (char*)tmp; | 
| 1878 | 0 |           tmp = NULL; | 
| 1879 | 0 |         } else | 
| 1880 | 0 |           free(tmp); | 
| 1881 | 0 |       } else { | 
| 1882 | 0 |         ret = PSL_ERR_TO_LOWER; | 
| 1883 |  |         /* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */ | 
| 1884 | 0 |       } | 
| 1885 | 0 |     } | 
| 1886 |  | 
 | 
| 1887 | 0 |   } while (0); | 
| 1888 | 0 | #endif | 
| 1889 |  | 
 | 
| 1890 | 0 |   return ret; | 
| 1891 | 0 | } | 
| 1892 |  |  | 
| 1893 |  | /* if file is newer than the builtin data, insert it reverse sorted by mtime */ | 
| 1894 |  | static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n) | 
| 1895 | 0 | { | 
| 1896 | 0 |   struct stat st; | 
| 1897 | 0 |   int it; | 
| 1898 |  | 
 | 
| 1899 | 0 |   if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) { | 
| 1900 |  |     /* add file name and mtime to end of array */ | 
| 1901 | 0 |     psl_fname[n] = fname; | 
| 1902 | 0 |     psl_mtime[n++] = st.st_mtime; | 
| 1903 |  |  | 
| 1904 |  |     /* move the new entry to it's correct position */ | 
| 1905 | 0 |     for (it = n - 2; it >= 0 && st.st_mtime > psl_mtime[it]; it--) { | 
| 1906 | 0 |       psl_fname[it + 1] = psl_fname[it]; | 
| 1907 | 0 |       psl_mtime[it + 1] = psl_mtime[it]; | 
| 1908 | 0 |       psl_fname[it] = fname; | 
| 1909 | 0 |       psl_mtime[it] = st.st_mtime; | 
| 1910 | 0 |     } | 
| 1911 | 0 |   } | 
| 1912 |  | 
 | 
| 1913 | 0 |   return n; | 
| 1914 | 0 | } | 
| 1915 |  |  | 
| 1916 |  | /** | 
| 1917 |  |  * psl_latest: | 
| 1918 |  |  * @fname: Name of PSL file or %NULL | 
| 1919 |  |  * | 
| 1920 |  |  * This function loads the the latest available PSL data from either | 
| 1921 |  |  * - @fname (application specific filename, may be %NULL) | 
| 1922 |  |  * - location specified during built-time (filename from ./configure --with-psl-distfile) | 
| 1923 |  |  * - built-in PSL data (generated from ./configure --with-psl-file) | 
| 1924 |  |  * - location of built-in data (filename from ./configure --with-psl-file) | 
| 1925 |  |  * | 
| 1926 |  |  * If none of the above is available, the function returns %NULL. | 
| 1927 |  |  * | 
| 1928 |  |  * To free the allocated resources, call psl_free(). | 
| 1929 |  |  * | 
| 1930 |  |  * Returns: Pointer to a PSL context or %NULL on failure. | 
| 1931 |  |  * | 
| 1932 |  |  * Since: 0.16 | 
| 1933 |  |  */ | 
| 1934 |  | psl_ctx_t *psl_latest(const char *fname) | 
| 1935 | 0 | { | 
| 1936 | 0 |   psl_ctx_t *psl; | 
| 1937 | 0 |   const char *psl_fname[3]; | 
| 1938 | 0 |   time_t psl_mtime[3]; | 
| 1939 | 0 |   int it, ntimes; | 
| 1940 |  | 
 | 
| 1941 | 0 |   psl_fname[0] = NULL; /* silence gcc 6.2 false warning */ | 
| 1942 |  |  | 
| 1943 |  |   /* create array of PSL files reverse sorted by mtime (latest first) */ | 
| 1944 | 0 |   ntimes = insert_file(fname, psl_fname, psl_mtime, 0); | 
| 1945 | 0 |   ntimes = insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes); | 
| 1946 | 0 |   ntimes = insert_file(_psl_filename, psl_fname, psl_mtime, ntimes); | 
| 1947 |  |  | 
| 1948 |  |   /* load PSL data from the latest file, falling back to the second recent, ... */ | 
| 1949 | 0 |   for (psl = NULL, it = 0; it < ntimes; it++) { | 
| 1950 | 0 |     if (psl_mtime[it] > _psl_file_time) | 
| 1951 | 0 |       if ((psl = psl_load_file(psl_fname[it]))) | 
| 1952 | 0 |         break; | 
| 1953 | 0 |   } | 
| 1954 |  |  | 
| 1955 |  |   /* if file loading failed or there is no file newer than the builtin data, | 
| 1956 |  |    * then return the builtin data. */ | 
| 1957 | 0 |   return psl ? psl : (psl_ctx_t *) psl_builtin(); | 
| 1958 | 0 | } |