Coverage Report

Created: 2025-06-13 06:06

/src/postgres/src/backend/commands/collationcmds.c
Line
Count
Source (jump to first uncovered line)
1
/*-------------------------------------------------------------------------
2
 *
3
 * collationcmds.c
4
 *    collation-related commands support code
5
 *
6
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7
 * Portions Copyright (c) 1994, Regents of the University of California
8
 *
9
 *
10
 * IDENTIFICATION
11
 *    src/backend/commands/collationcmds.c
12
 *
13
 *-------------------------------------------------------------------------
14
 */
15
#include "postgres.h"
16
17
#include "access/htup_details.h"
18
#include "access/table.h"
19
#include "access/xact.h"
20
#include "catalog/indexing.h"
21
#include "catalog/namespace.h"
22
#include "catalog/objectaccess.h"
23
#include "catalog/pg_collation.h"
24
#include "catalog/pg_database.h"
25
#include "catalog/pg_namespace.h"
26
#include "commands/collationcmds.h"
27
#include "commands/comment.h"
28
#include "commands/dbcommands.h"
29
#include "commands/defrem.h"
30
#include "common/string.h"
31
#include "mb/pg_wchar.h"
32
#include "miscadmin.h"
33
#include "utils/acl.h"
34
#include "utils/builtins.h"
35
#include "utils/lsyscache.h"
36
#include "utils/pg_locale.h"
37
#include "utils/rel.h"
38
#include "utils/syscache.h"
39
40
41
/*
 * One alias candidate remembered while reading "locale -a" output, so that
 * the alias collations can be created after all output has been read.
 */
typedef struct
{
  /* locale name, as per "locale -a" */
  char     *localename;

  /* shortened alias for the same locale */
  char     *alias;

  /* encoding of the locale */
  int     enc;
} CollAliasData;
47
48
49
/*
50
 * CREATE COLLATION
51
 */
52
ObjectAddress
53
DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists)
54
0
{
55
0
  char     *collName;
56
0
  Oid     collNamespace;
57
0
  AclResult aclresult;
58
0
  ListCell   *pl;
59
0
  DefElem    *fromEl = NULL;
60
0
  DefElem    *localeEl = NULL;
61
0
  DefElem    *lccollateEl = NULL;
62
0
  DefElem    *lcctypeEl = NULL;
63
0
  DefElem    *providerEl = NULL;
64
0
  DefElem    *deterministicEl = NULL;
65
0
  DefElem    *rulesEl = NULL;
66
0
  DefElem    *versionEl = NULL;
67
0
  char     *collcollate;
68
0
  char     *collctype;
69
0
  const char *colllocale;
70
0
  char     *collicurules;
71
0
  bool    collisdeterministic;
72
0
  int     collencoding;
73
0
  char    collprovider;
74
0
  char     *collversion = NULL;
75
0
  Oid     newoid;
76
0
  ObjectAddress address;
77
78
0
  collNamespace = QualifiedNameGetCreationNamespace(names, &collName);
79
80
0
  aclresult = object_aclcheck(NamespaceRelationId, collNamespace, GetUserId(), ACL_CREATE);
81
0
  if (aclresult != ACLCHECK_OK)
82
0
    aclcheck_error(aclresult, OBJECT_SCHEMA,
83
0
             get_namespace_name(collNamespace));
84
85
0
  foreach(pl, parameters)
86
0
  {
87
0
    DefElem    *defel = lfirst_node(DefElem, pl);
88
0
    DefElem   **defelp;
89
90
0
    if (strcmp(defel->defname, "from") == 0)
91
0
      defelp = &fromEl;
92
0
    else if (strcmp(defel->defname, "locale") == 0)
93
0
      defelp = &localeEl;
94
0
    else if (strcmp(defel->defname, "lc_collate") == 0)
95
0
      defelp = &lccollateEl;
96
0
    else if (strcmp(defel->defname, "lc_ctype") == 0)
97
0
      defelp = &lcctypeEl;
98
0
    else if (strcmp(defel->defname, "provider") == 0)
99
0
      defelp = &providerEl;
100
0
    else if (strcmp(defel->defname, "deterministic") == 0)
101
0
      defelp = &deterministicEl;
102
0
    else if (strcmp(defel->defname, "rules") == 0)
103
0
      defelp = &rulesEl;
104
0
    else if (strcmp(defel->defname, "version") == 0)
105
0
      defelp = &versionEl;
106
0
    else
107
0
    {
108
0
      ereport(ERROR,
109
0
          (errcode(ERRCODE_SYNTAX_ERROR),
110
0
           errmsg("collation attribute \"%s\" not recognized",
111
0
              defel->defname),
112
0
           parser_errposition(pstate, defel->location)));
113
0
      break;
114
0
    }
115
0
    if (*defelp != NULL)
116
0
      errorConflictingDefElem(defel, pstate);
117
0
    *defelp = defel;
118
0
  }
119
120
0
  if (localeEl && (lccollateEl || lcctypeEl))
121
0
    ereport(ERROR,
122
0
        errcode(ERRCODE_SYNTAX_ERROR),
123
0
        errmsg("conflicting or redundant options"),
124
0
        errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE."));
125
126
0
  if (fromEl && list_length(parameters) != 1)
127
0
    ereport(ERROR,
128
0
        errcode(ERRCODE_SYNTAX_ERROR),
129
0
        errmsg("conflicting or redundant options"),
130
0
        errdetail("FROM cannot be specified together with any other options."));
131
132
0
  if (fromEl)
133
0
  {
134
0
    Oid     collid;
135
0
    HeapTuple tp;
136
0
    Datum   datum;
137
0
    bool    isnull;
138
139
0
    collid = get_collation_oid(defGetQualifiedName(fromEl), false);
140
0
    tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
141
0
    if (!HeapTupleIsValid(tp))
142
0
      elog(ERROR, "cache lookup failed for collation %u", collid);
143
144
0
    collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
145
0
    collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic;
146
0
    collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding;
147
148
0
    datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
149
0
    if (!isnull)
150
0
      collcollate = TextDatumGetCString(datum);
151
0
    else
152
0
      collcollate = NULL;
153
154
0
    datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
155
0
    if (!isnull)
156
0
      collctype = TextDatumGetCString(datum);
157
0
    else
158
0
      collctype = NULL;
159
160
0
    datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colllocale, &isnull);
161
0
    if (!isnull)
162
0
      colllocale = TextDatumGetCString(datum);
163
0
    else
164
0
      colllocale = NULL;
165
166
    /*
167
     * When the ICU locale comes from an existing collation, do not
168
     * canonicalize to a language tag.
169
     */
170
171
0
    datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
172
0
    if (!isnull)
173
0
      collicurules = TextDatumGetCString(datum);
174
0
    else
175
0
      collicurules = NULL;
176
177
0
    ReleaseSysCache(tp);
178
179
    /*
180
     * Copying the "default" collation is not allowed because most code
181
     * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT,
182
     * and so having a second collation with COLLPROVIDER_DEFAULT would
183
     * not work and potentially confuse or crash some code.  This could be
184
     * fixed with some legwork.
185
     */
186
0
    if (collprovider == COLLPROVIDER_DEFAULT)
187
0
      ereport(ERROR,
188
0
          (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
189
0
           errmsg("collation \"default\" cannot be copied")));
190
0
  }
191
0
  else
192
0
  {
193
0
    char     *collproviderstr = NULL;
194
195
0
    collcollate = NULL;
196
0
    collctype = NULL;
197
0
    colllocale = NULL;
198
0
    collicurules = NULL;
199
200
0
    if (providerEl)
201
0
      collproviderstr = defGetString(providerEl);
202
203
0
    if (deterministicEl)
204
0
      collisdeterministic = defGetBoolean(deterministicEl);
205
0
    else
206
0
      collisdeterministic = true;
207
208
0
    if (rulesEl)
209
0
      collicurules = defGetString(rulesEl);
210
211
0
    if (versionEl)
212
0
      collversion = defGetString(versionEl);
213
214
0
    if (collproviderstr)
215
0
    {
216
0
      if (pg_strcasecmp(collproviderstr, "builtin") == 0)
217
0
        collprovider = COLLPROVIDER_BUILTIN;
218
0
      else if (pg_strcasecmp(collproviderstr, "icu") == 0)
219
0
        collprovider = COLLPROVIDER_ICU;
220
0
      else if (pg_strcasecmp(collproviderstr, "libc") == 0)
221
0
        collprovider = COLLPROVIDER_LIBC;
222
0
      else
223
0
        ereport(ERROR,
224
0
            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
225
0
             errmsg("unrecognized collation provider: %s",
226
0
                collproviderstr)));
227
0
    }
228
0
    else
229
0
      collprovider = COLLPROVIDER_LIBC;
230
231
0
    if (localeEl)
232
0
    {
233
0
      if (collprovider == COLLPROVIDER_LIBC)
234
0
      {
235
0
        collcollate = defGetString(localeEl);
236
0
        collctype = defGetString(localeEl);
237
0
      }
238
0
      else
239
0
        colllocale = defGetString(localeEl);
240
0
    }
241
242
0
    if (lccollateEl)
243
0
      collcollate = defGetString(lccollateEl);
244
245
0
    if (lcctypeEl)
246
0
      collctype = defGetString(lcctypeEl);
247
248
0
    if (collprovider == COLLPROVIDER_BUILTIN)
249
0
    {
250
0
      if (!colllocale)
251
0
        ereport(ERROR,
252
0
            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
253
0
             errmsg("parameter \"%s\" must be specified",
254
0
                "locale")));
255
256
0
      colllocale = builtin_validate_locale(GetDatabaseEncoding(),
257
0
                         colllocale);
258
0
    }
259
0
    else if (collprovider == COLLPROVIDER_LIBC)
260
0
    {
261
0
      if (!collcollate)
262
0
        ereport(ERROR,
263
0
            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
264
0
             errmsg("parameter \"%s\" must be specified",
265
0
                "lc_collate")));
266
267
0
      if (!collctype)
268
0
        ereport(ERROR,
269
0
            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
270
0
             errmsg("parameter \"%s\" must be specified",
271
0
                "lc_ctype")));
272
0
    }
273
0
    else if (collprovider == COLLPROVIDER_ICU)
274
0
    {
275
0
      if (!colllocale)
276
0
        ereport(ERROR,
277
0
            (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
278
0
             errmsg("parameter \"%s\" must be specified",
279
0
                "locale")));
280
281
      /*
282
       * During binary upgrade, preserve the locale string. Otherwise,
283
       * canonicalize to a language tag.
284
       */
285
0
      if (!IsBinaryUpgrade)
286
0
      {
287
0
        char     *langtag = icu_language_tag(colllocale,
288
0
                             icu_validation_level);
289
290
0
        if (langtag && strcmp(colllocale, langtag) != 0)
291
0
        {
292
0
          ereport(NOTICE,
293
0
              (errmsg("using standard form \"%s\" for ICU locale \"%s\"",
294
0
                  langtag, colllocale)));
295
296
0
          colllocale = langtag;
297
0
        }
298
0
      }
299
300
0
      icu_validate_locale(colllocale);
301
0
    }
302
303
    /*
304
     * Nondeterministic collations are currently only supported with ICU
305
     * because that's the only case where it can actually make a
306
     * difference. So we can save writing the code for the other
307
     * providers.
308
     */
309
0
    if (!collisdeterministic && collprovider != COLLPROVIDER_ICU)
310
0
      ereport(ERROR,
311
0
          (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
312
0
           errmsg("nondeterministic collations not supported with this provider")));
313
314
0
    if (collicurules && collprovider != COLLPROVIDER_ICU)
315
0
      ereport(ERROR,
316
0
          (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
317
0
           errmsg("ICU rules cannot be specified unless locale provider is ICU")));
318
319
0
    if (collprovider == COLLPROVIDER_BUILTIN)
320
0
    {
321
0
      collencoding = builtin_locale_encoding(colllocale);
322
0
    }
323
0
    else if (collprovider == COLLPROVIDER_ICU)
324
0
    {
325
0
#ifdef USE_ICU
326
      /*
327
       * We could create ICU collations with collencoding == database
328
       * encoding, but it seems better to use -1 so that it matches the
329
       * way initdb would create ICU collations.  However, only allow
330
       * one to be created when the current database's encoding is
331
       * supported.  Otherwise the collation is useless, plus we get
332
       * surprising behaviors like not being able to drop the collation.
333
       *
334
       * Skip this test when !USE_ICU, because the error we want to
335
       * throw for that isn't thrown till later.
336
       */
337
0
      if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
338
0
        ereport(ERROR,
339
0
            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
340
0
             errmsg("current database's encoding is not supported with this provider")));
341
0
#endif
342
0
      collencoding = -1;
343
0
    }
344
0
    else
345
0
    {
346
0
      collencoding = GetDatabaseEncoding();
347
0
      check_encoding_locale_matches(collencoding, collcollate, collctype);
348
0
    }
349
0
  }
350
351
0
  if (!collversion)
352
0
  {
353
0
    const char *locale;
354
355
0
    if (collprovider == COLLPROVIDER_LIBC)
356
0
      locale = collcollate;
357
0
    else
358
0
      locale = colllocale;
359
360
0
    collversion = get_collation_actual_version(collprovider, locale);
361
0
  }
362
363
0
  newoid = CollationCreate(collName,
364
0
               collNamespace,
365
0
               GetUserId(),
366
0
               collprovider,
367
0
               collisdeterministic,
368
0
               collencoding,
369
0
               collcollate,
370
0
               collctype,
371
0
               colllocale,
372
0
               collicurules,
373
0
               collversion,
374
0
               if_not_exists,
375
0
               false);  /* not quiet */
376
377
0
  if (!OidIsValid(newoid))
378
0
    return InvalidObjectAddress;
379
380
  /* Check that the locales can be loaded. */
381
0
  CommandCounterIncrement();
382
0
  (void) pg_newlocale_from_collation(newoid);
383
384
0
  ObjectAddressSet(address, CollationRelationId, newoid);
385
386
0
  return address;
387
0
}
388
389
/*
390
 * Subroutine for ALTER COLLATION SET SCHEMA and RENAME
391
 *
392
 * Is there a collation with the same name of the given collation already in
393
 * the given namespace?  If so, raise an appropriate error message.
394
 */
395
void
396
IsThereCollationInNamespace(const char *collname, Oid nspOid)
397
0
{
398
  /* make sure the name doesn't already exist in new schema */
399
0
  if (SearchSysCacheExists3(COLLNAMEENCNSP,
400
0
                CStringGetDatum(collname),
401
0
                Int32GetDatum(GetDatabaseEncoding()),
402
0
                ObjectIdGetDatum(nspOid)))
403
0
    ereport(ERROR,
404
0
        (errcode(ERRCODE_DUPLICATE_OBJECT),
405
0
         errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"",
406
0
            collname, GetDatabaseEncodingName(),
407
0
            get_namespace_name(nspOid))));
408
409
  /* mustn't match an any-encoding entry, either */
410
0
  if (SearchSysCacheExists3(COLLNAMEENCNSP,
411
0
                CStringGetDatum(collname),
412
0
                Int32GetDatum(-1),
413
0
                ObjectIdGetDatum(nspOid)))
414
0
    ereport(ERROR,
415
0
        (errcode(ERRCODE_DUPLICATE_OBJECT),
416
0
         errmsg("collation \"%s\" already exists in schema \"%s\"",
417
0
            collname, get_namespace_name(nspOid))));
418
0
}
419
420
/*
421
 * ALTER COLLATION
422
 */
423
ObjectAddress
424
AlterCollation(AlterCollationStmt *stmt)
425
0
{
426
0
  Relation  rel;
427
0
  Oid     collOid;
428
0
  HeapTuple tup;
429
0
  Form_pg_collation collForm;
430
0
  Datum   datum;
431
0
  bool    isnull;
432
0
  char     *oldversion;
433
0
  char     *newversion;
434
0
  ObjectAddress address;
435
436
0
  rel = table_open(CollationRelationId, RowExclusiveLock);
437
0
  collOid = get_collation_oid(stmt->collname, false);
438
439
0
  if (collOid == DEFAULT_COLLATION_OID)
440
0
    ereport(ERROR,
441
0
        (errmsg("cannot refresh version of default collation"),
442
    /* translator: %s is an SQL command */
443
0
         errhint("Use %s instead.",
444
0
             "ALTER DATABASE ... REFRESH COLLATION VERSION")));
445
446
0
  if (!object_ownercheck(CollationRelationId, collOid, GetUserId()))
447
0
    aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION,
448
0
             NameListToString(stmt->collname));
449
450
0
  tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
451
0
  if (!HeapTupleIsValid(tup))
452
0
    elog(ERROR, "cache lookup failed for collation %u", collOid);
453
454
0
  collForm = (Form_pg_collation) GETSTRUCT(tup);
455
0
  datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull);
456
0
  oldversion = isnull ? NULL : TextDatumGetCString(datum);
457
458
0
  if (collForm->collprovider == COLLPROVIDER_LIBC)
459
0
    datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_collcollate);
460
0
  else
461
0
    datum = SysCacheGetAttrNotNull(COLLOID, tup, Anum_pg_collation_colllocale);
462
463
0
  newversion = get_collation_actual_version(collForm->collprovider,
464
0
                        TextDatumGetCString(datum));
465
466
  /* cannot change from NULL to non-NULL or vice versa */
467
0
  if ((!oldversion && newversion) || (oldversion && !newversion))
468
0
    elog(ERROR, "invalid collation version change");
469
0
  else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
470
0
  {
471
0
    bool    nulls[Natts_pg_collation];
472
0
    bool    replaces[Natts_pg_collation];
473
0
    Datum   values[Natts_pg_collation];
474
475
0
    ereport(NOTICE,
476
0
        (errmsg("changing version from %s to %s",
477
0
            oldversion, newversion)));
478
479
0
    memset(values, 0, sizeof(values));
480
0
    memset(nulls, false, sizeof(nulls));
481
0
    memset(replaces, false, sizeof(replaces));
482
483
0
    values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
484
0
    replaces[Anum_pg_collation_collversion - 1] = true;
485
486
0
    tup = heap_modify_tuple(tup, RelationGetDescr(rel),
487
0
                values, nulls, replaces);
488
0
  }
489
0
  else
490
0
    ereport(NOTICE,
491
0
        (errmsg("version has not changed")));
492
493
0
  CatalogTupleUpdate(rel, &tup->t_self, tup);
494
495
0
  InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
496
497
0
  ObjectAddressSet(address, CollationRelationId, collOid);
498
499
0
  heap_freetuple(tup);
500
0
  table_close(rel, NoLock);
501
502
0
  return address;
503
0
}
504
505
506
Datum
507
pg_collation_actual_version(PG_FUNCTION_ARGS)
508
0
{
509
0
  Oid     collid = PG_GETARG_OID(0);
510
0
  char    provider;
511
0
  char     *locale;
512
0
  char     *version;
513
0
  Datum   datum;
514
515
0
  if (collid == DEFAULT_COLLATION_OID)
516
0
  {
517
    /* retrieve from pg_database */
518
519
0
    HeapTuple dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
520
521
0
    if (!HeapTupleIsValid(dbtup))
522
0
      ereport(ERROR,
523
0
          (errcode(ERRCODE_UNDEFINED_OBJECT),
524
0
           errmsg("database with OID %u does not exist", MyDatabaseId)));
525
526
0
    provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider;
527
528
0
    if (provider == COLLPROVIDER_LIBC)
529
0
    {
530
0
      datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datcollate);
531
0
      locale = TextDatumGetCString(datum);
532
0
    }
533
0
    else
534
0
    {
535
0
      datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, Anum_pg_database_datlocale);
536
0
      locale = TextDatumGetCString(datum);
537
0
    }
538
539
0
    ReleaseSysCache(dbtup);
540
0
  }
541
0
  else
542
0
  {
543
    /* retrieve from pg_collation */
544
545
0
    HeapTuple colltp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
546
547
0
    if (!HeapTupleIsValid(colltp))
548
0
      ereport(ERROR,
549
0
          (errcode(ERRCODE_UNDEFINED_OBJECT),
550
0
           errmsg("collation with OID %u does not exist", collid)));
551
552
0
    provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider;
553
0
    Assert(provider != COLLPROVIDER_DEFAULT);
554
555
0
    if (provider == COLLPROVIDER_LIBC)
556
0
    {
557
0
      datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_collcollate);
558
0
      locale = TextDatumGetCString(datum);
559
0
    }
560
0
    else
561
0
    {
562
0
      datum = SysCacheGetAttrNotNull(COLLOID, colltp, Anum_pg_collation_colllocale);
563
0
      locale = TextDatumGetCString(datum);
564
0
    }
565
566
0
    ReleaseSysCache(colltp);
567
0
  }
568
569
0
  version = get_collation_actual_version(provider, locale);
570
0
  if (version)
571
0
    PG_RETURN_TEXT_P(cstring_to_text(version));
572
0
  else
573
0
    PG_RETURN_NULL();
574
0
}
575
576
577
/*
 * Choose how pg_import_system_collations will enumerate system locales:
 * EnumSystemLocalesEx on WIN32 (ENUM_SYSTEM_LOCALE), "locale -a" everywhere
 * else (READ_LOCALE_A_OUTPUT).  Exactly one of the two symbols is defined.
 */
#ifdef WIN32
#define ENUM_SYSTEM_LOCALE
#else
#define READ_LOCALE_A_OUTPUT
#endif
586
587
588
#ifdef READ_LOCALE_A_OUTPUT
589
/*
 * Is 'c' a character that may appear in an encoding tag (ASCII letter,
 * ASCII digit, or '-')?  Written with explicit ranges so that the answer
 * does not depend on the current locale.
 */
static bool
is_encoding_tag_char(char c)
{
  if (c >= 'A' && c <= 'Z')
    return true;
  if (c >= 'a' && c <= 'z')
    return true;
  if (c >= '0' && c <= '9')
    return true;
  return c == '-';
}

/*
 * "Normalize" a libc locale name by dropping encoding tags such as ".utf8"
 * (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" ->
 * "br_FR@euro").
 *
 * The normalized name is written to 'new', which must have room for at
 * least strlen(old) + 1 bytes.  Returns true if at least one '.' was
 * stripped from 'old', false if the name was copied unchanged.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
  const char *src = old;
  size_t    outlen = 0;
  bool    stripped = false;

  for (;;)
  {
    char    c = *src;

    if (c == '\0')
      break;

    if (c != '.')
    {
      /* ordinary character: copy through */
      new[outlen++] = c;
      src++;
      continue;
    }

    /* skip over encoding tag such as ".utf8" or ".UTF-8" */
    src++;
    while (is_encoding_tag_char(*src))
      src++;
    stripped = true;
  }
  new[outlen] = '\0';

  return stripped;
}
622
623
/*
624
 * qsort comparator for CollAliasData items
625
 */
626
static int
627
cmpaliases(const void *a, const void *b)
628
0
{
629
0
  const CollAliasData *ca = (const CollAliasData *) a;
630
0
  const CollAliasData *cb = (const CollAliasData *) b;
631
632
  /* comparing localename is enough because other fields are derived */
633
0
  return strcmp(ca->localename, cb->localename);
634
0
}
635
#endif              /* READ_LOCALE_A_OUTPUT */
636
637
638
#ifdef USE_ICU
639
/*
640
 * Get a comment (specifically, the display name) for an ICU locale.
641
 * The result is a palloc'd string, or NULL if we can't get a comment
642
 * or find that it's not all ASCII.  (We can *not* accept non-ASCII
643
 * comments, because the contents of template0 must be encoding-agnostic.)
644
 */
645
static char *
646
get_icu_locale_comment(const char *localename)
647
0
{
648
0
  UErrorCode  status;
649
0
  UChar   displayname[128];
650
0
  int32   len_uchar;
651
0
  int32   i;
652
0
  char     *result;
653
654
0
  status = U_ZERO_ERROR;
655
0
  len_uchar = uloc_getDisplayName(localename, "en",
656
0
                  displayname, lengthof(displayname),
657
0
                  &status);
658
0
  if (U_FAILURE(status))
659
0
    return NULL;     /* no good reason to raise an error */
660
661
  /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
662
0
  for (i = 0; i < len_uchar; i++)
663
0
  {
664
0
    if (displayname[i] > 127)
665
0
      return NULL;
666
0
  }
667
668
  /* OK, transcribe */
669
0
  result = palloc(len_uchar + 1);
670
0
  for (i = 0; i < len_uchar; i++)
671
0
    result[i] = displayname[i];
672
0
  result[len_uchar] = '\0';
673
674
0
  return result;
675
0
}
676
#endif              /* USE_ICU */
677
678
679
/*
680
 * Create a new collation using the input locale 'locale'. (subroutine for
681
 * pg_import_system_collations())
682
 *
683
 * 'nspid' is the namespace id where the collation will be created.
684
 *
685
 * 'nvalidp' is incremented if the locale has a valid encoding.
686
 *
687
 * 'ncreatedp' is incremented if the collation is actually created.  If the
688
 * collation already exists it will quietly do nothing.
689
 *
690
 * The returned value is the encoding of the locale, -1 if the locale is not
691
 * valid for creating a collation.
692
 *
693
 */
694
pg_attribute_unused()
695
static int
696
create_collation_from_locale(const char *locale, int nspid,
697
               int *nvalidp, int *ncreatedp)
698
0
{
699
0
  int     enc;
700
0
  Oid     collid;
701
702
  /*
703
   * Some systems have locale names that don't consist entirely of ASCII
704
   * letters (such as "bokm&aring;l" or "fran&ccedil;ais"). This is pretty
705
   * silly, since we need the locale itself to interpret the non-ASCII
706
   * characters. We can't do much with those, so we filter them out.
707
   */
708
0
  if (!pg_is_ascii(locale))
709
0
  {
710
0
    elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
711
0
    return -1;
712
0
  }
713
714
0
  enc = pg_get_encoding_from_locale(locale, false);
715
0
  if (enc < 0)
716
0
  {
717
0
    elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
718
0
    return -1;
719
0
  }
720
0
  if (!PG_VALID_BE_ENCODING(enc))
721
0
  {
722
0
    elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
723
0
    return -1;
724
0
  }
725
0
  if (enc == PG_SQL_ASCII)
726
0
    return -1;       /* C/POSIX are already in the catalog */
727
728
  /* count valid locales found in operating system */
729
0
  (*nvalidp)++;
730
731
  /*
732
   * Create a collation named the same as the locale, but quietly doing
733
   * nothing if it already exists.  This is the behavior we need even at
734
   * initdb time, because some versions of "locale -a" can report the same
735
   * locale name more than once.  And it's convenient for later import runs,
736
   * too, since you just about always want to add on new locales without a
737
   * lot of chatter about existing ones.
738
   */
739
0
  collid = CollationCreate(locale, nspid, GetUserId(),
740
0
               COLLPROVIDER_LIBC, true, enc,
741
0
               locale, locale, NULL, NULL,
742
0
               get_collation_actual_version(COLLPROVIDER_LIBC, locale),
743
0
               true, true);
744
0
  if (OidIsValid(collid))
745
0
  {
746
0
    (*ncreatedp)++;
747
748
    /* Must do CCI between inserts to handle duplicates correctly */
749
0
    CommandCounterIncrement();
750
0
  }
751
752
0
  return enc;
753
0
}
754
755
756
#ifdef ENUM_SYSTEM_LOCALE
757
/* parameter to be passed to the callback function win32_read_locale() */
758
typedef struct
759
{
760
  Oid     nspid;
761
  int      *ncreatedp;
762
  int      *nvalidp;
763
} CollParam;
764
765
/*
766
 * Callback function for EnumSystemLocalesEx() in
767
 * pg_import_system_collations().  Creates a collation for every valid locale
768
 * and a POSIX alias collation.
769
 *
770
 * The callback contract is to return TRUE to continue enumerating and FALSE
771
 * to stop enumerating.  We always want to continue.
772
 */
773
static BOOL CALLBACK
774
win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
775
{
776
  CollParam  *param = (CollParam *) lparam;
777
  char    localebuf[NAMEDATALEN];
778
  int     result;
779
  int     enc;
780
781
  (void) dwFlags;
782
783
  result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
784
                 NULL, NULL);
785
786
  if (result == 0)
787
  {
788
    if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
789
      elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
790
    return TRUE;
791
  }
792
  if (localebuf[0] == '\0')
793
    return TRUE;
794
795
  enc = create_collation_from_locale(localebuf, param->nspid,
796
                     param->nvalidp, param->ncreatedp);
797
  if (enc < 0)
798
    return TRUE;
799
800
  /*
801
   * Windows will use hyphens between language and territory, where POSIX
802
   * uses an underscore. Simply create a POSIX alias.
803
   */
804
  if (strchr(localebuf, '-'))
805
  {
806
    char    alias[NAMEDATALEN];
807
    Oid     collid;
808
809
    strcpy(alias, localebuf);
810
    for (char *p = alias; *p; p++)
811
      if (*p == '-')
812
        *p = '_';
813
814
    collid = CollationCreate(alias, param->nspid, GetUserId(),
815
                 COLLPROVIDER_LIBC, true, enc,
816
                 localebuf, localebuf, NULL, NULL,
817
                 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
818
                 true, true);
819
    if (OidIsValid(collid))
820
    {
821
      (*param->ncreatedp)++;
822
823
      CommandCounterIncrement();
824
    }
825
  }
826
827
  return TRUE;
828
}
829
#endif              /* ENUM_SYSTEM_LOCALE */
830
831
832
/*
833
 * pg_import_system_collations: add known system collations to pg_collation
834
 */
835
Datum
836
pg_import_system_collations(PG_FUNCTION_ARGS)
837
0
{
838
0
  Oid     nspid = PG_GETARG_OID(0);
839
0
  int     ncreated = 0;
840
841
0
  if (!superuser())
842
0
    ereport(ERROR,
843
0
        (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
844
0
         errmsg("must be superuser to import system collations")));
845
846
0
  if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid)))
847
0
    ereport(ERROR,
848
0
        (errcode(ERRCODE_UNDEFINED_SCHEMA),
849
0
         errmsg("schema with OID %u does not exist", nspid)));
850
851
  /* Load collations known to libc, using "locale -a" to enumerate them */
852
0
#ifdef READ_LOCALE_A_OUTPUT
853
0
  {
854
0
    FILE     *locale_a_handle;
855
0
    char    localebuf[LOCALE_NAME_BUFLEN];
856
0
    int     nvalid = 0;
857
0
    Oid     collid;
858
0
    CollAliasData *aliases;
859
0
    int     naliases,
860
0
          maxaliases,
861
0
          i;
862
863
    /* expansible array of aliases */
864
0
    maxaliases = 100;
865
0
    aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
866
0
    naliases = 0;
867
868
0
    locale_a_handle = OpenPipeStream("locale -a", "r");
869
0
    if (locale_a_handle == NULL)
870
0
      ereport(ERROR,
871
0
          (errcode_for_file_access(),
872
0
           errmsg("could not execute command \"%s\": %m",
873
0
              "locale -a")));
874
875
0
    while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
876
0
    {
877
0
      size_t    len;
878
0
      int     enc;
879
0
      char    alias[LOCALE_NAME_BUFLEN];
880
881
0
      len = strlen(localebuf);
882
883
0
      if (len == 0 || localebuf[len - 1] != '\n')
884
0
      {
885
0
        elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
886
0
        continue;
887
0
      }
888
0
      localebuf[len - 1] = '\0';
889
890
0
      enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
891
0
      if (enc < 0)
892
0
        continue;
893
894
      /*
895
       * Generate aliases such as "en_US" in addition to "en_US.utf8"
896
       * for ease of use.  Note that collation names are unique per
897
       * encoding only, so this doesn't clash with "en_US" for LATIN1,
898
       * say.
899
       *
900
       * However, it might conflict with a name we'll see later in the
901
       * "locale -a" output.  So save up the aliases and try to add them
902
       * after we've read all the output.
903
       */
904
0
      if (normalize_libc_locale_name(alias, localebuf))
905
0
      {
906
0
        if (naliases >= maxaliases)
907
0
        {
908
0
          maxaliases *= 2;
909
0
          aliases = (CollAliasData *)
910
0
            repalloc(aliases, maxaliases * sizeof(CollAliasData));
911
0
        }
912
0
        aliases[naliases].localename = pstrdup(localebuf);
913
0
        aliases[naliases].alias = pstrdup(alias);
914
0
        aliases[naliases].enc = enc;
915
0
        naliases++;
916
0
      }
917
0
    }
918
919
    /*
920
     * We don't check the return value of this, because we want to support
921
     * the case where there "locale" command does not exist.  (This is
922
     * unusual but can happen on minimalized Linux distributions, for
923
     * example.)  We will warn below if no locales could be found.
924
     */
925
0
    ClosePipeStream(locale_a_handle);
926
927
    /*
928
     * Before processing the aliases, sort them by locale name.  The point
929
     * here is that if "locale -a" gives us multiple locale names with the
930
     * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
931
     * want to pick a deterministic one of them.  First in ASCII sort
932
     * order is a good enough rule.  (Before PG 10, the code corresponding
933
     * to this logic in initdb.c had an additional ordering rule, to
934
     * prefer the locale name exactly matching the alias, if any.  We
935
     * don't need to consider that here, because we would have already
936
     * created such a pg_collation entry above, and that one will win.)
937
     */
938
0
    if (naliases > 1)
939
0
      qsort(aliases, naliases, sizeof(CollAliasData), cmpaliases);
940
941
    /* Now add aliases, ignoring any that match pre-existing entries */
942
0
    for (i = 0; i < naliases; i++)
943
0
    {
944
0
      char     *locale = aliases[i].localename;
945
0
      char     *alias = aliases[i].alias;
946
0
      int     enc = aliases[i].enc;
947
948
0
      collid = CollationCreate(alias, nspid, GetUserId(),
949
0
                   COLLPROVIDER_LIBC, true, enc,
950
0
                   locale, locale, NULL, NULL,
951
0
                   get_collation_actual_version(COLLPROVIDER_LIBC, locale),
952
0
                   true, true);
953
0
      if (OidIsValid(collid))
954
0
      {
955
0
        ncreated++;
956
957
0
        CommandCounterIncrement();
958
0
      }
959
0
    }
960
961
    /* Give a warning if "locale -a" seems to be malfunctioning */
962
0
    if (nvalid == 0)
963
0
      ereport(WARNING,
964
0
          (errmsg("no usable system locales were found")));
965
0
  }
966
0
#endif              /* READ_LOCALE_A_OUTPUT */
967
968
  /*
969
   * Load collations known to ICU
970
   *
971
   * We use uloc_countAvailable()/uloc_getAvailable() rather than
972
   * ucol_countAvailable()/ucol_getAvailable().  The former returns a full
973
   * set of language+region combinations, whereas the latter only returns
974
   * language+region combinations if they are distinct from the language's
975
   * base collation.  So there might not be a de-DE or en-GB, which would be
976
   * confusing.
977
   */
978
0
#ifdef USE_ICU
979
0
  {
980
0
    int     i;
981
982
    /*
983
     * Start the loop at -1 to sneak in the root locale without too much
984
     * code duplication.
985
     */
986
0
    for (i = -1; i < uloc_countAvailable(); i++)
987
0
    {
988
0
      const char *name;
989
0
      char     *langtag;
990
0
      char     *icucomment;
991
0
      Oid     collid;
992
993
0
      if (i == -1)
994
0
        name = "";   /* ICU root locale */
995
0
      else
996
0
        name = uloc_getAvailable(i);
997
998
0
      langtag = icu_language_tag(name, ERROR);
999
1000
      /*
1001
       * Be paranoid about not allowing any non-ASCII strings into
1002
       * pg_collation
1003
       */
1004
0
      if (!pg_is_ascii(langtag))
1005
0
        continue;
1006
1007
0
      collid = CollationCreate(psprintf("%s-x-icu", langtag),
1008
0
                   nspid, GetUserId(),
1009
0
                   COLLPROVIDER_ICU, true, -1,
1010
0
                   NULL, NULL, langtag, NULL,
1011
0
                   get_collation_actual_version(COLLPROVIDER_ICU, langtag),
1012
0
                   true, true);
1013
0
      if (OidIsValid(collid))
1014
0
      {
1015
0
        ncreated++;
1016
1017
0
        CommandCounterIncrement();
1018
1019
0
        icucomment = get_icu_locale_comment(name);
1020
0
        if (icucomment)
1021
0
          CreateComments(collid, CollationRelationId, 0,
1022
0
                   icucomment);
1023
0
      }
1024
0
    }
1025
0
  }
1026
0
#endif              /* USE_ICU */
1027
1028
  /* Load collations known to WIN32 */
1029
#ifdef ENUM_SYSTEM_LOCALE
1030
  {
1031
    int     nvalid = 0;
1032
    CollParam param;
1033
1034
    param.nspid = nspid;
1035
    param.ncreatedp = &ncreated;
1036
    param.nvalidp = &nvalid;
1037
1038
    /*
1039
     * Enumerate the locales that are either installed on or supported by
1040
     * the OS.
1041
     */
1042
    if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
1043
                 (LPARAM) &param, NULL))
1044
      _dosmaperr(GetLastError());
1045
1046
    /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
1047
    if (nvalid == 0)
1048
      ereport(WARNING,
1049
          (errmsg("no usable system locales were found")));
1050
  }
1051
#endif              /* ENUM_SYSTEM_LOCALE */
1052
1053
0
  PG_RETURN_INT32(ncreated);
1054
0
}