Coverage Report

Created: 2025-06-13 06:34

/src/icu/icu4c/source/common/putil.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1997-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*
11
*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12
*
13
*   Date        Name        Description
14
*   04/14/97    aliu        Creation.
15
*   04/24/97    aliu        Added getDefaultDataDirectory() and
16
*                            getDefaultLocaleID().
17
*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
18
*                            for assumed case.  Non-UNIX platforms must be
19
*                            special-cased.  Rewrote numeric methods dealing
20
*                            with NaN and Infinity to be platform independent
21
*                             over all IEEE 754 platforms.
22
*   05/13/97    aliu        Restored sign of timezone
23
*                            (semantics are hours West of GMT)
24
*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25
*                             nextDouble..
26
*   07/22/98    stephen     Added remainder, max, min, trunc
27
*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
28
*   08/24/98    stephen     Added longBitsFromDouble
29
*   09/08/98    stephen     Minor changes for Mac Port
30
*   03/02/99    stephen     Removed openFile().  Added AS400 support.
31
*                            Fixed EBCDIC tables
32
*   04/15/99    stephen     Converted to C.
33
*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
34
*   08/04/99    jeffrey R.  Added OS/2 changes
35
*   11/15/99    helena      Integrated S/390 IEEE support.
36
*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
37
*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
38
*   01/03/08    Steven L.   Fake Time Support
39
******************************************************************************
40
*/
41
42
// Defines _XOPEN_SOURCE for access to POSIX functions.
43
// Must be before any other #includes.
44
#include "uposixdefs.h"
45
46
// First, the platform type. Need this for U_PLATFORM.
47
#include "unicode/platform.h"
48
49
/*
50
 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
51
 */
52
#include <time.h>
53
54
#if !U_PLATFORM_USES_ONLY_WIN32_API
55
#include <sys/time.h>
56
#endif
57
58
/* include the rest of the ICU headers */
59
#include "unicode/putil.h"
60
#include "unicode/ustring.h"
61
#include "putilimp.h"
62
#include "uassert.h"
63
#include "umutex.h"
64
#include "cmemory.h"
65
#include "cstring.h"
66
#include "locmap.h"
67
#include "ucln_cmn.h"
68
#include "charstr.h"
69
70
/* Include standard headers. */
71
#include <stdio.h>
72
#include <stdlib.h>
73
#include <string.h>
74
#include <math.h>
75
#include <locale.h>
76
#include <float.h>
77
78
#ifndef U_COMMON_IMPLEMENTATION
79
#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html
80
#endif
81
82
83
/* include system headers */
84
#if U_PLATFORM_USES_ONLY_WIN32_API
85
    /*
86
     * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
87
     * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
88
     * to use native APIs as much as possible?
89
     */
90
#ifndef WIN32_LEAN_AND_MEAN
91
#   define WIN32_LEAN_AND_MEAN
92
#endif
93
#   define VC_EXTRALEAN
94
#   define NOUSER
95
#   define NOSERVICE
96
#   define NOIME
97
#   define NOMCX
98
#   include <windows.h>
99
#   include "unicode/uloc.h"
100
#   include "wintz.h"
101
#elif U_PLATFORM == U_PF_OS400
102
#   include <float.h>
103
#   include <qusec.h>       /* error code structure */
104
#   include <qusrjobi.h>
105
#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
106
#   include <mih/testptr.h> /* For uprv_maximumPtr */
107
#elif U_PLATFORM == U_PF_OS390
108
#   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
109
#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
110
#   include <limits.h>
111
#   include <unistd.h>
112
#   if U_PLATFORM == U_PF_SOLARIS
113
#       ifndef _XPG4_2
114
#           define _XPG4_2
115
#       endif
116
#   elif U_PLATFORM == U_PF_ANDROID
117
#       include <sys/system_properties.h>
118
#       include <dlfcn.h>
119
#   endif
120
#elif U_PLATFORM == U_PF_QNX
121
#   include <sys/neutrino.h>
122
#endif
123
124
125
/*
126
 * Only include langinfo.h if we have a way to get the codeset. If we later
127
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
128
 *
129
 */
130
131
#if U_HAVE_NL_LANGINFO_CODESET
132
#include <langinfo.h>
133
#endif
134
135
/**
136
 * Simple things (presence of functions, etc) should just go in configure.in and be added to
137
 * icucfg.h via autoheader.
138
 */
139
#if U_PLATFORM_IMPLEMENTS_POSIX
140
#   if U_PLATFORM == U_PF_OS400
141
#    define HAVE_DLFCN_H 0
142
#    define HAVE_DLOPEN 0
143
#   else
144
#   ifndef HAVE_DLFCN_H
145
#    define HAVE_DLFCN_H 1
146
#   endif
147
#   ifndef HAVE_DLOPEN
148
#    define HAVE_DLOPEN 1
149
#   endif
150
#   endif
151
#   ifndef HAVE_GETTIMEOFDAY
152
#    define HAVE_GETTIMEOFDAY 1
153
#   endif
154
#else
155
#   define HAVE_DLFCN_H 0
156
#   define HAVE_DLOPEN 0
157
#   define HAVE_GETTIMEOFDAY 0
158
#endif
159
160
U_NAMESPACE_USE
161
162
/* Define the extension for data files, again... */
163
#define DATA_TYPE "dat"
164
165
/* Leave this copyright notice here! */
166
static const char copyright[] = U_COPYRIGHT_STRING;
167
168
/* floating point implementations ------------------------------------------- */
169
170
/* We return QNAN rather than SNAN*/
171
#define SIGN 0x80000000U
172
173
/*
 * Overlay of a 64-bit integer and a double, used to define double constants
 * by bit pattern and to inspect the bits of a double.
 */
typedef union {
    /* Must stay the first member: brace initialization sets the first member. */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern 0x7FF8000000000000: the NaN value handed out by uprv_getNaN(). */
static const BitPatternConversion gNan = {static_cast<int64_t>(INT64_C(0x7FF8000000000000))};

/* Bit pattern 0x7FF0000000000000: positive infinity. */
static const BitPatternConversion gInf = {static_cast<int64_t>(INT64_C(0x7FF0000000000000))};
180
181
/*---------------------------------------------------------------------------
182
  Platform utilities
183
  Our general strategy is to assume we're on a POSIX platform.  Platforms which
184
  are non-POSIX must declare themselves so.  The default POSIX implementation
185
  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
186
  functions).
187
  ---------------------------------------------------------------------------*/
188
189
#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
190
#   undef U_POSIX_LOCALE
191
#else
192
#   define U_POSIX_LOCALE    1
193
#endif
194
195
/*
196
    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
197
    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
198
*/
199
#if !IEEE_754
200
/*
 * Returns a pointer to the n most significant bytes of the double at d.
 * Where those bytes sit in memory depends on U_IS_BIG_ENDIAN.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char* base = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Big-endian: the most significant bytes are stored first. */
    return base;
#else
    /* Little-endian: the most significant bytes are the last n bytes. */
    return base + sizeof(double) - n;
#endif
}
209
210
/*
 * Returns a pointer to the n least significant bytes of the double at d.
 * Where those bytes sit in memory depends on U_IS_BIG_ENDIAN.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char* base = (char*)d;
#if U_IS_BIG_ENDIAN
    /* Big-endian: the least significant bytes are the last n bytes. */
    return base + sizeof(double) - n;
#else
    /* Little-endian: the least significant bytes are stored first. */
    return base;
#endif
}
219
#endif   /* !IEEE_754 */
220
221
#if IEEE_754
222
/*
 * Returns true if the sign bit of d is set. The raw byte is inspected rather
 * than comparing with 0, so -0.0 is reported as signed.
 */
static UBool
u_signBit(double d) {
    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(&d);
#if U_IS_BIG_ENDIAN
    /* The byte carrying the sign bit is stored first. */
    const uint8_t topByte = bytes[0];
#else
    /* The byte carrying the sign bit is stored last. */
    const uint8_t topByte = bytes[sizeof(double) - 1];
#endif
    return (topByte & 0x80) != 0;
}
232
#endif
233
234
235
236
#if defined (U_DEBUG_FAKETIME)
237
/* Override the clock to test things without having to move the system clock.
238
 * Assumes POSIX gettimeofday() will function
239
 */
240
UDate fakeClock_t0 = 0; /** Time to start the clock from **/
241
UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
242
UBool fakeClock_set = false; /** True if fake clock has spun up **/
243
244
/*
 * Reads the real system clock with gettimeofday() and returns it as
 * milliseconds since the epoch (sub-millisecond precision is dropped).
 */
static UDate getUTCtime_real() {
    struct timeval now;
    gettimeofday(&now, nullptr);

    const auto wholeSecondsInMillis = static_cast<int64_t>(now.tv_sec) * U_MILLIS_PER_SECOND;
    const auto fractionInMillis = now.tv_usec / 1000;
    return static_cast<UDate>(wholeSecondsInMillis + fractionInMillis);
}
249
250
/**
 * Returns the real time shifted by the fake-clock offset (U_DEBUG_FAKETIME builds).
 *
 * On the first call, reads the environment variable U_FAKETIME_START
 * (milliseconds since 1/1/1970) and stores the difference between that value
 * and the real clock in fakeClock_dt. If the variable is unset, empty, or
 * cannot be parsed as a number, the offset is 0 and the real time is returned.
 * Initialization is guarded by a mutex and prints a diagnostic to stderr.
 */
static UDate getUTCtime_fake() {
    static UMutex fakeClockMutex;
    umtx_lock(&fakeClockMutex);
    if(!fakeClock_set) {
        UDate real = getUTCtime_real();
        const char *fake_start = getenv("U_FAKETIME_START");
        if((fake_start!=nullptr) && (fake_start[0]!=0)) {
            UDate parsed = 0;
            /* Only trust the value when sscanf actually converted a number. */
            if(sscanf(fake_start,"%lf",&parsed) == 1) {
                fakeClock_t0 = parsed;
                fakeClock_dt = fakeClock_t0 - real;
                fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
                        "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
                        fakeClock_t0, fake_start, fakeClock_dt, real);
            } else {
                /* Unparsable value: keep the real clock instead of jumping to the epoch. */
                fakeClock_dt = 0;
                fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START (%s) could not be parsed.\n"
                        "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n",
                        fake_start);
            }
        } else {
            fakeClock_dt = 0;
            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
        }
        fakeClock_set = true;
    }
    umtx_unlock(&fakeClockMutex);

    return getUTCtime_real() + fakeClock_dt;
}
273
#endif
274
275
#if U_PLATFORM_USES_ONLY_WIN32_API
276
typedef union {
277
    int64_t int64;
278
    FILETIME fileTime;
279
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */
280
281
/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
282
#define EPOCH_BIAS  INT64_C(116444736000000000)
283
#define HECTONANOSECOND_PER_MILLISECOND   10000
284
285
#endif
286
287
/*---------------------------------------------------------------------------
288
  Universal Implementations
289
  These are designed to work on all platforms.  Try these, and if they
290
  don't work on your platform, then special case your platform with new
291
  implementations.
292
---------------------------------------------------------------------------*/
293
294
/*
 * Returns the current UTC time in milliseconds. When U_DEBUG_FAKETIME is
 * defined the fake clock is used instead of the raw system clock.
 */
U_CAPI UDate U_EXPORT2
uprv_getUTCtime()
{
#ifdef U_DEBUG_FAKETIME
    /* Hook for overriding the clock */
    const UDate now = getUTCtime_fake();
#else
    const UDate now = uprv_getRawUTCtime();
#endif
    return now;
}
303
304
/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
305
U_CAPI UDate U_EXPORT2
306
uprv_getRawUTCtime()
307
0
{
308
#if U_PLATFORM_USES_ONLY_WIN32_API
309
310
    FileTimeConversion winTime;
311
    GetSystemTimeAsFileTime(&winTime.fileTime);
312
    return (UDate)((winTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
313
#else
314
315
0
#if HAVE_GETTIMEOFDAY
316
0
    struct timeval posixTime;
317
0
    gettimeofday(&posixTime, nullptr);
318
0
    return (UDate)(((int64_t)posixTime.tv_sec * U_MILLIS_PER_SECOND) + (posixTime.tv_usec/1000));
319
#else
320
    time_t epochtime;
321
    time(&epochtime);
322
    return (UDate)epochtime * U_MILLIS_PER_SECOND;
323
#endif
324
325
0
#endif
326
0
}
327
328
/*-----------------------------------------------------------------------------
329
  IEEE 754
330
  These methods detect and return NaN and infinity values for doubles
331
  conforming to IEEE 754.  Platforms which support this standard include X86,
332
  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
333
  If this doesn't work on your platform, you have non-IEEE floating-point, and
334
  will need to code your own versions.  A naive implementation is to return 0.0
335
  for getNaN and getInfinity, and false for isNaN and isInfinite.
336
  ---------------------------------------------------------------------------*/
337
338
/*
 * Returns true if number is a NaN.
 */
U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)
{
#if IEEE_754
    BitPatternConversion bits;
    bits.d64 = number;
    /* Drop the sign bit. Infinity is 0x7FF0000000000000U; anything greater is a NaN. */
    const auto magnitude = bits.i64 & U_INT64_MAX;
    return magnitude > gInf.i64;

#elif U_PLATFORM == U_PF_OS390
    const uint32_t upperWord = *(uint32_t*)u_topNBytesOfDouble(&number,
                               sizeof(uint32_t));
    const uint32_t lowerWord = *(uint32_t*)u_bottomNBytesOfDouble(&number,
                               sizeof(uint32_t));

    if ((upperWord & 0x7F080000L) != 0x7F080000L) {
        return false;
    }
    return lowerWord == 0x00000000L;

#else
    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    /* you'll need to replace this default implementation with what's correct*/
    /* for your platform.*/
    return number != number;
#endif
}
363
364
/*
 * Returns true if number is positive or negative infinity.
 */
U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)
{
#if IEEE_754
    BitPatternConversion bits;
    bits.d64 = number;
    /* Drop the sign bit. Infinity is exactly 0x7FF0000000000000U. */
    const auto magnitude = bits.i64 & U_INT64_MAX;
    return magnitude == gInf.i64;
#elif U_PLATFORM == U_PF_OS390
    const uint32_t upperWord = *(uint32_t*)u_topNBytesOfDouble(&number,
                               sizeof(uint32_t));
    const uint32_t lowerWord = *(uint32_t*)u_bottomNBytesOfDouble(&number,
                               sizeof(uint32_t));

    if ((upperWord & ~SIGN) != 0x70FF0000L) {
        return false;
    }
    return lowerWord == 0x00000000L;

#else
    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    /* value, you'll need to replace this default implementation with what's*/
    /* correct for your platform.*/
    return number == (2.0 * number);
#endif
}
387
388
/*
 * Returns true if number is positive infinity. On platforms that are neither
 * IEEE 754 nor OS/390 the sign is not checked.
 */
U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    if (!(number > 0)) {
        return false;
    }
#endif
    return uprv_isInfinite(number);
}
397
398
/*
 * Returns true if number is negative infinity.
 */
U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    if (!(number < 0)) {
        return false;
    }
    return uprv_isInfinite(number);

#else
    /* No usable comparison assumed here: test the SIGN bit of the top word directly. */
    const uint32_t upperWord = *(uint32_t*)u_topNBytesOfDouble(&number,
                               sizeof(uint32_t));
    if ((upperWord & SIGN) == 0) {
        return false;
    }
    return uprv_isInfinite(number);

#endif
}
411
412
/*
 * Returns the NaN constant built from gNan; on platforms that are neither
 * IEEE 754 nor OS/390 the placeholder 0.0 is returned instead.
 */
U_CAPI double U_EXPORT2
uprv_getNaN()
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    const double nanValue = gNan.d64;
#else
    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    /* you'll need to replace this default implementation with what's correct*/
    /* for your platform.*/
    const double nanValue = 0.0;
#endif
    return nanValue;
}
424
425
/*
 * Returns positive infinity built from gInf; on platforms that are neither
 * IEEE 754 nor OS/390 the placeholder 0.0 is returned instead.
 */
U_CAPI double U_EXPORT2
uprv_getInfinity()
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    const double infinity = gInf.d64;
#else
    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    /* value, you'll need to replace this default implementation with what's*/
    /* correct for your platform.*/
    const double infinity = 0.0;
#endif
    return infinity;
}
437
438
/* Thin wrapper around the C library floor(). */
U_CAPI double U_EXPORT2
uprv_floor(double x)
{
    const double roundedDown = floor(x);
    return roundedDown;
}
443
444
/* Thin wrapper around the C library ceil(). */
U_CAPI double U_EXPORT2
uprv_ceil(double x)
{
    const double roundedUp = ceil(x);
    return roundedUp;
}
449
450
/* Rounds x by adding 0.5 and taking the floor of the sum. */
U_CAPI double U_EXPORT2
uprv_round(double x)
{
    const double shifted = x + 0.5;
    return uprv_floor(shifted);
}
455
456
/* Thin wrapper around the C library fabs(). */
U_CAPI double U_EXPORT2
uprv_fabs(double x)
{
    const double magnitude = fabs(x);
    return magnitude;
}
461
462
/*
 * Thin wrapper around the C library modf(): stores the integral part of x in
 * *y and returns the fractional part.
 */
U_CAPI double U_EXPORT2
uprv_modf(double x, double* y)
{
    const double fraction = modf(x, y);
    return fraction;
}
467
468
/* Thin wrapper around the C library fmod(). */
U_CAPI double U_EXPORT2
uprv_fmod(double x, double y)
{
    const double remainder = fmod(x, y);
    return remainder;
}
473
474
/* Thin wrapper around the C library pow(). */
U_CAPI double U_EXPORT2
uprv_pow(double x, double y)
{
    /* This is declared as "double pow(double x, double y)" */
    const double power = pow(x, y);
    return power;
}
480
481
/* Returns 10 raised to the power x, computed with pow(). */
U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)
{
    const double exponent = static_cast<double>(x);
    return pow(10.0, exponent);
}
486
487
/*
 * Returns the larger of x and y. On IEEE 754 platforms a NaN operand yields
 * NaN, and -0.0 is treated as smaller than +0.0.
 */
U_CAPI double U_EXPORT2
uprv_fmax(double x, double y)
{
#if IEEE_754
    /* first handle NaN */
    if (uprv_isNaN(x) || uprv_isNaN(y)) {
        return uprv_getNaN();
    }

    /* Both are zero and x is -0.0: y is at least as large. */
    if (x == 0.0 && y == 0.0 && u_signBit(x)) {
        return y;
    }
#endif

    /* this should work for all flt point w/o NaN and Inf special cases */
    if (x > y) {
        return x;
    }
    return y;
}
504
505
/*
 * Returns the smaller of x and y. On IEEE 754 platforms a NaN operand yields
 * NaN, and -0.0 is treated as smaller than +0.0.
 */
U_CAPI double U_EXPORT2
uprv_fmin(double x, double y)
{
#if IEEE_754
    /* first handle NaN */
    if (uprv_isNaN(x) || uprv_isNaN(y)) {
        return uprv_getNaN();
    }

    /* Both are zero and y is -0.0: y is at least as small. */
    if (x == 0.0 && y == 0.0 && u_signBit(y)) {
        return y;
    }
#endif

    /* this should work for all flt point w/o NaN and Inf special cases */
    if (x > y) {
        return y;
    }
    return x;
}
522
523
/*
 * Adds a and b, storing the sum truncated to 32 bits in *res.
 * Returns true if the exact sum does not fit in an int32_t.
 */
U_CAPI UBool U_EXPORT2
uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
    // This function could be optimized by calling one of those primitives.
    const int64_t exactSum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
    const int32_t truncated = static_cast<int32_t>(exactSum);
    *res = truncated;
    return exactSum != truncated;
}
533
534
/*
 * Multiplies a and b, storing the product truncated to 32 bits in *res.
 * Returns true if the exact product does not fit in an int32_t.
 */
U_CAPI UBool U_EXPORT2
uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
    // This function could be optimized by calling one of those primitives.
    const int64_t exactProduct = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t truncated = static_cast<int32_t>(exactProduct);
    *res = truncated;
    return exactProduct != truncated;
}
544
545
/**
546
 * Truncates the given double.
547
 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
548
 * This is different than calling floor() or ceil():
549
 * floor(3.3) = 3, floor(-3.3) = -4
550
 * ceil(3.3) = 4, ceil(-3.3) = -3
551
 */
552
U_CAPI double U_EXPORT2
553
uprv_trunc(double d)
554
0
{
555
0
#if IEEE_754
556
    /* handle error cases*/
557
0
    if(uprv_isNaN(d))
558
0
        return uprv_getNaN();
559
0
    if(uprv_isInfinite(d))
560
0
        return uprv_getInfinity();
561
562
0
    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
563
0
        return ceil(d);
564
0
    else
565
0
        return floor(d);
566
567
#else
568
    return d >= 0 ? floor(d) : ceil(d);
569
570
#endif
571
0
}
572
573
/**
574
 * Return the largest positive number that can be represented by an integer
575
 * type of arbitrary bit length.
576
 */
577
U_CAPI double U_EXPORT2
578
uprv_maxMantissa()
579
0
{
580
0
    return pow(2.0, DBL_MANT_DIG + 1.0) - 1.0;
581
0
}
582
583
/* Thin wrapper around the C library log() (natural logarithm). */
U_CAPI double U_EXPORT2
uprv_log(double d)
{
    const double naturalLog = log(d);
    return naturalLog;
}
588
589
/*
 * Returns the highest pointer value considered usable relative to base.
 * OS/400 computes it from the pointer's segment; every other platform defers
 * to U_MAX_PTR.
 */
U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)
{
#if U_PLATFORM == U_PF_OS400
    /*
     * With the provided function we should never be out of range of a given segment
     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
     * id and 3 bytes for the offset.  The key is that the casting takes care of
     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
     * seen in a program is x001000 and when casted to an int would be 0.
     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
     *
     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
     * This function determines the activation based on the pointer that is passed in and
     * calculates the appropriate maximum available size for
     * each pointer type (TERASPACE and non-TERASPACE)
     *
     * Unlike other operating systems, the pointer model isn't determined at
     * compile time on i5/OS.
     */

    /* default: 16MB since nullptr ptr is not checkable or the ptr is not TERASPACE */
    uint32_t segmentLimit = (uint32_t)0xffefff;
    if ((base != nullptr) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
        /* if it is a TERASPACE pointer the max is 2GB - 4k */
        segmentLimit = (uint32_t)0x7fffefff;
    }
    return ((void *)(((char *)base)-((uint32_t)(base))+segmentLimit));

#else
    return U_MAX_PTR(base);
#endif
}
621
622
/*---------------------------------------------------------------------------
623
  Platform-specific Implementations
624
  Try these, and if they don't work on your platform, then special case your
625
  platform with new implementations.
626
  ---------------------------------------------------------------------------*/
627
628
/* Generic time zone layer -------------------------------------------------- */
629
630
/* Time zone utilities */
631
/*
 * Initializes the platform time zone state by invoking U_TZSET when the
 * platform defines it; otherwise does nothing.
 */
U_CAPI void U_EXPORT2
uprv_tzset()
{
#ifdef U_TZSET
    U_TZSET();
#endif
    /* without U_TZSET there is no initialization to perform */
}
640
641
/*
 * Returns the time zone offset in seconds. When the platform defines
 * U_TIMEZONE that value is returned directly; otherwise the offset is derived
 * by comparing the local and GMT break-downs of the current time.
 */
U_CAPI int32_t U_EXPORT2
uprv_timezone()
{
#ifdef U_TIMEZONE
    return U_TIMEZONE;
#else
    time_t now, localSeconds, gmtSeconds;
    struct tm brokenDown;

    time(&now);

    /* localtime()/gmtime() results are copied out before the next library call. */
    uprv_memcpy( &brokenDown, localtime(&now), sizeof(brokenDown) );
#if U_PLATFORM != U_PF_IPHONE
    /* Remember the DST flag before mktime() gets a chance to rewrite the struct. */
    const UBool dstFlagged = (brokenDown.tm_isdst != 0);
#endif
    localSeconds = mktime(&brokenDown);  /* local time in seconds */

    uprv_memcpy( &brokenDown, gmtime(&now), sizeof(brokenDown) );
    gmtSeconds = mktime(&brokenDown);    /* GMT (or UTC) in seconds */

    int32_t offset = gmtSeconds - localSeconds;

#if U_PLATFORM != U_PF_IPHONE
    /* imitate NT behaviour, which returns same timezone offset to GMT for
       winter and summer.
       This does not work on all platforms. For instance, on glibc on Linux
       and on Mac OS 10.5, the offset calculated above remains the same
       regardless of whether DST is in effect or not. iOS is another
       platform where this does not work. Linux + glibc and Mac OS 10.5
       have U_TIMEZONE defined so that this code is not reached.
    */
    if (dstFlagged) {
        offset += 3600;
    }
#endif
    return offset;
#endif
}
676
677
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
678
   some platforms need to have it declared here. */
679
680
#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
681
/* RS6000 and others reject char **tzname.  */
682
extern U_IMPORT char *U_TZNAME[];
683
#endif
684
685
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
686
/* These platforms are likely to use Olson timezone IDs. */
687
/* common targets of the symbolic link at TZDEFAULT are:
688
 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
689
 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
690
 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
691
 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
692
 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
693
 * To avoid checking lots of paths, just check that the target path
694
 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
695
 */
696
697
#define CHECK_LOCALTIME_LINK 1
698
#if U_PLATFORM_IS_DARWIN_BASED
699
#include <tzfile.h>
700
#define TZZONEINFO      (TZDIR "/")
701
#elif U_PLATFORM == U_PF_SOLARIS
702
#define TZDEFAULT       "/etc/localtime"
703
#define TZZONEINFO      "/usr/share/lib/zoneinfo/"
704
#define TZ_ENV_CHECK    "localtime"
705
#else
706
0
#define TZDEFAULT       "/etc/localtime"
707
0
#define TZZONEINFO      "/usr/share/zoneinfo/"
708
#endif
709
#define TZZONEINFOTAIL  "/zoneinfo/"
710
#if U_HAVE_DIRENT_H
711
#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
712
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
713
   symlinked to /etc/localtime, which makes searchForTZFile return
714
   'localtime' when it's the first match. */
715
#define TZFILE_SKIP2    "localtime"
716
#define SEARCH_TZFILE
717
#include <dirent.h>  /* Needed to search through system timezone files */
718
#endif
719
static char gTimeZoneBuffer[PATH_MAX];
720
static const char *gTimeZoneBufferPtr = nullptr;
721
#endif
722
723
#if !U_PLATFORM_USES_ONLY_WIN32_API
724
0
#define isNonDigit(ch) (ch < '0' || '9' < ch)
725
0
#define isDigit(ch) ('0' <= ch && ch <= '9')
726
0
static UBool isValidOlsonID(const char *id) {
727
0
    int32_t idx = 0;
728
0
    int32_t idxMax = 0;
729
730
    /* Determine if this is something like Iceland (Olson ID)
731
    or AST4ADT (non-Olson ID) */
732
0
    while (id[idx] && isNonDigit(id[idx]) && id[idx] != ',') {
733
0
        idx++;
734
0
    }
735
736
    /* Allow at maximum 2 numbers at the end of the id to support zone id's
737
    like GMT+11. */
738
0
    idxMax = idx + 2;
739
0
    while (id[idx] && isDigit(id[idx]) && idx < idxMax) {
740
0
        idx++;
741
0
    }
742
743
    /* If we went through the whole string, then it might be okay.
744
    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
745
    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
746
    The rest of the time it could be an Olson ID. George */
747
0
    return id[idx] == 0
748
0
        || uprv_strcmp(id, "PST8PDT") == 0
749
0
        || uprv_strcmp(id, "MST7MDT") == 0
750
0
        || uprv_strcmp(id, "CST6CDT") == 0
751
0
        || uprv_strcmp(id, "EST5EDT") == 0;
752
0
}
753
754
/* On some Unix-like OS, 'posix' subdirectory in
755
   /usr/share/zoneinfo replicates the top-level contents. 'right'
756
   subdirectory has the same set of files, but individual files
757
   are different from those in the top-level directory or 'posix'
758
   because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
759
   has files for UTC.
760
   When the first match for /etc/localtime is in either of them
761
   (usually in posix because 'right' has different file contents),
762
   or TZ environment variable points to one of them, createTimeZone
763
   fails because, say, 'posix/America/New_York' is not an Olson
764
   timezone id ('America/New_York' is). So, we have to skip
765
   'posix/' and 'right/' at the beginning. */
766
0
static void skipZoneIDPrefix(const char** id) {
767
0
    if (uprv_strncmp(*id, "posix/", 6) == 0
768
0
        || uprv_strncmp(*id, "right/", 6) == 0)
769
0
    {
770
0
        *id += 6;
771
0
    }
772
0
}
773
#endif
774
775
#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
776
777
/* Converts an offset in hours to seconds; the argument is parenthesized so expressions such as a+b scale correctly. */
#define CONVERT_HOURS_TO_SECONDS(offset) ((int32_t)((offset)*3600))
778
/*
 * One row of the table that maps a (GMT offset, daylight type, standard
 * abbreviation, daylight abbreviation) combination to an Olson ID.
 */
typedef struct OffsetZoneMapping {
    /* Offset in seconds that a lookup must match. */
    int32_t offsetSeconds;
    /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER */
    int32_t daylightType;
    /* Abbreviation of the standard-time name. */
    const char *stdID;
    /* Abbreviation of the daylight-time name. */
    const char *dstID;
    /* Olson ID returned for a matching row. */
    const char *olsonID;
} OffsetZoneMapping;
785
786
/* Values for OffsetZoneMapping::daylightType. */
enum {
    U_DAYLIGHT_NONE     = 0, /* no daylight saving time */
    U_DAYLIGHT_JUNE     = 1, /* daylight in June */
    U_DAYLIGHT_DECEMBER = 2  /* daylight in December */
};
787
788
/*
789
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
790
and maps it to an Olson ID.
791
Before adding anything to this list, take a look at
792
icu/source/tools/tzcode/tz.alias
793
Sometimes no daylight savings (0) is important to define due to aliases.
794
This list can be tested with icu/source/test/compat/tzone.pl
795
More values could be added to daylightType to increase precision.
796
*/
797
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
798
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
799
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
800
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
801
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
802
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
803
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
804
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
805
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
806
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
807
    {-34200, 2, "CST", "CST", "Australia/South"},
808
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
809
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
810
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
811
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
812
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
813
    {-28800, 2, "WST", "WST", "Australia/West"},
814
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
815
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
816
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
817
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
818
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
819
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
820
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
821
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
822
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
823
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
824
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
825
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
826
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
827
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
828
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
829
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
830
    {0, 1, "GMT", "IST", "Europe/Dublin"},
831
    {0, 1, "GMT", "BST", "Europe/London"},
832
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
833
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
834
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
835
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
836
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
837
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
838
    {10800, 1, "WGT", "WGST", "America/Godthab"},
839
    {10800, 2, "BRT", "BRST", "Brazil/East"},
840
    {12600, 1, "NST", "NDT", "America/St_Johns"},
841
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
842
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
843
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
844
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
845
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
846
    {18000, 1, "CST", "CDT", "America/Havana"},
847
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
848
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
849
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
850
    {21600, 0, "CST", "CDT", "America/Guatemala"},
851
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
852
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
853
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
854
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
855
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
856
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
857
};
858
859
/*#define DEBUG_TZNAME*/
860
861
static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
862
0
{
863
0
    int32_t idx;
864
#ifdef DEBUG_TZNAME
865
    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
866
#endif
867
0
    for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
868
0
    {
869
0
        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
870
0
            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
871
0
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
872
0
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
873
0
        {
874
0
            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
875
0
        }
876
0
    }
877
0
    return nullptr;
878
0
}
879
#endif
880
881
#ifdef SEARCH_TZFILE
882
0
#define MAX_READ_SIZE 512
883
884
typedef struct DefaultTZInfo {
885
    char* defaultTZBuffer;
886
    int64_t defaultTZFileSize;
887
    FILE* defaultTZFilePtr;
888
    UBool defaultTZstatus;
889
    int32_t defaultTZPosition;
890
} DefaultTZInfo;
891
892
/*
893
 * This method compares the two files given to see if they are a match.
894
 * It is currently use to compare two TZ files.
895
 */
896
0
static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
897
0
    FILE* file;
898
0
    int64_t sizeFile;
899
0
    int64_t sizeFileLeft;
900
0
    int32_t sizeFileRead;
901
0
    int32_t sizeFileToRead;
902
0
    char bufferFile[MAX_READ_SIZE];
903
0
    UBool result = true;
904
905
0
    if (tzInfo->defaultTZFilePtr == nullptr) {
906
0
        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
907
0
    }
908
0
    file = fopen(TZFileName, "r");
909
910
0
    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
911
912
0
    if (file != nullptr && tzInfo->defaultTZFilePtr != nullptr) {
913
        /* First check that the file size are equal. */
914
0
        if (tzInfo->defaultTZFileSize == 0) {
915
0
            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
916
0
            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
917
0
        }
918
0
        fseek(file, 0, SEEK_END);
919
0
        sizeFile = ftell(file);
920
0
        sizeFileLeft = sizeFile;
921
922
0
        if (sizeFile != tzInfo->defaultTZFileSize) {
923
0
            result = false;
924
0
        } else {
925
            /* Store the data from the files in separate buffers and
926
             * compare each byte to determine equality.
927
             */
928
0
            if (tzInfo->defaultTZBuffer == nullptr) {
929
0
                rewind(tzInfo->defaultTZFilePtr);
930
0
                tzInfo->defaultTZBuffer = static_cast<char*>(uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize));
931
0
                sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
932
0
            }
933
0
            rewind(file);
934
0
            while(sizeFileLeft > 0) {
935
0
                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
936
0
                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
937
938
0
                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
939
0
                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
940
0
                    result = false;
941
0
                    break;
942
0
                }
943
0
                sizeFileLeft -= sizeFileRead;
944
0
                tzInfo->defaultTZPosition += sizeFileRead;
945
0
            }
946
0
        }
947
0
    } else {
948
0
        result = false;
949
0
    }
950
951
0
    if (file != nullptr) {
952
0
        fclose(file);
953
0
    }
954
955
0
    return result;
956
0
}
957
958
959
/* dirent also lists two entries: "." and ".." that we can safely ignore. */
960
#define SKIP1 "."
961
#define SKIP2 ".."
962
static UBool U_CALLCONV putil_cleanup();
963
static CharString *gSearchTZFileResult = nullptr;
964
965
/*
966
 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
967
 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
968
 */
969
0
static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
970
0
    DIR* dirp = nullptr;
971
0
    struct dirent* dirEntry = nullptr;
972
0
    char* result = nullptr;
973
0
    UErrorCode status = U_ZERO_ERROR;
974
975
    /* Save the current path */
976
0
    CharString curpath(path, -1, status);
977
0
    if (U_FAILURE(status)) {
978
0
        goto cleanupAndReturn;
979
0
    }
980
981
0
    dirp = opendir(path);
982
0
    if (dirp == nullptr) {
983
0
        goto cleanupAndReturn;
984
0
    }
985
986
0
    if (gSearchTZFileResult == nullptr) {
987
0
        gSearchTZFileResult = new CharString;
988
0
        if (gSearchTZFileResult == nullptr) {
989
0
            goto cleanupAndReturn;
990
0
        }
991
0
        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
992
0
    }
993
994
    /* Check each entry in the directory. */
995
0
    while((dirEntry = readdir(dirp)) != nullptr) {
996
0
        const char* dirName = dirEntry->d_name;
997
0
        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
998
0
            && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
999
            /* Create a newpath with the new entry to test each entry in the directory. */
1000
0
            CharString newpath(curpath, status);
1001
0
            newpath.append(dirName, -1, status);
1002
0
            if (U_FAILURE(status)) {
1003
0
                break;
1004
0
            }
1005
1006
0
            DIR* subDirp = nullptr;
1007
0
            if ((subDirp = opendir(newpath.data())) != nullptr) {
1008
                /* If this new path is a directory, make a recursive call with the newpath. */
1009
0
                closedir(subDirp);
1010
0
                newpath.append('/', status);
1011
0
                if (U_FAILURE(status)) {
1012
0
                    break;
1013
0
                }
1014
0
                result = searchForTZFile(newpath.data(), tzInfo);
1015
                /*
1016
                 Have to get out here. Otherwise, we'd keep looking
1017
                 and return the first match in the top-level directory
1018
                 if there's a match in the top-level. If not, this function
1019
                 would return nullptr and set gTimeZoneBufferPtr to nullptr in initDefault().
1020
                 It worked without this in most cases because we have a fallback of calling
1021
                 localtime_r to figure out the default timezone.
1022
                */
1023
0
                if (result != nullptr)
1024
0
                    break;
1025
0
            } else {
1026
0
                if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1027
0
                    int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1028
0
                    if (amountToSkip > newpath.length()) {
1029
0
                        amountToSkip = newpath.length();
1030
0
                    }
1031
0
                    const char* zoneid = newpath.data() + amountToSkip;
1032
0
                    skipZoneIDPrefix(&zoneid);
1033
0
                    gSearchTZFileResult->clear();
1034
0
                    gSearchTZFileResult->append(zoneid, -1, status);
1035
0
                    if (U_FAILURE(status)) {
1036
0
                        break;
1037
0
                    }
1038
0
                    result = gSearchTZFileResult->data();
1039
                    /* Get out after the first one found. */
1040
0
                    break;
1041
0
                }
1042
0
            }
1043
0
        }
1044
0
    }
1045
1046
0
  cleanupAndReturn:
1047
0
    if (dirp) {
1048
0
        closedir(dirp);
1049
0
    }
1050
0
    return result;
1051
0
}
1052
#endif
1053
1054
#if U_PLATFORM == U_PF_ANDROID
1055
typedef int(system_property_read_callback)(const prop_info* info,
1056
                                           void (*callback)(void* cookie,
1057
                                                            const char* name,
1058
                                                            const char* value,
1059
                                                            uint32_t serial),
1060
                                           void* cookie);
1061
typedef int(system_property_get)(const char*, char*);
1062
1063
static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' };
1064
1065
static void u_property_read(void* cookie, const char* name, const char* value,
1066
                            uint32_t serial) {
1067
    uprv_strcpy((char* )cookie, value);
1068
}
1069
#endif
1070
1071
/*
 * Discards the cached default time zone so that the next uprv_tzname() call
 * detects it again. On Android the cache is refilled immediately from the
 * "persist.sys.timezone" system property.
 */
U_CAPI void U_EXPORT2
uprv_tzname_clear_cache()
{
#if U_PLATFORM == U_PF_ANDROID
    /* Android's timezone is stored in system property. */
    gAndroidTimeZone[0] = '\0';
    void* libcHandle = dlopen("libc.so", RTLD_NOLOAD);
    if (libcHandle != nullptr) {
        /* Android API 26+ has new API to get system property and old API
         * (__system_property_get) is deprecated */
        system_property_read_callback* readCallback =
            reinterpret_cast<system_property_read_callback*>(
                dlsym(libcHandle, "__system_property_read_callback"));
        if (readCallback != nullptr) {
            const prop_info* tzProperty =
                __system_property_find("persist.sys.timezone");
            if (tzProperty != nullptr) {
                readCallback(tzProperty, &u_property_read, gAndroidTimeZone);
            }
        } else {
            /* Fall back to the older entry point when the callback API is not available. */
            system_property_get* legacyGet =
                reinterpret_cast<system_property_get*>(
                    dlsym(libcHandle, "__system_property_get"));
            if (legacyGet != nullptr) {
                legacyGet("persist.sys.timezone", gAndroidTimeZone);
            }
        }
        dlclose(libcHandle);
    }
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Forget the name derived from the default TZ file link. */
    gTimeZoneBufferPtr = nullptr;
#endif
}
1105
1106
/*
 * Returns a name for the default time zone, preferably an Olson ID.
 * Depending on the build configuration the sources tried, in order, are:
 * the Windows time zone detection, the TZ environment variable (or the
 * Android system property), the target of the default TZ file link, a search
 * for a file identical to the default TZ file, and finally U_TZNAME remapped
 * through remapShortTimeZone(). `n` selects the U_TZNAME entry returned when
 * everything else fails.
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    (void)n; // Avoid unreferenced parameter warning.
    const char *tzid = nullptr;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();
    if (tzid != nullptr) {
        return tzid;
    }

#ifndef U_TZNAME
    // The return value is free'd in timezone.cpp on Windows because
    // the other code path returns a pointer to a heap location, so the
    // fallback must be heap allocated too. Without a detected name, tzname
    // would not be any better, so just return an empty string.
    return uprv_strdup("");
#endif // !U_TZNAME

#else

/*#if U_PLATFORM_IS_DARWIN_BASED
    int ret;

    tzid = getenv("TZFILE");
    if (tzid != nullptr) {
        return tzid;
    }
#endif*/

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
#if U_PLATFORM == U_PF_ANDROID
    tzid = gAndroidTimeZone;
#else
    tzid = getenv("TZ");
#endif
    if (tzid != nullptr && isValidOlsonID(tzid)
#if U_PLATFORM == U_PF_SOLARIS
    /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
        && uprv_strcmp(tzid, TZ_ENV_CHECK) != 0
#endif
    ) {
        /* The colon forces tzset() to treat the remainder as zoneinfo path */
        if (*tzid == ':') {
            ++tzid;
        }
        /* This might be a good Olson ID. */
        skipZoneIDPrefix(&tzid);
        return tzid;
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr != nullptr) {
        /* Already resolved by an earlier call. */
        return gTimeZoneBufferPtr;
    }
    {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        char *resolvedPath = realpath(TZDEFAULT, gTimeZoneBuffer);
        if (resolvedPath != nullptr && uprv_strcmp(TZDEFAULT, gTimeZoneBuffer) != 0) {
            const int32_t tailLen = uprv_strlen(TZZONEINFOTAIL);
            const char *tailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
            // MacOS14 has the realpath as something like
            // /usr/share/zoneinfo.default/Australia/Melbourne
            // which will not have "/zoneinfo/" in the path.
            // Therefore if we fail, we fall back to read the link which is
            // /var/db/timezone/zoneinfo/Australia/Melbourne
            // We also fall back to reading the link if the realpath leads to something like
            // /usr/share/zoneinfo/posixrules
            const UBool needLinkTarget =
                tailPtr == nullptr || uprv_strcmp(tailPtr + tailLen, "posixrules") == 0;
            if (needLinkTarget) {
                ssize_t linkLen = readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
                if (linkLen > 0) {
                    /* readlink() does not NUL-terminate its output. */
                    gTimeZoneBuffer[linkLen] = 0;
                    tailPtr = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);
                }
            }
            if (tailPtr != nullptr) {
                tailPtr += tailLen;
                skipZoneIDPrefix(&tailPtr);
                if (isValidOlsonID(tailPtr)) {
                    gTimeZoneBufferPtr = tailPtr;
                    return tailPtr;
                }
            }
        } else {
#if defined(SEARCH_TZFILE)
            /* No usable link: look for a zoneinfo file identical to the default TZ file. */
            DefaultTZInfo* searchState = static_cast<DefaultTZInfo*>(uprv_malloc(sizeof(DefaultTZInfo)));
            if (searchState != nullptr) {
                searchState->defaultTZBuffer = nullptr;
                searchState->defaultTZFileSize = 0;
                searchState->defaultTZFilePtr = nullptr;
                searchState->defaultTZstatus = false;
                searchState->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, searchState);

                /* Release what the search cached in searchState. */
                if (searchState->defaultTZBuffer != nullptr) {
                    uprv_free(searchState->defaultTZBuffer);
                }
                if (searchState->defaultTZFilePtr != nullptr) {
                    fclose(searchState->defaultTZFilePtr);
                }
                uprv_free(searchState);
            }

            if (gTimeZoneBufferPtr != nullptr && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        struct tm juneSol, decemberSol;
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/

        /* This probing will tell us when daylight savings occurs.  */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);
        /* DST in December takes precedence over DST in June. */
        const int daylightType =
            decemberSol.tm_isdst > 0 ? U_DAYLIGHT_DECEMBER
                                     : (juneSol.tm_isdst > 0 ? U_DAYLIGHT_JUNE : U_DAYLIGHT_NONE);

        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != nullptr) {
            return tzid;
        }
    }
    return U_TZNAME[n];
#endif
#else
    return "";
#endif
}
1270
1271
/* Get and set the ICU data directory --------------------------------------- */
1272
1273
static icu::UInitOnce gDataDirInitOnce {};
1274
static char *gDataDirectory = nullptr;
1275
1276
UInitOnce gTimeZoneFilesInitOnce {};
1277
static CharString *gTimeZoneFilesDirectory = nullptr;
1278
1279
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1280
 static const char *gCorrectedPOSIXLocale = nullptr; /* Sometimes heap allocated */
1281
 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
1282
#endif
1283
1284
/*
 * Library cleanup hook registered via ucln_common_registerCleanup().
 * Releases the globals owned by this file and resets the init-once guards
 * so that they are re-initialized on next use. Always returns true.
 */
static UBool U_CALLCONV putil_cleanup()
{
    /* An empty data directory is the "" literal set by u_setDataDirectory(), not heap memory. */
    if (gDataDirectory != nullptr && gDataDirectory[0] != 0) {
        uprv_free(gDataDirectory);
    }
    gDataDirectory = nullptr;
    gDataDirInitOnce.reset();

    delete gTimeZoneFilesDirectory;
    gTimeZoneFilesDirectory = nullptr;
    gTimeZoneFilesInitOnce.reset();

#ifdef SEARCH_TZFILE
    delete gSearchTZFileResult;
    gSearchTZFileResult = nullptr;
#endif

#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
    /* Only a heap-allocated corrected locale is freed and reset here. */
    if (gCorrectedPOSIXLocaleHeapAllocated && gCorrectedPOSIXLocale != nullptr) {
        uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
        gCorrectedPOSIXLocale = nullptr;
        gCorrectedPOSIXLocaleHeapAllocated = false;
    }
#endif
    return true;
}
1310
1311
/*
1312
 * Set the data directory.
1313
 *    Make a copy of the passed string, and set the global data dir to point to it.
1314
 */
1315
U_CAPI void U_EXPORT2
1316
1
u_setDataDirectory(const char *directory) {
1317
1
    char *newDataDir;
1318
1
    int32_t length;
1319
1320
1
    if(directory==nullptr || *directory==0) {
1321
        /* A small optimization to prevent the malloc and copy when the
1322
        shared library is used, and this is a way to make sure that nullptr
1323
        is never returned.
1324
        */
1325
1
        newDataDir = (char *)"";
1326
1
    }
1327
0
    else {
1328
0
        length=(int32_t)uprv_strlen(directory);
1329
0
        newDataDir = (char *)uprv_malloc(length + 2);
1330
        /* Exit out if newDataDir could not be created. */
1331
0
        if (newDataDir == nullptr) {
1332
0
            return;
1333
0
        }
1334
0
        uprv_strcpy(newDataDir, directory);
1335
1336
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1337
        {
1338
            char *p;
1339
            while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != nullptr) {
1340
                *p = U_FILE_SEP_CHAR;
1341
            }
1342
        }
1343
#endif
1344
0
    }
1345
1346
1
    if (gDataDirectory && *gDataDirectory) {
1347
0
        uprv_free(gDataDirectory);
1348
0
    }
1349
1
    gDataDirectory = newDataDir;
1350
1
    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1351
1
}
1352
1353
/*
 * Returns true if `path` is absolute: it starts with a file separator
 * (primary or, where different, alternate) or, on Windows-only builds, with
 * an ASCII drive letter followed by ':'. Null and empty paths are not absolute.
 */
U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char *path)
{
  if(path == nullptr || *path == 0) {
    return false;
  }

  const char first = path[0];
  if(first == U_FILE_SEP_CHAR) {
    return true;
  }

#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
  if(first == U_FILE_ALT_SEP_CHAR) {
    return true;
  }
#endif

#if U_PLATFORM_USES_ONLY_WIN32_API
  const UBool startsWithLetter =
      ((first >= 'A') && (first <= 'Z')) || ((first >= 'a') && (first <= 'z'));
  if(startsWithLetter && path[1] == ':') {
    return true;
  }
#endif

  return false;
}
1380
1381
/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1382
   (needed for some Darwin ICU build environments) */
1383
#if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
1384
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1385
#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1386
# endif
1387
#endif
1388
1389
#if defined(ICU_DATA_DIR_WINDOWS)
1390
// Helper function to get the ICU Data Directory under the Windows directory location.
1391
static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1392
{
1393
    wchar_t windowsPath[MAX_PATH];
1394
    char windowsPathUtf8[MAX_PATH];
1395
1396
    UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1397
    if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1398
        // Convert UTF-16 to a UTF-8 string.
1399
        UErrorCode status = U_ZERO_ERROR;
1400
        int32_t windowsPathUtf8Len = 0;
1401
        u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1402
            &windowsPathUtf8Len, reinterpret_cast<const char16_t*>(windowsPath), -1, &status);
1403
1404
        if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1405
            (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1406
            // Ensure it always has a separator, so we can append the ICU data path.
1407
            if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1408
                windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1409
                windowsPathUtf8[windowsPathUtf8Len] = '\0';
1410
            }
1411
            // Check if the concatenated string will fit.
1412
            if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1413
                uprv_strcpy(directoryBuffer, windowsPathUtf8);
1414
                uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1415
                return true;
1416
            }
1417
        }
1418
    }
1419
1420
    return false;
1421
}
1422
#endif
1423
1424
1
/*
 * One-time initializer for the global data directory (run through
 * gDataDirInitOnce). Picks the directory from, depending on the build
 * configuration: the ICU_DATA environment variable, the compile-time
 * ICU_DATA_DIR / U_ICU_DATA_DEFAULT_DIR setting (optionally prefixed), or the
 * ICU data directory under the Windows directory; falls back to "".
 */
static void U_CALLCONV dataDirectoryInitFn() {
    /* Nothing to do if the directory is already set, which happens when the
     * user called u_setDataDirectory().
     */
    if (gDataDirectory != nullptr) {
        return;
    }

    const char *dataPath = nullptr;
#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
    char datadir_path_buffer[PATH_MAX];
#endif

    /*
    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
    override ICU's data with the ICU_DATA environment variable. This prevents
    problems where multiple custom copies of ICU's specific version of data
    are installed on a system. Either the application must define the data
    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
    ICU, set the data with udata_setCommonData or trust that all of the
    required data is contained in ICU's data library that contains
    the entry point defined by U_ICUDATA_ENTRY_POINT.

    There may also be some platforms where environment variables
    are not allowed.
    */
#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
    /* First try to get the environment variable */
#     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
        dataPath = getenv("ICU_DATA");
#     endif
#   endif

    /* ICU_DATA_DIR may be set as a compile option.
     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
     * and is used only when data is built in archive mode eliminating the need
     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
     * set their own path.
     */
#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
    if (dataPath == nullptr || *dataPath == 0) {
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        const char *rootPrefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
# endif
# ifdef ICU_DATA_DIR
        dataPath = ICU_DATA_DIR;
# else
        dataPath = U_ICU_DATA_DEFAULT_DIR;
# endif
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        if (rootPrefix != nullptr) {
            snprintf(datadir_path_buffer, sizeof(datadir_path_buffer), "%s%s", rootPrefix, dataPath);
            dataPath = datadir_path_buffer;
        }
# endif
    }
#endif

#if defined(ICU_DATA_DIR_WINDOWS)
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        dataPath = datadir_path_buffer;
    }
#endif

    /* If nothing was found, still set it to something: the empty string. */
    u_setDataDirectory(dataPath != nullptr ? dataPath : "");
}
1497
1498
/*
 * Returns the current ICU data directory. The first call runs
 * dataDirectoryInitFn() once through gDataDirInitOnce.
 */
U_CAPI const char * U_EXPORT2
u_getDataDirectory()
{
    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);
    const char *currentDirectory = gDataDirectory;
    return currentDirectory;
}
1503
1504
0
static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1505
0
    if (U_FAILURE(status)) {
1506
0
        return;
1507
0
    }
1508
0
    gTimeZoneFilesDirectory->clear();
1509
0
    gTimeZoneFilesDirectory->append(path, status);
1510
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1511
    char *p = gTimeZoneFilesDirectory->data();
1512
    while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != nullptr) {
1513
        *p = U_FILE_SEP_CHAR;
1514
    }
1515
#endif
1516
0
}
1517
1518
#define TO_STRING(x) TO_STRING_2(x)
1519
#define TO_STRING_2(x) #x
1520
1521
0
/*
 * One-time initializer for gTimeZoneFilesDirectory (run through
 * gTimeZoneFilesInitOnce). Allocates the global string and fills it from,
 * depending on the build configuration: the ICU data directory under the
 * Windows directory (UWP), the ICU_TIMEZONE_FILES_DIR environment variable,
 * or the compile-time U_TIMEZONE_FILES_DIR; the default is "".
 * Sets U_MEMORY_ALLOCATION_ERROR in `status` if the allocation fails.
 */
static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
    U_ASSERT(gTimeZoneFilesDirectory == nullptr);
    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    gTimeZoneFilesDirectory = new CharString();
    if (gTimeZoneFilesDirectory == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    const char *tzFilesDir = "";

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    char timezonefilesdir_path_buffer[PATH_MAX];
    const char *rootPrefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
#endif

#if U_PLATFORM_HAS_WINUWP_API == 1
// The UWP version does not support the environment variable setting.

# if defined(ICU_DATA_DIR_WINDOWS)
    // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        tzFilesDir = datadir_path_buffer;
    }
# endif

#else
    tzFilesDir = getenv("ICU_TIMEZONE_FILES_DIR");
#endif // U_PLATFORM_HAS_WINUWP_API

#if defined(U_TIMEZONE_FILES_DIR)
    if (tzFilesDir == nullptr) {
        // Build time configuration setting.
        tzFilesDir = TO_STRING(U_TIMEZONE_FILES_DIR);
    }
#endif

    /* getenv() may have returned nullptr; normalize that to the empty string. */
    if (tzFilesDir == nullptr) {
        tzFilesDir = "";
    }

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    if (rootPrefix != nullptr) {
        snprintf(timezonefilesdir_path_buffer, sizeof(timezonefilesdir_path_buffer), "%s%s", rootPrefix, tzFilesDir);
        tzFilesDir = timezonefilesdir_path_buffer;
    }
#endif

    setTimeZoneFilesDir(tzFilesDir, status);
}
1572
1573
1574
/*
 * Returns the directory for time zone resource files, initializing it once
 * through gTimeZoneFilesInitOnce. Returns "" when *status indicates a failure.
 */
U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode *status) {
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
    if (U_FAILURE(*status)) {
        return "";
    }
    return gTimeZoneFilesDirectory->data();
}
1579
1580
/*
 * Sets the directory for time zone resource files to `path`.
 *
 * The one-time initialization runs first (setting the directory from the
 * environment) and its result is then immediately replaced with `path`.
 * That is some extra churn, but it keeps the logic simple, and performance
 * shouldn't be an issue here.
 */
U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
    UErrorCode &errorCode = *status;
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, errorCode);
    setTimeZoneFilesDir(path, errorCode);
}
1589
1590
1591
#if U_POSIX_LOCALE
1592
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1593
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1594
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1595
 */
1596
static const char *uprv_getPOSIXIDForCategory(int category)
1597
0
{
1598
0
    const char* posixID = nullptr;
1599
0
    if (category == LC_MESSAGES || category == LC_CTYPE) {
1600
        /*
1601
        * On Solaris two different calls to setlocale can result in
1602
        * different values. Only get this value once.
1603
        *
1604
        * We must check this first because an application can set this.
1605
        *
1606
        * LC_ALL can't be used because it's platform dependent. The LANG
1607
        * environment variable seems to affect LC_CTYPE variable by default.
1608
        * Here is what setlocale(LC_ALL, nullptr) can return.
1609
        * HPUX can return 'C C C C C C C'
1610
        * Solaris can return /en_US/C/C/C/C/C on the second try.
1611
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1612
        *
1613
        * The default codepage detection also needs to use LC_CTYPE.
1614
        *
1615
        * Do not call setlocale(LC_*, "")! Using an empty string instead
1616
        * of nullptr, will modify the libc behavior.
1617
        */
1618
0
        posixID = setlocale(category, nullptr);
1619
0
        if ((posixID == nullptr)
1620
0
            || (uprv_strcmp("C", posixID) == 0)
1621
0
            || (uprv_strcmp("POSIX", posixID) == 0))
1622
0
        {
1623
            /* Maybe we got some garbage.  Try something more reasonable */
1624
0
            posixID = getenv("LC_ALL");
1625
            /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
1626
             * This is needed to properly handle empty env. variables
1627
             */
1628
#if U_PLATFORM == U_PF_SOLARIS
1629
            if ((posixID == 0) || (posixID[0] == '\0')) {
1630
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1631
                if ((posixID == 0) || (posixID[0] == '\0')) {
1632
#else
1633
0
            if (posixID == nullptr) {
1634
0
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1635
0
                if (posixID == nullptr) {
1636
0
#endif
1637
0
                    posixID = getenv("LANG");
1638
0
                }
1639
0
            }
1640
0
        }
1641
0
    }
1642
0
    if ((posixID == nullptr)
1643
0
        || (uprv_strcmp("C", posixID) == 0)
1644
0
        || (uprv_strcmp("POSIX", posixID) == 0))
1645
0
    {
1646
        /* Nothing worked.  Give it a nice POSIX default value. */
1647
0
        posixID = "en_US_POSIX";
1648
        // Note: this test will not catch 'C.UTF-8',
1649
        // that will be handled in uprv_getDefaultLocaleID().
1650
        // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1651
        // caller which expects to see "en_US_POSIX" in many branches.
1652
0
    }
1653
0
    return posixID;
1654
0
}
1655
1656
/* Return just the POSIX id for the default locale, whatever happens to be in
1657
 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1658
 */
1659
static const char *uprv_getPOSIXIDForDefaultLocale()
1660
0
{
1661
0
    static const char* posixID = nullptr;
1662
0
    if (posixID == nullptr) {
1663
0
        posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1664
0
    }
1665
0
    return posixID;
1666
0
}
1667
1668
#if !U_CHARSET_IS_UTF8
1669
/* Return just the POSIX id for the default codepage, whatever happens to be in
1670
 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1671
 */
1672
static const char *uprv_getPOSIXIDForDefaultCodepage()
1673
{
1674
    static const char* posixID = nullptr;
1675
    if (posixID == 0) {
1676
        posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1677
    }
1678
    return posixID;
1679
}
1680
#endif
1681
#endif
1682
1683
/* NOTE: The caller should handle thread safety */
1684
U_CAPI const char* U_EXPORT2
1685
uprv_getDefaultLocaleID()
1686
0
{
1687
0
#if U_POSIX_LOCALE
1688
/*
1689
  Note that:  (a '!' means the ID is improper somehow)
1690
     LC_ALL  ---->     default_loc          codepage
1691
--------------------------------------------------------
1692
     ab.CD             ab                   CD
1693
     ab@CD             ab__CD               -
1694
     ab@CD.EF          ab__CD               EF
1695
1696
     ab_CD.EF@GH       ab_CD_GH             EF
1697
1698
Some 'improper' ways to do the same as above:
1699
  !  ab_CD@GH.EF       ab_CD_GH             EF
1700
  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1701
  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1702
1703
     _CD@GH            _CD_GH               -
1704
     _CD.EF@GH         _CD_GH               EF
1705
1706
The variant cannot have dots in it.
1707
The 'rightmost' variant (@xxx) wins.
1708
The leftmost codepage (.xxx) wins.
1709
*/
1710
0
    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1711
1712
    /* Format: (no spaces)
1713
    ll [ _CC ] [ . MM ] [ @ VV]
1714
1715
      l = lang, C = ctry, M = charmap, V = variant
1716
    */
1717
1718
0
    if (gCorrectedPOSIXLocale != nullptr) {
1719
0
        return gCorrectedPOSIXLocale;
1720
0
    }
1721
1722
    // Copy the ID into owned memory.
1723
    // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1724
0
    char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1725
0
    if (correctedPOSIXLocale == nullptr) {
1726
0
        return nullptr;
1727
0
    }
1728
0
    uprv_strcpy(correctedPOSIXLocale, posixID);
1729
1730
0
    char *limit;
1731
0
    if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1732
0
        *limit = 0;
1733
0
    }
1734
0
    if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1735
0
        *limit = 0;
1736
0
    }
1737
1738
0
    if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1739
0
        || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1740
      // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1741
      // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1742
0
      uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1743
0
    }
1744
1745
    /* Note that we scan the *uncorrected* ID. */
1746
0
    const char *p;
1747
0
    if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1748
0
        p++;
1749
1750
        /* Take care of any special cases here.. */
1751
0
        if (!uprv_strcmp(p, "nynorsk")) {
1752
0
            p = "NY";
1753
            /* Don't worry about no__NY. In practice, it won't appear. */
1754
0
        }
1755
1756
0
        if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1757
0
            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1758
0
        }
1759
0
        else {
1760
0
            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1761
0
        }
1762
1763
0
        const char *q;
1764
0
        if ((q = uprv_strchr(p, '.')) != nullptr) {
1765
            /* How big will the resulting string be? */
1766
0
            int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1767
0
            uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1768
0
            correctedPOSIXLocale[len] = 0;
1769
0
        }
1770
0
        else {
1771
            /* Anything following the @ sign */
1772
0
            uprv_strcat(correctedPOSIXLocale, p);
1773
0
        }
1774
1775
        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1776
         * How about 'russian' -> 'ru'?
1777
         * Many of the other locales using ISO codes will be handled by the
1778
         * canonicalization functions in uloc_getDefault.
1779
         */
1780
0
    }
1781
1782
0
    if (gCorrectedPOSIXLocale == nullptr) {
1783
0
        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1784
0
        gCorrectedPOSIXLocaleHeapAllocated = true;
1785
0
        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1786
0
        correctedPOSIXLocale = nullptr;
1787
0
    }
1788
0
    posixID = gCorrectedPOSIXLocale;
1789
1790
0
    if (correctedPOSIXLocale != nullptr) {  /* Was already set - clean up. */
1791
0
        uprv_free(correctedPOSIXLocale);
1792
0
    }
1793
1794
0
    return posixID;
1795
1796
#elif U_PLATFORM_USES_ONLY_WIN32_API
1797
#define POSIX_LOCALE_CAPACITY 64
1798
    UErrorCode status = U_ZERO_ERROR;
1799
    char *correctedPOSIXLocale = nullptr;
1800
1801
    // If we have already figured this out just use the cached value
1802
    if (gCorrectedPOSIXLocale != nullptr) {
1803
        return gCorrectedPOSIXLocale;
1804
    }
1805
1806
    // No cached value, need to determine the current value
1807
    static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1808
    int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1809
1810
    // Now we should have a Windows locale name that needs converted to the POSIX style.
1811
    if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1812
    {
1813
        // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1814
        char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1815
1816
        int32_t i;
1817
        for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1818
        {
1819
            if (windowsLocale[i] == '_')
1820
            {
1821
                modifiedWindowsLocale[i] = '-';
1822
            }
1823
            else
1824
            {
1825
                modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1826
            }
1827
1828
            if (modifiedWindowsLocale[i] == '\0')
1829
            {
1830
                break;
1831
            }
1832
        }
1833
1834
        if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1835
        {
1836
            // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1837
            // locale when tags are dropped
1838
            modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1839
        }
1840
1841
        // Now normalize the resulting name
1842
        correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1843
        /* TODO: Should we just exit on memory allocation failure? */
1844
        if (correctedPOSIXLocale)
1845
        {
1846
            int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1847
            if (U_SUCCESS(status))
1848
            {
1849
                *(correctedPOSIXLocale + posixLen) = 0;
1850
                gCorrectedPOSIXLocale = correctedPOSIXLocale;
1851
                gCorrectedPOSIXLocaleHeapAllocated = true;
1852
                ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1853
            }
1854
            else
1855
            {
1856
                uprv_free(correctedPOSIXLocale);
1857
            }
1858
        }
1859
    }
1860
1861
    // If unable to find a locale we can agree upon, use en-US by default
1862
    if (gCorrectedPOSIXLocale == nullptr) {
1863
        gCorrectedPOSIXLocale = "en_US";
1864
    }
1865
    return gCorrectedPOSIXLocale;
1866
1867
#elif U_PLATFORM == U_PF_OS400
1868
    /* locales are process scoped and are by definition thread safe */
1869
    static char correctedLocale[64];
1870
    const  char *localeID = getenv("LC_ALL");
1871
           char *p;
1872
1873
    if (localeID == nullptr)
1874
        localeID = getenv("LANG");
1875
    if (localeID == nullptr)
1876
        localeID = setlocale(LC_ALL, nullptr);
1877
    /* Make sure we have something... */
1878
    if (localeID == nullptr)
1879
        return "en_US_POSIX";
1880
1881
    /* Extract the locale name from the path. */
1882
    if((p = uprv_strrchr(localeID, '/')) != nullptr)
1883
    {
1884
        /* Increment p to start of locale name. */
1885
        p++;
1886
        localeID = p;
1887
    }
1888
1889
    /* Copy to work location. */
1890
    uprv_strcpy(correctedLocale, localeID);
1891
1892
    /* Strip off the '.locale' extension. */
1893
    if((p = uprv_strchr(correctedLocale, '.')) != nullptr) {
1894
        *p = 0;
1895
    }
1896
1897
    /* Upper case the locale name. */
1898
    T_CString_toUpperCase(correctedLocale);
1899
1900
    /* See if we are using the POSIX locale.  Any of the
1901
    * following are equivalent and use the same QLGPGCMA
1902
    * (POSIX) locale.
1903
    * QLGPGCMA2 means UCS2
1904
    * QLGPGCMA_4 means UTF-32
1905
    * QLGPGCMA_8 means UTF-8
1906
    */
1907
    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1908
        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1909
        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1910
    {
1911
        uprv_strcpy(correctedLocale, "en_US_POSIX");
1912
    }
1913
    else
1914
    {
1915
        int16_t LocaleLen;
1916
1917
        /* Lower case the lang portion. */
1918
        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1919
        {
1920
            *p = uprv_tolower(*p);
1921
        }
1922
1923
        /* Adjust for Euro.  After '_E' add 'URO'. */
1924
        LocaleLen = uprv_strlen(correctedLocale);
1925
        if (correctedLocale[LocaleLen - 2] == '_' &&
1926
            correctedLocale[LocaleLen - 1] == 'E')
1927
        {
1928
            uprv_strcat(correctedLocale, "URO");
1929
        }
1930
1931
        /* If using Lotus-based locale then convert to
1932
         * equivalent non Lotus.
1933
         */
1934
        else if (correctedLocale[LocaleLen - 2] == '_' &&
1935
            correctedLocale[LocaleLen - 1] == 'L')
1936
        {
1937
            correctedLocale[LocaleLen - 2] = 0;
1938
        }
1939
1940
        /* There are separate simplified and traditional
1941
         * locales called zh_HK_S and zh_HK_T.
1942
         */
1943
        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1944
        {
1945
            uprv_strcpy(correctedLocale, "zh_HK");
1946
        }
1947
1948
        /* A special zh_CN_GBK locale...
1949
        */
1950
        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1951
        {
1952
            uprv_strcpy(correctedLocale, "zh_CN");
1953
        }
1954
1955
    }
1956
1957
    return correctedLocale;
1958
#endif
1959
1960
0
}
1961
1962
#if !U_CHARSET_IS_UTF8
1963
#if U_POSIX_LOCALE
1964
/*
1965
Due to various platform differences, one platform may specify a charset,
1966
when they really mean a different charset. Remap the names so that they are
1967
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1968
here. Before adding anything to this function, please consider adding unique
1969
names to the ICU alias table in the data directory.
1970
*/
1971
static const char*
1972
remapPlatformDependentCodepage(const char *locale, const char *name) {
1973
    if (locale != nullptr && *locale == 0) {
1974
        /* Make sure that an empty locale is handled the same way. */
1975
        locale = nullptr;
1976
    }
1977
    if (name == nullptr) {
1978
        return nullptr;
1979
    }
1980
#if U_PLATFORM == U_PF_AIX
1981
    if (uprv_strcmp(name, "IBM-943") == 0) {
1982
        /* Use the ASCII compatible ibm-943 */
1983
        name = "Shift-JIS";
1984
    }
1985
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
1986
        /* Use the windows-1252 that contains the Euro */
1987
        name = "IBM-5348";
1988
    }
1989
#elif U_PLATFORM == U_PF_SOLARIS
1990
    if (locale != nullptr && uprv_strcmp(name, "EUC") == 0) {
1991
        /* Solaris underspecifies the "EUC" name. */
1992
        if (uprv_strcmp(locale, "zh_CN") == 0) {
1993
            name = "EUC-CN";
1994
        }
1995
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
1996
            name = "EUC-TW";
1997
        }
1998
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
1999
            name = "EUC-KR";
2000
        }
2001
    }
2002
    else if (uprv_strcmp(name, "eucJP") == 0) {
2003
        /*
2004
        ibm-954 is the best match.
2005
        ibm-33722 is the default for eucJP (similar to Windows).
2006
        */
2007
        name = "eucjis";
2008
    }
2009
    else if (uprv_strcmp(name, "646") == 0) {
2010
        /*
2011
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
2012
         * ISO-8859-1 instead of US-ASCII(646).
2013
         */
2014
        name = "ISO-8859-1";
2015
    }
2016
#elif U_PLATFORM_IS_DARWIN_BASED
2017
    if (locale == nullptr && *name == 0) {
2018
        /*
2019
        No locale was specified, and an empty name was passed in.
2020
        This usually indicates that nl_langinfo didn't return valid information.
2021
        Mac OS X uses UTF-8 by default (especially the locale data and console).
2022
        */
2023
        name = "UTF-8";
2024
    }
2025
    else if (uprv_strcmp(name, "CP949") == 0) {
2026
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2027
        name = "EUC-KR";
2028
    }
2029
    else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2030
        /*
2031
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2032
         */
2033
        name = "UTF-8";
2034
    }
2035
#elif U_PLATFORM == U_PF_BSD
2036
    if (uprv_strcmp(name, "CP949") == 0) {
2037
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2038
        name = "EUC-KR";
2039
    }
2040
#elif U_PLATFORM == U_PF_HPUX
2041
    if (locale != nullptr && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2042
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2043
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2044
        name = "hkbig5";
2045
    }
2046
    else if (uprv_strcmp(name, "eucJP") == 0) {
2047
        /*
2048
        ibm-1350 is the best match, but unavailable.
2049
        ibm-954 is mostly a superset of ibm-1350.
2050
        ibm-33722 is the default for eucJP (similar to Windows).
2051
        */
2052
        name = "eucjis";
2053
    }
2054
#elif U_PLATFORM == U_PF_LINUX
2055
    if (locale != nullptr && uprv_strcmp(name, "euc") == 0) {
2056
        /* Linux underspecifies the "EUC" name. */
2057
        if (uprv_strcmp(locale, "korean") == 0) {
2058
            name = "EUC-KR";
2059
        }
2060
        else if (uprv_strcmp(locale, "japanese") == 0) {
2061
            /* See comment below about eucJP */
2062
            name = "eucjis";
2063
        }
2064
    }
2065
    else if (uprv_strcmp(name, "eucjp") == 0) {
2066
        /*
2067
        ibm-1350 is the best match, but unavailable.
2068
        ibm-954 is mostly a superset of ibm-1350.
2069
        ibm-33722 is the default for eucJP (similar to Windows).
2070
        */
2071
        name = "eucjis";
2072
    }
2073
    else if (locale != nullptr && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2074
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2075
        /*
2076
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2077
         */
2078
        name = "UTF-8";
2079
    }
2080
    /*
2081
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2082
     * it by falling back to 'US-ASCII' when nullptr is returned from this
2083
     * function. So, we don't have to worry about it here.
2084
     */
2085
#endif
2086
    /* return nullptr when "" is passed in */
2087
    if (*name == 0) {
2088
        name = nullptr;
2089
    }
2090
    return name;
2091
}
2092
2093
static const char*
2094
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2095
{
2096
    char localeBuf[100];
2097
    const char *name = nullptr;
2098
    char *variant = nullptr;
2099
2100
    if (localeName != nullptr && (name = (uprv_strchr(localeName, '.'))) != nullptr) {
2101
        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2102
        uprv_strncpy(localeBuf, localeName, localeCapacity);
2103
        localeBuf[localeCapacity-1] = 0; /* ensure NUL termination */
2104
        name = uprv_strncpy(buffer, name+1, buffCapacity);
2105
        buffer[buffCapacity-1] = 0; /* ensure NUL termination */
2106
        if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != nullptr) {
2107
            *variant = 0;
2108
        }
2109
        name = remapPlatformDependentCodepage(localeBuf, name);
2110
    }
2111
    return name;
2112
}
2113
#endif
2114
2115
static const char*
2116
int_getDefaultCodepage()
2117
{
2118
#if U_PLATFORM == U_PF_OS400
2119
    uint32_t ccsid = 37; /* Default to ibm-37 */
2120
    static char codepage[64];
2121
    Qwc_JOBI0400_t jobinfo;
2122
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2123
2124
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2125
        "*                         ", "                ", &error);
2126
2127
    if (error.Bytes_Available == 0) {
2128
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2129
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2130
        }
2131
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2132
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2133
        }
2134
        /* else use the default */
2135
    }
2136
    snprintf(codepage, sizeof(codepage), "ibm-%d", ccsid);
2137
    return codepage;
2138
2139
#elif U_PLATFORM == U_PF_OS390
2140
    static char codepage[64];
2141
2142
    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2143
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
2144
    codepage[63] = 0; /* NUL terminate */
2145
2146
    return codepage;
2147
2148
#elif U_PLATFORM_USES_ONLY_WIN32_API
2149
    static char codepage[64];
2150
    DWORD codepageNumber = 0;
2151
2152
#if U_PLATFORM_HAS_WINUWP_API == 1
2153
    // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2154
    // have folks use Unicode than a "system" code page, however this is the same
2155
    // codepage as the system default locale codepage.  (FWIW, the system locale is
2156
    // ONLY used for codepage, it should never be used for anything else)
2157
    GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2158
        (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2159
#else
2160
    // Win32 apps can call GetACP
2161
    codepageNumber = GetACP();
2162
#endif
2163
    // Special case for UTF-8
2164
    if (codepageNumber == 65001)
2165
    {
2166
        return "UTF-8";
2167
    }
2168
    // Windows codepages can look like windows-1252, so format the found number
2169
    // the numbers are eclectic, however all valid system code pages, besides UTF-8
2170
    // are between 3 and 19999
2171
    if (codepageNumber > 0 && codepageNumber < 20000)
2172
    {
2173
        snprintf(codepage, sizeof(codepage), "windows-%ld", codepageNumber);
2174
        return codepage;
2175
    }
2176
    // If the codepage number call failed then return UTF-8
2177
    return "UTF-8";
2178
2179
#elif U_POSIX_LOCALE
2180
    static char codesetName[100];
2181
    const char *localeName = nullptr;
2182
    const char *name = nullptr;
2183
2184
    localeName = uprv_getPOSIXIDForDefaultCodepage();
2185
    uprv_memset(codesetName, 0, sizeof(codesetName));
2186
    /* On Solaris nl_langinfo returns C locale values unless setlocale
2187
     * was called earlier.
2188
     */
2189
#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2190
    /* When available, check nl_langinfo first because it usually gives more
2191
       useful names. It depends on LC_CTYPE.
2192
       nl_langinfo may use the same buffer as setlocale. */
2193
    {
2194
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
2195
#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2196
        /*
2197
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2198
         * instead of ASCII.
2199
         */
2200
        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2201
            codeset = remapPlatformDependentCodepage(localeName, codeset);
2202
        } else
2203
#endif
2204
        {
2205
            codeset = remapPlatformDependentCodepage(nullptr, codeset);
2206
        }
2207
2208
        if (codeset != nullptr) {
2209
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2210
            codesetName[sizeof(codesetName)-1] = 0;
2211
            return codesetName;
2212
        }
2213
    }
2214
#endif
2215
2216
    /* Use setlocale in a nice way, and then check some environment variables.
2217
       Maybe the application used setlocale already.
2218
    */
2219
    uprv_memset(codesetName, 0, sizeof(codesetName));
2220
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2221
    if (name) {
2222
        /* if we can find the codeset name from setlocale, return that. */
2223
        return name;
2224
    }
2225
2226
    if (*codesetName == 0)
2227
    {
2228
        /* Everything failed. Return US ASCII (ISO 646). */
2229
        (void)uprv_strcpy(codesetName, "US-ASCII");
2230
    }
2231
    return codesetName;
2232
#else
2233
    return "US-ASCII";
2234
#endif
2235
}
2236
2237
2238
/**
 * Returns the name of the default codepage.
 * The name is computed by int_getDefaultCodepage() on the first call and
 * remembered in a function-local static; the check and the assignment happen
 * between umtx_lock(nullptr) and umtx_unlock(nullptr).
 */
U_CAPI const char*  U_EXPORT2
uprv_getDefaultCodepage()
{
    static const char *cachedName = nullptr;

    umtx_lock(nullptr);
    if (!cachedName) {
        cachedName = int_getDefaultCodepage();
    }
    umtx_unlock(nullptr);

    return cachedName;
}
2249
#endif  /* !U_CHARSET_IS_UTF8 */
2250
2251
2252
/* end of platform-specific implementation -------------- */
2253
2254
/* version handling --------------------------------------------------------- */
2255
2256
/**
 * Parses a delimiter-separated decimal version string into versionArray.
 *
 * At most U_MAX_VERSION_LENGTH fields are read; each parsed number is
 * truncated to uint8_t. Parsing stops at the first field that has no digits
 * or is not followed by U_VERSION_DELIMITER. All fields that were not parsed
 * are set to 0, which also covers a nullptr versionString.
 * Does nothing when versionArray is nullptr.
 */
U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray, const char *versionString) {
    if (versionArray == nullptr) {
        return;
    }

    uint16_t filled = 0;
    if (versionString != nullptr) {
        while (filled < U_MAX_VERSION_LENGTH) {
            char *stop;
            const unsigned long value = uprv_strtoul(versionString, &stop, 10);
            if (stop == versionString) {
                /* No digits here; this field is zeroed by the loop below. */
                break;
            }
            versionArray[filled++] = (uint8_t)value;
            if (*stop != U_VERSION_DELIMITER) {
                break;
            }
            versionString = stop + 1;
        }
    }

    /* Zero every field that was not parsed. */
    for (; filled < U_MAX_VERSION_LENGTH; ++filled) {
        versionArray[filled] = 0;
    }
}
2279
2280
/**
 * Parses a version string given as char16_t text.
 * The text is cut off after U_MAX_VERSION_STRING_LENGTH characters,
 * converted with u_UCharsToChars() and handed to u_versionFromString().
 * Does nothing when either argument is nullptr.
 */
U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray, const char16_t *versionString) {
    if (versionArray == nullptr || versionString == nullptr) {
        return;
    }

    char narrow[U_MAX_VERSION_STRING_LENGTH+1];
    const int32_t fullLength = u_strlen(versionString);
    const int32_t copyLength =
        (fullLength > U_MAX_VERSION_STRING_LENGTH) ? U_MAX_VERSION_STRING_LENGTH : fullLength;

    u_UCharsToChars(versionString, narrow, copyLength);
    narrow[copyLength] = 0;
    u_versionFromString(versionArray, narrow);
}
2293
2294
/**
 * Writes one version field (0..255) as an unpadded decimal number.
 *
 * @param dest  position to write to; must have room for up to 3 characters
 * @param field the value to write
 * @return pointer just past the last digit written
 */
static char *
writeVersionField(char *dest, uint8_t field) {
    if (field >= 100) {
        *dest++ = (char)('0' + field / 100);
        field %= 100;
        /* Once a hundreds digit was written the tens digit is mandatory,
         * even when it is 0; otherwise 105 would come out as "15". */
        *dest++ = (char)('0' + field / 10);
        field %= 10;
    } else if (field >= 10) {
        *dest++ = (char)('0' + field / 10);
        field %= 10;
    }
    *dest++ = (char)('0' + field);
    return dest;
}

/**
 * Formats versionArray as decimal fields separated by U_VERSION_DELIMITER.
 *
 * Trailing zero fields are omitted, but at least two fields are always
 * written. The output is NUL-terminated.
 *
 * @param versionArray  the version to format; if nullptr, versionString is
 *                      set to the empty string
 * @param versionString output buffer; if nullptr, nothing is done. The
 *                      caller must provide enough room for the result.
 */
U_CAPI void U_EXPORT2
u_versionToString(const UVersionInfo versionArray, char *versionString) {
    if (versionString == nullptr) {
        return;
    }

    if (versionArray == nullptr) {
        versionString[0] = 0;
        return;
    }

    /* count how many fields need to be written */
    uint16_t count = U_MAX_VERSION_LENGTH;
    while (count > 0 && versionArray[count-1] == 0) {
        --count;
    }
    if (count <= 1) {
        count = 2;
    }

    /* write the first part */
    versionString = writeVersionField(versionString, versionArray[0]);

    /* write the following parts, each one preceded by a delimiter */
    for (uint16_t part = 1; part < count; ++part) {
        *versionString++ = U_VERSION_DELIMITER;
        versionString = writeVersionField(versionString, versionArray[part]);
    }

    /* NUL-terminate */
    *versionString = 0;
}
2350
2351
/**
 * Fills versionArray with the version parsed from U_ICU_VERSION.
 */
U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray) {
    // Referencing the copyright string keeps clang from reporting it as unused.
    (void)copyright;

    u_versionFromString(versionArray, U_ICU_VERSION);
}
2356
2357
/**
2358
 * icucfg.h dependent code
2359
 */
2360
2361
#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2362
2363
#if HAVE_DLFCN_H
2364
#ifdef __MVS__
2365
#ifndef __SUSV3
2366
#define __SUSV3 1
2367
#endif
2368
#endif
2369
#include <dlfcn.h>
2370
#endif /* HAVE_DLFCN_H */
2371
2372
/**
 * Loads a shared library with dlopen(RTLD_NOW|RTLD_GLOBAL).
 *
 * @param libName name of the library, passed to dlopen() unchanged
 * @param status  in/out error code; if it already indicates a failure nothing
 *                is loaded. Set to U_MISSING_RESOURCE_ERROR when dlopen() fails.
 * @return the library handle, or nullptr on failure
 */
U_CAPI void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
  if (U_FAILURE(*status)) {
    return nullptr;
  }

  void *handle = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
  if (handle != nullptr) {
    return handle;
  }

#ifdef U_TRACE_DYLOAD
  printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
#endif
  *status = U_MISSING_RESOURCE_ERROR;
  return nullptr;
}
2385
2386
/**
 * Closes a library handle with dlclose().
 * Nothing is closed when status already indicates a failure; the result of
 * dlclose() is not reported.
 */
U_CAPI void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
  if (U_SUCCESS(*status)) {
    dlclose(lib);
  }
}
2391
2392
/**
 * Looks up a function in a loaded library with dlsym().
 *
 * @param lib    library handle, passed to dlsym() unchanged
 * @param sym    name of the symbol to look up
 * @param status in/out error code; if it already indicates a failure no
 *               lookup is done. Set to U_MISSING_RESOURCE_ERROR when dlsym()
 *               returns nullptr.
 * @return the function pointer, or nullptr on failure
 */
U_CAPI UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
  /* dlsym() yields a void*; the union reinterprets it as a function pointer
   * (presumably to avoid a direct object-to-function pointer cast). */
  union {
      UVoidFunction *asFunction;
      void *asObject;
  } result;
  result.asFunction = nullptr;

  if (U_FAILURE(*status)) {
    return result.asFunction;
  }

  result.asObject = dlsym(lib, sym);
  if (result.asObject == nullptr) {
#ifdef U_TRACE_DYLOAD
    printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
#endif
    *status = U_MISSING_RESOURCE_ERROR;
  }
  return result.asFunction;
}
2409
2410
#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2411
2412
/* Windows API implementation. */
2413
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation. */
2414
2415
/**
 * Loads a library with LoadLibraryA().
 *
 * @param libName name of the library, passed to LoadLibraryA() unchanged
 * @param status  in/out error code; if it already indicates a failure nothing
 *                is loaded. Set to U_MISSING_RESOURCE_ERROR when loading fails.
 * @return the module handle as void*, or nullptr on failure
 */
U_CAPI void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
  if (U_FAILURE(*status)) {
    return nullptr;
  }

  HMODULE module = LoadLibraryA(libName);
  if (module == nullptr) {
    *status = U_MISSING_RESOURCE_ERROR;
  }

  return (void*)module;
}
2429
2430
/**
 * Releases a library handle with FreeLibrary().
 * Nothing is released when status already indicates a failure; the result of
 * FreeLibrary() is not reported.
 */
U_CAPI void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
  if (U_SUCCESS(*status)) {
    FreeLibrary((HMODULE)lib);
  }
}
2439
2440
/**
 * Looks up a function in a loaded library with GetProcAddress().
 *
 * @param lib    module handle; nullptr yields nullptr without touching status
 * @param sym    name of the symbol to look up
 * @param status in/out error code; if it already indicates a failure no
 *               lookup is done. On a failed lookup it is set to
 *               U_MISSING_RESOURCE_ERROR when GetLastError() reports
 *               ERROR_PROC_NOT_FOUND and to U_UNSUPPORTED_ERROR otherwise.
 * @return the function pointer, or nullptr on failure
 */
U_CAPI UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
  if (U_FAILURE(*status) || lib == nullptr) {
    return nullptr;
  }

  UVoidFunction *function = (UVoidFunction*)GetProcAddress((HMODULE)lib, sym);
  if (function != nullptr) {
    return function;
  }

  /* Distinguish "symbol not found" from any other, unknown error. */
  *status = (GetLastError() == ERROR_PROC_NOT_FOUND)
                ? U_MISSING_RESOURCE_ERROR
                : U_UNSUPPORTED_ERROR;
  return nullptr;
}
2460
2461
#else
2462
2463
/* No dynamic loading, null (nonexistent) implementation. */
2464
2465
/**
 * Null implementation for builds without dynamic loading.
 * Always returns nullptr; sets status to U_UNSUPPORTED_ERROR unless it
 * already indicates a failure.
 */
U_CAPI void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
    (void)libName;
    if (U_SUCCESS(*status)) {
        *status = U_UNSUPPORTED_ERROR;
    }
    return nullptr;
}
2472
2473
/**
 * Null implementation for builds without dynamic loading.
 * Sets status to U_UNSUPPORTED_ERROR unless it already indicates a failure.
 */
U_CAPI void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
    (void)lib;
    if (U_SUCCESS(*status)) {
        *status = U_UNSUPPORTED_ERROR;
    }
}
2480
2481
/**
 * Null implementation for builds without dynamic loading.
 * Always returns nullptr; sets status to U_UNSUPPORTED_ERROR unless it
 * already indicates a failure.
 */
U_CAPI UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
  (void)lib;
  (void)sym;
  if (U_FAILURE(*status)) {
    return nullptr;
  }
  *status = U_UNSUPPORTED_ERROR;
  return nullptr;
}
2490
2491
#endif
2492
2493
/*
2494
 * Hey, Emacs, please set the following:
2495
 *
2496
 * Local Variables:
2497
 * indent-tabs-mode: nil
2498
 * End:
2499
 *
2500
 */