Coverage Report

Created: 2024-04-24 06:23

/src/icu/source/common/putil.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1997-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*
11
*  FILE NAME : putil.c (previously putil.cpp and ptypes.cpp)
12
*
13
*   Date        Name        Description
14
*   04/14/97    aliu        Creation.
15
*   04/24/97    aliu        Added getDefaultDataDirectory() and
16
*                            getDefaultLocaleID().
17
*   04/28/97    aliu        Rewritten to assume Unix and apply general methods
18
*                            for assumed case.  Non-UNIX platforms must be
19
*                            special-cased.  Rewrote numeric methods dealing
20
*                            with NaN and Infinity to be platform independent
21
*                             over all IEEE 754 platforms.
22
*   05/13/97    aliu        Restored sign of timezone
23
*                            (semantics are hours West of GMT)
24
*   06/16/98    erm         Added IEEE_754 stuff, cleaned up isInfinite, isNan,
25
*                             nextDouble..
26
*   07/22/98    stephen     Added remainder, max, min, trunc
27
*   08/13/98    stephen     Added isNegativeInfinity, isPositiveInfinity
28
*   08/24/98    stephen     Added longBitsFromDouble
29
*   09/08/98    stephen     Minor changes for Mac Port
30
*   03/02/99    stephen     Removed openFile().  Added AS400 support.
31
*                            Fixed EBCDIC tables
32
*   04/15/99    stephen     Converted to C.
33
*   06/28/99    stephen     Removed mutex locking in u_isBigEndian().
34
*   08/04/99    jeffrey R.  Added OS/2 changes
35
*   11/15/99    helena      Integrated S/390 IEEE support.
36
*   04/26/01    Barry N.    OS/400 support for uprv_getDefaultLocaleID
37
*   08/15/01    Steven H.   OS/400 support for uprv_getDefaultCodepage
38
*   01/03/08    Steven L.   Fake Time Support
39
******************************************************************************
40
*/
41
42
// Defines _XOPEN_SOURCE for access to POSIX functions.
43
// Must be before any other #includes.
44
#include "uposixdefs.h"
45
46
// First, the platform type. Need this for U_PLATFORM.
47
#include "unicode/platform.h"
48
49
#if U_PLATFORM == U_PF_MINGW && defined __STRICT_ANSI__
50
/* tzset isn't defined in strict ANSI on MinGW. */
51
#undef __STRICT_ANSI__
52
#endif
53
54
/*
55
 * Cygwin with GCC requires inclusion of time.h after the above statement that disables strict ANSI mode.
56
 */
57
#include <time.h>
58
59
#if !U_PLATFORM_USES_ONLY_WIN32_API
60
#include <sys/time.h>
61
#endif
62
63
/* include the rest of the ICU headers */
64
#include "unicode/putil.h"
65
#include "unicode/ustring.h"
66
#include "putilimp.h"
67
#include "uassert.h"
68
#include "umutex.h"
69
#include "cmemory.h"
70
#include "cstring.h"
71
#include "locmap.h"
72
#include "ucln_cmn.h"
73
#include "charstr.h"
74
75
/* Include standard headers. */
76
#include <stdio.h>
77
#include <stdlib.h>
78
#include <string.h>
79
#include <math.h>
80
#include <locale.h>
81
#include <float.h>
82
83
#ifndef U_COMMON_IMPLEMENTATION
84
#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu
85
#endif
86
87
88
/* include system headers */
89
#if U_PLATFORM_USES_ONLY_WIN32_API
90
    /*
91
     * TODO: U_PLATFORM_USES_ONLY_WIN32_API includes MinGW.
92
     * Should Cygwin be included as well (U_PLATFORM_HAS_WIN32_API)
93
     * to use native APIs as much as possible?
94
     */
95
#ifndef WIN32_LEAN_AND_MEAN
96
#   define WIN32_LEAN_AND_MEAN
97
#endif
98
#   define VC_EXTRALEAN
99
#   define NOUSER
100
#   define NOSERVICE
101
#   define NOIME
102
#   define NOMCX
103
#   include <windows.h>
104
#   include "unicode/uloc.h"
105
#   include "wintz.h"
106
#elif U_PLATFORM == U_PF_OS400
107
#   include <float.h>
108
#   include <qusec.h>       /* error code structure */
109
#   include <qusrjobi.h>
110
#   include <qliept.h>      /* EPT_CALL macro  - this include must be after all other "QSYSINCs" */
111
#   include <mih/testptr.h> /* For uprv_maximumPtr */
112
#elif U_PLATFORM == U_PF_OS390
113
#   include "unicode/ucnv.h"   /* Needed for UCNV_SWAP_LFNL_OPTION_STRING */
114
#elif U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS
115
#   include <limits.h>
116
#   include <unistd.h>
117
#   if U_PLATFORM == U_PF_SOLARIS
118
#       ifndef _XPG4_2
119
#           define _XPG4_2
120
#       endif
121
#   elif U_PLATFORM == U_PF_ANDROID
122
#       include <sys/system_properties.h>
123
#       include <dlfcn.h>
124
#   endif
125
#elif U_PLATFORM == U_PF_QNX
126
#   include <sys/neutrino.h>
127
#endif
128
129
130
/*
131
 * Only include langinfo.h if we have a way to get the codeset. If we later
132
 * depend on more features, we can test on U_HAVE_NL_LANGINFO.
133
 *
134
 */
135
136
#if U_HAVE_NL_LANGINFO_CODESET
137
#include <langinfo.h>
138
#endif
139
140
/**
141
 * Simple things (presence of functions, etc) should just go in configure.in and be added to
142
 * icucfg.h via autoheader.
143
 */
144
#if U_PLATFORM_IMPLEMENTS_POSIX
145
#   if U_PLATFORM == U_PF_OS400
146
#    define HAVE_DLFCN_H 0
147
#    define HAVE_DLOPEN 0
148
#   else
149
#   ifndef HAVE_DLFCN_H
150
#    define HAVE_DLFCN_H 1
151
#   endif
152
#   ifndef HAVE_DLOPEN
153
#    define HAVE_DLOPEN 1
154
#   endif
155
#   endif
156
#   ifndef HAVE_GETTIMEOFDAY
157
#    define HAVE_GETTIMEOFDAY 1
158
#   endif
159
#else
160
#   define HAVE_DLFCN_H 0
161
#   define HAVE_DLOPEN 0
162
#   define HAVE_GETTIMEOFDAY 0
163
#endif
164
165
U_NAMESPACE_USE
166
167
/* Define the extension for data files, again... */
168
#define DATA_TYPE "dat"
169
170
/* Leave this copyright notice here! */
171
static const char copyright[] = U_COPYRIGHT_STRING;
172
173
/* floating point implementations ------------------------------------------- */
174
175
/* We return QNAN rather than SNAN*/
176
#define SIGN 0x80000000U
177
178
/* Make it easy to define certain types of constants */
179
/*
 * Overlay of a 64-bit integer and a double, so that floating-point constants
 * can be written down as raw bit patterns.
 */
typedef union {
    /* i64 has to stay the first member: the brace initializers below
       initialize the first member of the union (C89-style initialization). */
    int64_t i64;
    double d64;
} BitPatternConversion;

/* Bit pattern of the (quiet) NaN handed out by uprv_getNaN(). */
static const BitPatternConversion gNan = { INT64_C(0x7FF8000000000000) };

/* Bit pattern of positive infinity. */
static const BitPatternConversion gInf = { INT64_C(0x7FF0000000000000) };
185
186
/*---------------------------------------------------------------------------
187
  Platform utilities
188
  Our general strategy is to assume we're on a POSIX platform.  Platforms which
189
  are non-POSIX must declare themselves so.  The default POSIX implementation
190
  will sometimes work for non-POSIX platforms as well (e.g., the NaN-related
191
  functions).
192
  ---------------------------------------------------------------------------*/
193
194
#if U_PLATFORM_USES_ONLY_WIN32_API || U_PLATFORM == U_PF_OS400
195
#   undef U_POSIX_LOCALE
196
#else
197
#   define U_POSIX_LOCALE    1
198
#endif
199
200
/*
201
    WARNING! u_topNBytesOfDouble and u_bottomNBytesOfDouble
202
    can't be properly optimized by the gcc compiler sometimes (i.e. gcc 3.2).
203
*/
204
#if !IEEE_754
205
/*
 * Returns a pointer to the n bytes of *d that sit at the high-order end of
 * the value: the first n bytes on big-endian builds, the last n bytes
 * otherwise.
 */
static char*
u_topNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    (void)n;
    return firstByte;
#else
    return firstByte + sizeof(double) - n;
#endif
}
214
215
/*
 * Returns a pointer to the n bytes of *d that sit at the low-order end of
 * the value: the last n bytes on big-endian builds, the first n bytes
 * otherwise.
 */
static char*
u_bottomNBytesOfDouble(double* d, int n)
{
    char *firstByte = (char*)d;
#if U_IS_BIG_ENDIAN
    return firstByte + sizeof(double) - n;
#else
    (void)n;
    return firstByte;
#endif
}
224
#endif   /* !IEEE_754 */
225
226
#if IEEE_754
227
/*
 * Reports whether the sign bit of d is set, by inspecting the top bit of
 * the double's high-order byte. Unlike a "d < 0" test this is also true
 * for -0.0.
 */
static UBool
u_signBit(double d) {
    const uint8_t *bytes = (const uint8_t *)&d;
#if U_IS_BIG_ENDIAN
    const size_t signByteIndex = 0;
#else
    const size_t signByteIndex = sizeof(double) - 1;
#endif
    return (bytes[signByteIndex] & 0x80) != 0;
}
237
#endif
238
239
240
241
#if defined (U_DEBUG_FAKETIME)
242
/* Override the clock to test things without having to move the system clock.
243
 * Assumes POSIX gettimeofday() will function
244
 */
245
UDate fakeClock_t0 = 0; /** Time to start the clock from **/
246
UDate fakeClock_dt = 0; /** Offset (fake time - real time) **/
247
UBool fakeClock_set = FALSE; /** True if fake clock has spun up **/
248
249
/* Reads the real clock via gettimeofday() and converts it to milliseconds. */
static UDate getUTCtime_real() {
    struct timeval now;
    gettimeofday(&now, NULL);
    const int64_t wholeSecondsInMillis = (int64_t)now.tv_sec * U_MILLIS_PER_SECOND;
    return (UDate)(wholeSecondsInMillis + (now.tv_usec/1000));
}
254
255
/*
 * Returns the real time shifted by fakeClock_dt. On the first call (under a
 * mutex) the offset is derived from the U_FAKETIME_START environment
 * variable; when that variable is absent or empty the offset is 0. A
 * diagnostic is written to stderr either way.
 */
static UDate getUTCtime_fake() {
    static UMutex fakeClockMutex;
    umtx_lock(&fakeClockMutex);
    if(!fakeClock_set) {
        const UDate realNow = getUTCtime_real();
        const char *startSpec = getenv("U_FAKETIME_START");
        const bool haveStartSpec = (startSpec!=NULL) && (startSpec[0]!=0);
        if(haveStartSpec) {
            sscanf(startSpec,"%lf",&fakeClock_t0);
            fakeClock_dt = fakeClock_t0 - realNow;
            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, so the ICU clock will start at a preset value\n"
                    "env variable U_FAKETIME_START=%.0f (%s) for an offset of %.0f ms from the current time %.0f\n",
                    fakeClock_t0, startSpec, fakeClock_dt, realNow);
        } else {
            fakeClock_dt = 0;
            fprintf(stderr,"U_DEBUG_FAKETIME was set at compile time, but U_FAKETIME_START was not set.\n"
                    "Set U_FAKETIME_START to the number of milliseconds since 1/1/1970 to set the ICU clock.\n");
        }
        fakeClock_set = TRUE;
    }
    umtx_unlock(&fakeClockMutex);

    const UDate realTime = getUTCtime_real();
    return realTime + fakeClock_dt;
}
278
#endif
279
280
#if U_PLATFORM_USES_ONLY_WIN32_API
281
/* Overlays a Win32 FILETIME with a 64-bit integer: uprv_getRawUTCtime() fills
   fileTime via GetSystemTimeAsFileTime() and then reads the value back
   through int64. */
typedef union {
282
    int64_t int64;
283
    FILETIME fileTime;
284
} FileTimeConversion;   /* This is like a ULARGE_INTEGER */
285
286
/* Number of 100 nanoseconds from 1/1/1601 to 1/1/1970 */
287
#define EPOCH_BIAS  INT64_C(116444736000000000)
288
#define HECTONANOSECOND_PER_MILLISECOND   10000
289
290
#endif
291
292
/*---------------------------------------------------------------------------
293
  Universal Implementations
294
  These are designed to work on all platforms.  Try these, and if they
295
  don't work on your platform, then special case your platform with new
296
  implementations.
297
---------------------------------------------------------------------------*/
298
299
/*
 * Returns the current time as ICU sees it. Builds with U_DEBUG_FAKETIME
 * route through the fake clock; all other builds return uprv_getRawUTCtime().
 */
U_CAPI UDate U_EXPORT2
uprv_getUTCtime()
{
    UDate now;
#if defined(U_DEBUG_FAKETIME)
    now = getUTCtime_fake(); /* Hook for overriding the clock */
#else
    now = uprv_getRawUTCtime();
#endif
    return now;
}
308
309
/* Return UTC (GMT) time measured in milliseconds since 0:00 on 1/1/70.*/
310
/*
 * Reads the system clock and returns it in milliseconds. Three
 * implementations are selected at compile time: Win32 FILETIME,
 * gettimeofday(), or time() as the last resort (whole seconds only).
 */
U_CAPI UDate U_EXPORT2
uprv_getRawUTCtime()
{
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* FILETIME counts 100ns ticks since 1601; rebase to 1970, then scale to ms. */
    FileTimeConversion systemTime;
    GetSystemTimeAsFileTime(&systemTime.fileTime);
    return (UDate)((systemTime.int64 - EPOCH_BIAS) / HECTONANOSECOND_PER_MILLISECOND);
#elif HAVE_GETTIMEOFDAY
    struct timeval now;
    gettimeofday(&now, NULL);
    return (UDate)(((int64_t)now.tv_sec * U_MILLIS_PER_SECOND) + (now.tv_usec/1000));
#else
    time_t seconds;
    time(&seconds);
    return (UDate)seconds * U_MILLIS_PER_SECOND;
#endif
}
332
333
/*-----------------------------------------------------------------------------
334
  IEEE 754
335
  These methods detect and return NaN and infinity values for doubles
336
  conforming to IEEE 754.  Platforms which support this standard include X86,
337
  Mac 680x0, Mac PowerPC, AIX RS/6000, and most others.
338
  If this doesn't work on your platform, you have non-IEEE floating-point, and
339
  will need to code your own versions.  A naive implementation is to return 0.0
340
  for getNaN and getInfinity, and false for isNaN and isInfinite.
341
  ---------------------------------------------------------------------------*/
342
343
/* Returns true when number is a NaN. */
U_CAPI UBool U_EXPORT2
uprv_isNaN(double number)
{
#if IEEE_754
    BitPatternConversion bits;
    bits.d64 = number;
    /* With the sign bit masked off, infinity is 0x7FF0000000000000 and
       every larger bit pattern is a NaN. */
    const int64_t magnitude = bits.i64 & U_INT64_MAX;
    return (UBool)(magnitude > gInf.i64);

#elif U_PLATFORM == U_PF_OS390
    const uint32_t upperWord = *(uint32_t*)u_topNBytesOfDouble(&number,
                               sizeof(uint32_t));
    const uint32_t lowerWord = *(uint32_t*)u_bottomNBytesOfDouble(&number,
                               sizeof(uint32_t));

    return ((upperWord & 0x7F080000L) == 0x7F080000L) &&
      (lowerWord == 0x00000000L);

#else
    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    /* you'll need to replace this default implementation with what's correct*/
    /* for your platform.*/
    return number != number;
#endif
}
368
369
/* Returns true when number is positive or negative infinity. */
U_CAPI UBool U_EXPORT2
uprv_isInfinite(double number)
{
#if IEEE_754
    BitPatternConversion bits;
    bits.d64 = number;
    /* With the sign bit masked off, infinity is exactly 0x7FF0000000000000. */
    const int64_t magnitude = bits.i64 & U_INT64_MAX;
    return (UBool)(magnitude == gInf.i64);
#elif U_PLATFORM == U_PF_OS390
    const uint32_t upperWord = *(uint32_t*)u_topNBytesOfDouble(&number,
                               sizeof(uint32_t));
    const uint32_t lowerWord = *(uint32_t*)u_bottomNBytesOfDouble(&number,
                               sizeof(uint32_t));

    return ((upperWord  & ~SIGN) == 0x70FF0000L) && (lowerWord == 0x00000000L);

#else
    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    /* value, you'll need to replace this default implementation with what's*/
    /* correct for your platform.*/
    return number == (2.0 * number);
#endif
}
392
393
/*
 * Returns true when number is +Infinity. On the fallback (neither IEEE 754
 * nor OS/390) path the sign is not examined.
 */
U_CAPI UBool U_EXPORT2
uprv_isPositiveInfinity(double number)
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    if (number > 0) {
        return (UBool)(uprv_isInfinite(number) != 0);
    }
    return (UBool)false;
#else
    return uprv_isInfinite(number);
#endif
}
402
403
/* Returns true when number is -Infinity. */
U_CAPI UBool U_EXPORT2
uprv_isNegativeInfinity(double number)
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    if (number < 0) {
        return (UBool)(uprv_isInfinite(number) != 0);
    }
    return (UBool)false;

#else
    /* Fallback: look at the sign bit in the high-order word directly. */
    const uint32_t upperWord = *(uint32_t*)u_topNBytesOfDouble(&number,
                               sizeof(uint32_t));
    return((upperWord & SIGN) && uprv_isInfinite(number));

#endif
}
416
417
/*
 * Returns the NaN constant stored in gNan; on platforms that are neither
 * IEEE 754 nor OS/390 the placeholder value 0.0 is returned instead.
 */
U_CAPI double U_EXPORT2
uprv_getNaN()
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    const double nanValue = gNan.d64;
#else
    /* If your platform doesn't support IEEE 754 but *does* have an NaN value,*/
    /* you'll need to replace this default implementation with what's correct*/
    /* for your platform.*/
    const double nanValue = 0.0;
#endif
    return nanValue;
}
429
430
/*
 * Returns the positive-infinity constant stored in gInf; on platforms that
 * are neither IEEE 754 nor OS/390 the placeholder value 0.0 is returned.
 */
U_CAPI double U_EXPORT2
uprv_getInfinity()
{
#if IEEE_754 || U_PLATFORM == U_PF_OS390
    const double infinity = gInf.d64;
#else
    /* If your platform doesn't support IEEE 754 but *does* have an infinity*/
    /* value, you'll need to replace this default implementation with what's*/
    /* correct for your platform.*/
    const double infinity = 0.0;
#endif
    return infinity;
}
442
443
/* Thin wrapper around the C library floor(). */
U_CAPI double U_EXPORT2
uprv_floor(double x)
{
    const double result = floor(x);
    return result;
}
448
449
/* Thin wrapper around the C library ceil(). */
U_CAPI double U_EXPORT2
uprv_ceil(double x)
{
    const double result = ceil(x);
    return result;
}
454
455
/* Rounds by adding 0.5 and taking the floor of the sum. */
U_CAPI double U_EXPORT2
uprv_round(double x)
{
    const double shifted = x + 0.5;
    return uprv_floor(shifted);
}
460
461
/* Thin wrapper around the C library fabs(). */
U_CAPI double U_EXPORT2
uprv_fabs(double x)
{
    const double magnitude = fabs(x);
    return magnitude;
}
466
467
/*
 * Thin wrapper around the C library modf(): the integral part of x is stored
 * through y and the fractional part is returned.
 */
U_CAPI double U_EXPORT2
uprv_modf(double x, double* y)
{
    const double fraction = modf(x, y);
    return fraction;
}
472
473
/* Thin wrapper around the C library fmod(). */
U_CAPI double U_EXPORT2
uprv_fmod(double x, double y)
{
    const double remainder = fmod(x, y);
    return remainder;
}
478
479
/* Thin wrapper around the C library "double pow(double x, double y)". */
U_CAPI double U_EXPORT2
uprv_pow(double x, double y)
{
    const double power = pow(x, y);
    return power;
}
485
486
/* Returns 10 raised to the power x, computed with the C library pow(). */
U_CAPI double U_EXPORT2
uprv_pow10(int32_t x)
{
    const double exponent = (double)x;
    return pow(10.0, exponent);
}
491
492
/*
 * Returns the larger of x and y. On IEEE 754 builds a NaN operand yields
 * NaN, and when both operands are zero and x carries the sign bit, y is
 * returned.
 */
U_CAPI double U_EXPORT2
uprv_fmax(double x, double y)
{
#if IEEE_754
    /* NaN in, NaN out. */
    if(uprv_isNaN(x) || uprv_isNaN(y)) {
        return uprv_getNaN();
    }

    /* -0.0 and 0.0 compare equal, so settle that pair via the sign bit. */
    if(x == 0.0 && y == 0.0 && u_signBit(x)) {
        return y;
    }

#endif

    /* this should work for all flt point w/o NaN and Inf special cases */
    if (x > y) {
        return x;
    }
    return y;
}
509
510
/*
 * Returns the smaller of x and y. On IEEE 754 builds a NaN operand yields
 * NaN, and when both operands are zero and y carries the sign bit, y is
 * returned.
 */
U_CAPI double U_EXPORT2
uprv_fmin(double x, double y)
{
#if IEEE_754
    /* NaN in, NaN out. */
    if(uprv_isNaN(x) || uprv_isNaN(y)) {
        return uprv_getNaN();
    }

    /* -0.0 and 0.0 compare equal, so settle that pair via the sign bit. */
    if(x == 0.0 && y == 0.0 && u_signBit(y)) {
        return y;
    }

#endif

    /* this should work for all flt point w/o NaN and Inf special cases */
    if (x > y) {
        return y;
    }
    return x;
}
527
528
/*
 * Adds a and b, stores the sum narrowed to 32 bits in *res, and returns true
 * when the exact sum does not fit in an int32_t.
 */
U_CAPI UBool U_EXPORT2
uprv_add32_overflow(int32_t a, int32_t b, int32_t* res) {
    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_add_overflow.
    // This function could be optimized by calling one of those primitives.
    // The sum is formed in 64 bits, where it cannot overflow.
    const int64_t exactSum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
    const int32_t narrowed = static_cast<int32_t>(exactSum);
    *res = narrowed;
    return exactSum != narrowed;
}
538
539
/*
 * Multiplies a by b, stores the product narrowed to 32 bits in *res, and
 * returns true when the exact product does not fit in an int32_t.
 */
U_CAPI UBool U_EXPORT2
uprv_mul32_overflow(int32_t a, int32_t b, int32_t* res) {
    // NOTE: Some compilers (GCC, Clang) have primitives available, like __builtin_mul_overflow.
    // This function could be optimized by calling one of those primitives.
    // The product is formed in 64 bits, where it cannot overflow.
    const int64_t exactProduct = static_cast<int64_t>(a) * static_cast<int64_t>(b);
    const int32_t narrowed = static_cast<int32_t>(exactProduct);
    *res = narrowed;
    return exactProduct != narrowed;
}
549
550
/**
551
 * Truncates the given double.
552
 * trunc(3.3) = 3.0, trunc (-3.3) = -3.0
553
 * This is different than calling floor() or ceil():
554
 * floor(3.3) = 3, floor(-3.3) = -4
555
 * ceil(3.3) = 4, ceil(-3.3) = -3
556
 */
557
U_CAPI double U_EXPORT2
558
uprv_trunc(double d)
559
0
{
560
0
#if IEEE_754
561
    /* handle error cases*/
562
0
    if(uprv_isNaN(d))
563
0
        return uprv_getNaN();
564
0
    if(uprv_isInfinite(d))
565
0
        return uprv_getInfinity();
566
567
0
    if(u_signBit(d))    /* Signbit() picks up -0.0;  d<0 does not. */
568
0
        return ceil(d);
569
0
    else
570
0
        return floor(d);
571
572
#else
573
    return d >= 0 ? floor(d) : ceil(d);
574
575
#endif
576
0
}
577
578
/**
579
 * Return the largest positive number that can be represented by an integer
580
 * type of arbitrary bit length.
581
 */
582
/* Computes pow(2.0, DBL_MANT_DIG + 1.0) - 1.0. */
U_CAPI double U_EXPORT2
uprv_maxMantissa(void)
{
    const double exponent = DBL_MANT_DIG + 1.0;
    return pow(2.0, exponent) - 1.0;
}
587
588
/* Thin wrapper around the C library log(). */
U_CAPI double U_EXPORT2
uprv_log(double d)
{
    const double result = log(d);
    return result;
}
593
594
/*
 * Returns the highest address usable relative to base. Everywhere except
 * OS/400 this is simply U_MAX_PTR(base).
 */
U_CAPI void * U_EXPORT2
uprv_maximumPtr(void * base)
{
#if U_PLATFORM == U_PF_OS400
    /*
     * With the provided function we should never be out of range of a given segment
     * (a traditional/typical segment that is).  Our segments have 5 bytes for the
     * id and 3 bytes for the offset.  The key is that the casting takes care of
     * only retrieving the offset portion minus x1000.  Hence, the smallest offset
     * seen in a program is x001000 and when casted to an int would be 0.
     * That's why we can only add 0xffefff.  Otherwise, we would exceed the segment.
     *
     * Currently, 16MB is the current addressing limitation on i5/OS if the activation is
     * non-TERASPACE.  If it is TERASPACE it is 2GB - 4k(header information).
     * This function determines the activation based on the pointer that is passed in and
     * calculates the appropriate maximum available size for
     * each pointer type (TERASPACE and non-TERASPACE)
     *
     * Unlike other operating systems, the pointer model isn't determined at
     * compile time on i5/OS.
     */
    /* default: 16MB since NULL ptr is not checkable or the ptr is not TERASPACE */
    uint32_t highestOffset = (uint32_t)0xffefff;
    if ((base != NULL) && (_TESTPTR(base, _C_TERASPACE_CHECK))) {
        /* if it is a TERASPACE pointer the max is 2GB - 4k */
        highestOffset = (uint32_t)0x7fffefff;
    }
    return ((void *)(((char *)base)-((uint32_t)(base))+highestOffset));

#else
    void *highest = U_MAX_PTR(base);
    return highest;
#endif
}
626
627
/*---------------------------------------------------------------------------
628
  Platform-specific Implementations
629
  Try these, and if they don't work on your platform, then special case your
630
  platform with new implementations.
631
  ---------------------------------------------------------------------------*/
632
633
/* Generic time zone layer -------------------------------------------------- */
634
635
/* Time zone utilities */
636
/* Invokes the platform's U_TZSET() when one is configured; otherwise a no-op. */
U_CAPI void U_EXPORT2
uprv_tzset()
{
#ifdef U_TZSET
    U_TZSET();
#endif
    /* without U_TZSET there is no initialization to perform */
}
645
646
/*
 * Returns the platform's U_TIMEZONE value when that macro is defined.
 * Otherwise the offset is derived as mktime(gmtime(now)) - mktime(localtime(now)),
 * in seconds, with a one-hour adjustment when the local time reports DST
 * (skipped on iOS).
 */
U_CAPI int32_t U_EXPORT2
uprv_timezone()
{
#ifdef U_TIMEZONE
    return U_TIMEZONE;
#else
    time_t now, localSeconds, utcSeconds;
    struct tm brokenDown;
    int32_t offset = 0;

    time(&now);
    uprv_memcpy( &brokenDown, localtime(&now), sizeof(brokenDown) );
#if U_PLATFORM != U_PF_IPHONE
    /* remember whether daylight savings time was flagged for local time */
    UBool dstFlagged = (brokenDown.tm_isdst != 0);
#endif
    localSeconds = mktime(&brokenDown);  /* local time in seconds*/
    uprv_memcpy( &brokenDown, gmtime(&now), sizeof(brokenDown) );
    utcSeconds = mktime(&brokenDown);    /* GMT (or UTC) in seconds*/
    offset = utcSeconds - localSeconds;

#if U_PLATFORM != U_PF_IPHONE
    /* imitate NT behaviour, which returns same timezone offset to GMT for
       winter and summer.
       This does not work on all platforms. For instance, on glibc on Linux
       and on Mac OS 10.5, tdiff calculated above remains the same
       regardless of whether DST is in effect or not. iOS is another
       platform where this does not work. Linux + glibc and Mac OS 10.5
       have U_TIMEZONE defined so that this code is not reached.
    */
    if (dstFlagged) {
        offset += 3600;
    }
#endif
    return offset;
#endif
}
681
682
/* Note that U_TZNAME does *not* have to be tzname, but if it is,
683
   some platforms need to have it declared here. */
684
685
#if defined(U_TZNAME) && (U_PLATFORM == U_PF_IRIX || U_PLATFORM_IS_DARWIN_BASED)
686
/* RS6000 and others reject char **tzname.  */
687
extern U_IMPORT char *U_TZNAME[];
688
#endif
689
690
#if !UCONFIG_NO_FILE_IO && ((U_PLATFORM_IS_DARWIN_BASED && (U_PLATFORM != U_PF_IPHONE || defined(U_TIMEZONE))) || U_PLATFORM_IS_LINUX_BASED || U_PLATFORM == U_PF_BSD || U_PLATFORM == U_PF_SOLARIS)
691
/* These platforms are likely to use Olson timezone IDs. */
692
/* common targets of the symbolic link at TZDEFAULT are:
693
 * "/usr/share/zoneinfo/<olsonID>" default, older Linux distros, macOS to 10.12
694
 * "../usr/share/zoneinfo/<olsonID>" newer Linux distros: Red Hat Enterprise Linux 7, Ubuntu 16, SuSe Linux 12
695
 * "/usr/share/lib/zoneinfo/<olsonID>" Solaris
696
 * "../usr/share/lib/zoneinfo/<olsonID>" Solaris
697
 * "/var/db/timezone/zoneinfo/<olsonID>" macOS 10.13
698
 * To avoid checking lots of paths, just check that the target path
699
 * before the <olsonID> ends with "/zoneinfo/", and the <olsonID> is valid.
700
 */
701
702
#define CHECK_LOCALTIME_LINK 1
703
#if U_PLATFORM_IS_DARWIN_BASED
704
#include <tzfile.h>
705
#define TZZONEINFO      (TZDIR "/")
706
#elif U_PLATFORM == U_PF_SOLARIS
707
#define TZDEFAULT       "/etc/localtime"
708
#define TZZONEINFO      "/usr/share/lib/zoneinfo/"
709
#define TZ_ENV_CHECK    "localtime"
710
#else
711
0
#define TZDEFAULT       "/etc/localtime"
712
0
#define TZZONEINFO      "/usr/share/zoneinfo/"
713
#endif
714
#define TZZONEINFOTAIL  "/zoneinfo/"
715
#if U_HAVE_DIRENT_H
716
#define TZFILE_SKIP     "posixrules" /* tz file to skip when searching. */
717
/* Some Linux distributions have 'localtime' in /usr/share/zoneinfo
718
   symlinked to /etc/localtime, which makes searchForTZFile return
719
   'localtime' when it's the first match. */
720
#define TZFILE_SKIP2    "localtime"
721
#define SEARCH_TZFILE
722
#include <dirent.h>  /* Needed to search through system timezone files */
723
#endif
724
static char gTimeZoneBuffer[PATH_MAX];
725
static char *gTimeZoneBufferPtr = NULL;
726
#endif
727
728
#if !U_PLATFORM_USES_ONLY_WIN32_API
729
0
#define isNonDigit(ch) (ch < '0' || '9' < ch)
730
0
#define isDigit(ch) ('0' <= ch && ch <= '9')
731
0
/*
 * Heuristic check that id could be an Olson ID (e.g. "Iceland") rather than
 * a POSIX-style rule such as "AST4ADT". Accepted shapes: a run of characters
 * that are neither digits nor ',', optionally followed by at most two digits
 * (e.g. "GMT+11"), with nothing after that. Four legacy IDs that do not fit
 * this shape are accepted by name.
 */
static UBool isValidOlsonID(const char *id) {
    int32_t pos = 0;

    /* Skip the leading part that contains no digit and no comma. */
    for (; id[pos] && isNonDigit(id[pos]) && id[pos] != ','; pos++) {
    }

    /* Allow at maximum 2 numbers at the end of the id to support zone id's
    like GMT+11. */
    const int32_t digitLimit = pos + 2;
    for (; id[pos] && isDigit(id[pos]) && pos < digitLimit; pos++) {
    }

    /* If we went through the whole string, then it might be okay.
    The timezone is sometimes set to "CST-7CDT", "CST6CDT5,J129,J131/19:30",
    "GRNLNDST3GRNLNDDT" or similar, so we cannot use it.
    The rest of the time it could be an Olson ID. George */
    if (id[pos] == 0) {
        return (UBool)true;
    }
    return (UBool)(uprv_strcmp(id, "PST8PDT") == 0
        || uprv_strcmp(id, "MST7MDT") == 0
        || uprv_strcmp(id, "CST6CDT") == 0
        || uprv_strcmp(id, "EST5EDT") == 0);
}
758
759
/* On some Unix-like OS, 'posix' subdirectory in
760
   /usr/share/zoneinfo replicates the top-level contents. 'right'
761
   subdirectory has the same set of files, but individual files
762
   are different from those in the top-level directory or 'posix'
763
   because 'right' has files for TAI (Int'l Atomic Time) while 'posix'
764
   has files for UTC.
765
   When the first match for /etc/localtime is in either of them
766
   (usually in posix because 'right' has different file contents),
767
   or TZ environment variable points to one of them, createTimeZone
768
   fails because, say, 'posix/America/New_York' is not an Olson
769
   timezone id ('America/New_York' is). So, we have to skip
770
   'posix/' and 'right/' at the beginning. */
771
0
static void skipZoneIDPrefix(const char** id) {
772
0
    if (uprv_strncmp(*id, "posix/", 6) == 0
773
0
        || uprv_strncmp(*id, "right/", 6) == 0)
774
0
    {
775
0
        *id += 6;
776
0
    }
777
0
}
778
#endif
779
780
#if defined(U_TZNAME) && !U_PLATFORM_USES_ONLY_WIN32_API
781
782
#define CONVERT_HOURS_TO_SECONDS(offset) (int32_t)(offset*3600)
783
/* One row of OFFSET_ZONE_MAPPINGS: a (offset, daylight type, standard
   abbreviation, daylight abbreviation) key and the Olson ID it maps to.
   The field order must match the positional initializers in that table. */
typedef struct OffsetZoneMapping {
784
    int32_t offsetSeconds;
785
    int32_t daylightType; /* 0=U_DAYLIGHT_NONE, 1=daylight in June-U_DAYLIGHT_JUNE, 2=daylight in December=U_DAYLIGHT_DECEMBER*/
786
    const char *stdID;
787
    const char *dstID;
788
    const char *olsonID;
789
} OffsetZoneMapping;
790
791
enum { U_DAYLIGHT_NONE=0,U_DAYLIGHT_JUNE=1,U_DAYLIGHT_DECEMBER=2 };
792
793
/*
794
This list tries to disambiguate a set of abbreviated timezone IDs and offsets
795
and maps it to an Olson ID.
796
Before adding anything to this list, take a look at
797
icu/source/tools/tzcode/tz.alias
798
Sometimes no daylight savings (0) is important to define due to aliases.
799
This list can be tested with icu/source/test/compat/tzone.pl
800
More values could be added to daylightType to increase precision.
801
*/
802
static const struct OffsetZoneMapping OFFSET_ZONE_MAPPINGS[] = {
803
    {-45900, 2, "CHAST", "CHADT", "Pacific/Chatham"},
804
    {-43200, 1, "PETT", "PETST", "Asia/Kamchatka"},
805
    {-43200, 2, "NZST", "NZDT", "Pacific/Auckland"},
806
    {-43200, 1, "ANAT", "ANAST", "Asia/Anadyr"},
807
    {-39600, 1, "MAGT", "MAGST", "Asia/Magadan"},
808
    {-37800, 2, "LHST", "LHST", "Australia/Lord_Howe"},
809
    {-36000, 2, "EST", "EST", "Australia/Sydney"},
810
    {-36000, 1, "SAKT", "SAKST", "Asia/Sakhalin"},
811
    {-36000, 1, "VLAT", "VLAST", "Asia/Vladivostok"},
812
    {-34200, 2, "CST", "CST", "Australia/South"},
813
    {-32400, 1, "YAKT", "YAKST", "Asia/Yakutsk"},
814
    {-32400, 1, "CHOT", "CHOST", "Asia/Choibalsan"},
815
    {-31500, 2, "CWST", "CWST", "Australia/Eucla"},
816
    {-28800, 1, "IRKT", "IRKST", "Asia/Irkutsk"},
817
    {-28800, 1, "ULAT", "ULAST", "Asia/Ulaanbaatar"},
818
    {-28800, 2, "WST", "WST", "Australia/West"},
819
    {-25200, 1, "HOVT", "HOVST", "Asia/Hovd"},
820
    {-25200, 1, "KRAT", "KRAST", "Asia/Krasnoyarsk"},
821
    {-21600, 1, "NOVT", "NOVST", "Asia/Novosibirsk"},
822
    {-21600, 1, "OMST", "OMSST", "Asia/Omsk"},
823
    {-18000, 1, "YEKT", "YEKST", "Asia/Yekaterinburg"},
824
    {-14400, 1, "SAMT", "SAMST", "Europe/Samara"},
825
    {-14400, 1, "AMT", "AMST", "Asia/Yerevan"},
826
    {-14400, 1, "AZT", "AZST", "Asia/Baku"},
827
    {-10800, 1, "AST", "ADT", "Asia/Baghdad"},
828
    {-10800, 1, "MSK", "MSD", "Europe/Moscow"},
829
    {-10800, 1, "VOLT", "VOLST", "Europe/Volgograd"},
830
    {-7200, 0, "EET", "CEST", "Africa/Tripoli"},
831
    {-7200, 1, "EET", "EEST", "Europe/Athens"}, /* Conflicts with Africa/Cairo */
832
    {-7200, 1, "IST", "IDT", "Asia/Jerusalem"},
833
    {-3600, 0, "CET", "WEST", "Africa/Algiers"},
834
    {-3600, 2, "WAT", "WAST", "Africa/Windhoek"},
835
    {0, 1, "GMT", "IST", "Europe/Dublin"},
836
    {0, 1, "GMT", "BST", "Europe/London"},
837
    {0, 0, "WET", "WEST", "Africa/Casablanca"},
838
    {0, 0, "WET", "WET", "Africa/El_Aaiun"},
839
    {3600, 1, "AZOT", "AZOST", "Atlantic/Azores"},
840
    {3600, 1, "EGT", "EGST", "America/Scoresbysund"},
841
    {10800, 1, "PMST", "PMDT", "America/Miquelon"},
842
    {10800, 2, "UYT", "UYST", "America/Montevideo"},
843
    {10800, 1, "WGT", "WGST", "America/Godthab"},
844
    {10800, 2, "BRT", "BRST", "Brazil/East"},
845
    {12600, 1, "NST", "NDT", "America/St_Johns"},
846
    {14400, 1, "AST", "ADT", "Canada/Atlantic"},
847
    {14400, 2, "AMT", "AMST", "America/Cuiaba"},
848
    {14400, 2, "CLT", "CLST", "Chile/Continental"},
849
    {14400, 2, "FKT", "FKST", "Atlantic/Stanley"},
850
    {14400, 2, "PYT", "PYST", "America/Asuncion"},
851
    {18000, 1, "CST", "CDT", "America/Havana"},
852
    {18000, 1, "EST", "EDT", "US/Eastern"}, /* Conflicts with America/Grand_Turk */
853
    {21600, 2, "EAST", "EASST", "Chile/EasterIsland"},
854
    {21600, 0, "CST", "MDT", "Canada/Saskatchewan"},
855
    {21600, 0, "CST", "CDT", "America/Guatemala"},
856
    {21600, 1, "CST", "CDT", "US/Central"}, /* Conflicts with Mexico/General */
857
    {25200, 1, "MST", "MDT", "US/Mountain"}, /* Conflicts with Mexico/BajaSur */
858
    {28800, 0, "PST", "PST", "Pacific/Pitcairn"},
859
    {28800, 1, "PST", "PDT", "US/Pacific"}, /* Conflicts with Mexico/BajaNorte */
860
    {32400, 1, "AKST", "AKDT", "US/Alaska"},
861
    {36000, 1, "HAST", "HADT", "US/Aleutian"}
862
};
863
864
/*#define DEBUG_TZNAME*/
865
866
static const char* remapShortTimeZone(const char *stdID, const char *dstID, int32_t daylightType, int32_t offset)
867
0
{
868
0
    int32_t idx;
869
#ifdef DEBUG_TZNAME
870
    fprintf(stderr, "TZ=%s std=%s dst=%s daylight=%d offset=%d\n", getenv("TZ"), stdID, dstID, daylightType, offset);
871
#endif
872
0
    for (idx = 0; idx < UPRV_LENGTHOF(OFFSET_ZONE_MAPPINGS); idx++)
873
0
    {
874
0
        if (offset == OFFSET_ZONE_MAPPINGS[idx].offsetSeconds
875
0
            && daylightType == OFFSET_ZONE_MAPPINGS[idx].daylightType
876
0
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].stdID, stdID) == 0
877
0
            && strcmp(OFFSET_ZONE_MAPPINGS[idx].dstID, dstID) == 0)
878
0
        {
879
0
            return OFFSET_ZONE_MAPPINGS[idx].olsonID;
880
0
        }
881
0
    }
882
0
    return NULL;
883
0
}
884
#endif
885
886
#ifdef SEARCH_TZFILE
887
0
#define MAX_READ_SIZE 512
888
889
typedef struct DefaultTZInfo {
890
    char* defaultTZBuffer;
891
    int64_t defaultTZFileSize;
892
    FILE* defaultTZFilePtr;
893
    UBool defaultTZstatus;
894
    int32_t defaultTZPosition;
895
} DefaultTZInfo;
896
897
/*
898
 * This method compares the two files given to see if they are a match.
899
 * It is currently use to compare two TZ files.
900
 */
901
0
static UBool compareBinaryFiles(const char* defaultTZFileName, const char* TZFileName, DefaultTZInfo* tzInfo) {
902
0
    FILE* file;
903
0
    int64_t sizeFile;
904
0
    int64_t sizeFileLeft;
905
0
    int32_t sizeFileRead;
906
0
    int32_t sizeFileToRead;
907
0
    char bufferFile[MAX_READ_SIZE];
908
0
    UBool result = TRUE;
909
910
0
    if (tzInfo->defaultTZFilePtr == NULL) {
911
0
        tzInfo->defaultTZFilePtr = fopen(defaultTZFileName, "r");
912
0
    }
913
0
    file = fopen(TZFileName, "r");
914
915
0
    tzInfo->defaultTZPosition = 0; /* reset position to begin search */
916
917
0
    if (file != NULL && tzInfo->defaultTZFilePtr != NULL) {
918
        /* First check that the file size are equal. */
919
0
        if (tzInfo->defaultTZFileSize == 0) {
920
0
            fseek(tzInfo->defaultTZFilePtr, 0, SEEK_END);
921
0
            tzInfo->defaultTZFileSize = ftell(tzInfo->defaultTZFilePtr);
922
0
        }
923
0
        fseek(file, 0, SEEK_END);
924
0
        sizeFile = ftell(file);
925
0
        sizeFileLeft = sizeFile;
926
927
0
        if (sizeFile != tzInfo->defaultTZFileSize) {
928
0
            result = FALSE;
929
0
        } else {
930
            /* Store the data from the files in separate buffers and
931
             * compare each byte to determine equality.
932
             */
933
0
            if (tzInfo->defaultTZBuffer == NULL) {
934
0
                rewind(tzInfo->defaultTZFilePtr);
935
0
                tzInfo->defaultTZBuffer = (char*)uprv_malloc(sizeof(char) * tzInfo->defaultTZFileSize);
936
0
                sizeFileRead = fread(tzInfo->defaultTZBuffer, 1, tzInfo->defaultTZFileSize, tzInfo->defaultTZFilePtr);
937
0
            }
938
0
            rewind(file);
939
0
            while(sizeFileLeft > 0) {
940
0
                uprv_memset(bufferFile, 0, MAX_READ_SIZE);
941
0
                sizeFileToRead = sizeFileLeft < MAX_READ_SIZE ? sizeFileLeft : MAX_READ_SIZE;
942
943
0
                sizeFileRead = fread(bufferFile, 1, sizeFileToRead, file);
944
0
                if (memcmp(tzInfo->defaultTZBuffer + tzInfo->defaultTZPosition, bufferFile, sizeFileRead) != 0) {
945
0
                    result = FALSE;
946
0
                    break;
947
0
                }
948
0
                sizeFileLeft -= sizeFileRead;
949
0
                tzInfo->defaultTZPosition += sizeFileRead;
950
0
            }
951
0
        }
952
0
    } else {
953
0
        result = FALSE;
954
0
    }
955
956
0
    if (file != NULL) {
957
0
        fclose(file);
958
0
    }
959
960
0
    return result;
961
0
}
962
963
964
/* dirent also lists two entries: "." and ".." that we can safely ignore. */
965
#define SKIP1 "."
966
#define SKIP2 ".."
967
static UBool U_CALLCONV putil_cleanup(void);
968
static CharString *gSearchTZFileResult = NULL;
969
970
/*
971
 * This method recursively traverses the directory given for a matching TZ file and returns the first match.
972
 * This function is not thread safe - it uses a global, gSearchTZFileResult, to hold its results.
973
 */
974
0
static char* searchForTZFile(const char* path, DefaultTZInfo* tzInfo) {
975
0
    DIR* dirp = NULL;
976
0
    struct dirent* dirEntry = NULL;
977
0
    char* result = NULL;
978
0
    UErrorCode status = U_ZERO_ERROR;
979
980
    /* Save the current path */
981
0
    CharString curpath(path, -1, status);
982
0
    if (U_FAILURE(status)) {
983
0
        goto cleanupAndReturn;
984
0
    }
985
986
0
    dirp = opendir(path);
987
0
    if (dirp == NULL) {
988
0
        goto cleanupAndReturn;
989
0
    }
990
991
0
    if (gSearchTZFileResult == NULL) {
992
0
        gSearchTZFileResult = new CharString;
993
0
        if (gSearchTZFileResult == NULL) {
994
0
            goto cleanupAndReturn;
995
0
        }
996
0
        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
997
0
    }
998
999
    /* Check each entry in the directory. */
1000
0
    while((dirEntry = readdir(dirp)) != NULL) {
1001
0
        const char* dirName = dirEntry->d_name;
1002
0
        if (uprv_strcmp(dirName, SKIP1) != 0 && uprv_strcmp(dirName, SKIP2) != 0
1003
0
            && uprv_strcmp(TZFILE_SKIP, dirName) != 0 && uprv_strcmp(TZFILE_SKIP2, dirName) != 0) {
1004
            /* Create a newpath with the new entry to test each entry in the directory. */
1005
0
            CharString newpath(curpath, status);
1006
0
            newpath.append(dirName, -1, status);
1007
0
            if (U_FAILURE(status)) {
1008
0
                break;
1009
0
            }
1010
1011
0
            DIR* subDirp = NULL;
1012
0
            if ((subDirp = opendir(newpath.data())) != NULL) {
1013
                /* If this new path is a directory, make a recursive call with the newpath. */
1014
0
                closedir(subDirp);
1015
0
                newpath.append('/', status);
1016
0
                if (U_FAILURE(status)) {
1017
0
                    break;
1018
0
                }
1019
0
                result = searchForTZFile(newpath.data(), tzInfo);
1020
                /*
1021
                 Have to get out here. Otherwise, we'd keep looking
1022
                 and return the first match in the top-level directory
1023
                 if there's a match in the top-level. If not, this function
1024
                 would return NULL and set gTimeZoneBufferPtr to NULL in initDefault().
1025
                 It worked without this in most cases because we have a fallback of calling
1026
                 localtime_r to figure out the default timezone.
1027
                */
1028
0
                if (result != NULL)
1029
0
                    break;
1030
0
            } else {
1031
0
                if(compareBinaryFiles(TZDEFAULT, newpath.data(), tzInfo)) {
1032
0
                    int32_t amountToSkip = sizeof(TZZONEINFO) - 1;
1033
0
                    if (amountToSkip > newpath.length()) {
1034
0
                        amountToSkip = newpath.length();
1035
0
                    }
1036
0
                    const char* zoneid = newpath.data() + amountToSkip;
1037
0
                    skipZoneIDPrefix(&zoneid);
1038
0
                    gSearchTZFileResult->clear();
1039
0
                    gSearchTZFileResult->append(zoneid, -1, status);
1040
0
                    if (U_FAILURE(status)) {
1041
0
                        break;
1042
0
                    }
1043
0
                    result = gSearchTZFileResult->data();
1044
                    /* Get out after the first one found. */
1045
0
                    break;
1046
0
                }
1047
0
            }
1048
0
        }
1049
0
    }
1050
1051
0
  cleanupAndReturn:
1052
0
    if (dirp) {
1053
0
        closedir(dirp);
1054
0
    }
1055
0
    return result;
1056
0
}
1057
#endif
1058
1059
#if U_PLATFORM == U_PF_ANDROID
1060
typedef int(system_property_read_callback)(const prop_info* info,
1061
                                           void (*callback)(void* cookie,
1062
                                                            const char* name,
1063
                                                            const char* value,
1064
                                                            uint32_t serial),
1065
                                           void* cookie);
1066
typedef int(system_property_get)(const char*, char*);
1067
1068
static char gAndroidTimeZone[PROP_VALUE_MAX] = { '\0' };
1069
1070
static void u_property_read(void* cookie, const char* name, const char* value,
1071
                            uint32_t serial) {
1072
    uprv_strcpy((char* )cookie, value);
1073
}
1074
#endif
1075
1076
/*
 * Discards the cached default time zone so that the next uprv_tzname() call
 * detects it again. On Android the cache is refilled immediately from the
 * "persist.sys.timezone" system property.
 */
U_CAPI void U_EXPORT2
uprv_tzname_clear_cache(void)
{
#if U_PLATFORM == U_PF_ANDROID
    /* Android's timezone is stored in system property. */
    static const char kTimeZoneProperty[] = "persist.sys.timezone";

    gAndroidTimeZone[0] = '\0';
    void* libc = dlopen("libc.so", RTLD_NOLOAD);
    if (libc != NULL) {
        /* Android API 26+ has new API to get system property and old API
         * (__system_property_get) is deprecated */
        system_property_read_callback* readCallback =
            (system_property_read_callback*)dlsym(
                libc, "__system_property_read_callback");
        if (readCallback != NULL) {
            const prop_info* info = __system_property_find(kTimeZoneProperty);
            if (info != NULL) {
                readCallback(info, &u_property_read, gAndroidTimeZone);
            }
        } else {
            /* Fall back to the older entry point when the new one is missing. */
            system_property_get* legacyGet =
                (system_property_get*)dlsym(libc, "__system_property_get");
            if (legacyGet != NULL) {
                legacyGet(kTimeZoneProperty, gAndroidTimeZone);
            }
        }
        dlclose(libc);
    }
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Forget the ID derived from the localtime link / zoneinfo search. */
    gTimeZoneBufferPtr = NULL;
#endif
}
1110
1111
/*
 * Returns an ID for the host's default time zone.
 *
 * Sources are tried in this order, subject to the build configuration:
 *   - Windows-only builds: uprv_detectWindowsTimeZone().
 *   - Other builds: the TZ environment variable (the cached system property
 *     on Android) when it is a valid Olson ID.
 *   - With CHECK_LOCALTIME_LINK: the target of the TZDEFAULT symlink, or,
 *     with SEARCH_TZFILE, the zoneinfo file whose contents equal TZDEFAULT.
 *     The result is cached in gTimeZoneBufferPtr.
 *   - With U_TZNAME: the abbreviations remapped through remapShortTimeZone(),
 *     and failing that U_TZNAME[n] itself.
 *   - Otherwise "".
 *
 * n is only used to index U_TZNAME.
 */
U_CAPI const char* U_EXPORT2
uprv_tzname(int n)
{
    (void)n; // Avoid unreferenced parameter warning.
    const char *tzid = NULL;
#if U_PLATFORM_USES_ONLY_WIN32_API
    tzid = uprv_detectWindowsTimeZone();
    if (tzid != NULL) {
        return tzid;
    }

#ifndef U_TZNAME
    // The return value is free'd in timezone.cpp on Windows because
    // the other code path returns a pointer to a heap location.
    // If we don't have a name already, then tzname wouldn't be any
    // better, so just fall back.
    return uprv_strdup("");
#endif // !U_TZNAME

#else

/* This code can be temporarily disabled to test tzname resolution later on. */
#ifndef DEBUG_TZNAME
#if U_PLATFORM == U_PF_ANDROID
    tzid = gAndroidTimeZone;
#else
    tzid = getenv("TZ");
#endif
    {
        bool envIsOlsonID = (tzid != NULL) && isValidOlsonID(tzid);
#if U_PLATFORM == U_PF_SOLARIS
        /* Don't misinterpret TZ "localtime" on Solaris as a time zone name. */
        envIsOlsonID = envIsOlsonID && (uprv_strcmp(tzid, TZ_ENV_CHECK) != 0);
#endif
        if (envIsOlsonID) {
            /* The colon forces tzset() to treat the remainder as zoneinfo path */
            if (tzid[0] == ':') {
                ++tzid;
            }
            /* This might be a good Olson ID. */
            skipZoneIDPrefix(&tzid);
            return tzid;
        }
    }
    /* else U_TZNAME will give a better result. */
#endif

#if defined(CHECK_LOCALTIME_LINK) && !defined(DEBUG_SKIP_LOCALTIME_LINK)
    /* Caller must handle threading issues */
    if (gTimeZoneBufferPtr != NULL) {
        /* Already resolved by an earlier call. */
        return gTimeZoneBufferPtr;
    }
    {
        /*
        This is a trick to look at the name of the link to get the Olson ID
        because the tzfile contents is underspecified.
        This isn't guaranteed to work because it may not be a symlink.
        */
        int32_t linkLength = (int32_t)readlink(TZDEFAULT, gTimeZoneBuffer, sizeof(gTimeZoneBuffer)-1);
        if (linkLength > 0) {
            int32_t tailLength = uprv_strlen(TZZONEINFOTAIL);
            gTimeZoneBuffer[linkLength] = 0; /* readlink() does not terminate the string */
            char *tailStart = uprv_strstr(gTimeZoneBuffer, TZZONEINFOTAIL);

            if (tailStart != NULL) {
                char *candidate = tailStart + tailLength;
                if (isValidOlsonID(candidate)) {
                    gTimeZoneBufferPtr = candidate;
                    return gTimeZoneBufferPtr;
                }
            }
        } else {
#if defined(SEARCH_TZFILE)
            /* Not a symlink: look for a zoneinfo file with identical contents. */
            DefaultTZInfo* tzInfo = (DefaultTZInfo*)uprv_malloc(sizeof(DefaultTZInfo));
            if (tzInfo != NULL) {
                tzInfo->defaultTZBuffer = NULL;
                tzInfo->defaultTZFileSize = 0;
                tzInfo->defaultTZFilePtr = NULL;
                tzInfo->defaultTZstatus = FALSE;
                tzInfo->defaultTZPosition = 0;

                gTimeZoneBufferPtr = searchForTZFile(TZZONEINFO, tzInfo);

                /* Release whatever the search cached in tzInfo. */
                if (tzInfo->defaultTZBuffer != NULL) {
                    uprv_free(tzInfo->defaultTZBuffer);
                }
                if (tzInfo->defaultTZFilePtr != NULL) {
                    fclose(tzInfo->defaultTZFilePtr);
                }
                uprv_free(tzInfo);
            }

            if (gTimeZoneBufferPtr != NULL && isValidOlsonID(gTimeZoneBufferPtr)) {
                return gTimeZoneBufferPtr;
            }
#endif
        }
    }
#endif
#endif

#ifdef U_TZNAME
#if U_PLATFORM_USES_ONLY_WIN32_API
    /* The return value is free'd in timezone.cpp on Windows because
     * the other code path returns a pointer to a heap location. */
    return uprv_strdup(U_TZNAME[n]);
#else
    /*
    U_TZNAME is usually a non-unique abbreviation, which isn't normally usable.
    So we remap the abbreviation to an olson ID.

    Since Windows exposes a little more timezone information,
    we normally don't use this code on Windows because
    uprv_detectWindowsTimeZone should have already given the correct answer.
    */
    {
        static const time_t juneSolstice=1182478260; /*2007-06-21 18:11 UT*/
        static const time_t decemberSolstice=1198332540; /*2007-12-22 06:09 UT*/
        struct tm juneSol, decemberSol;

        /* This probing will tell us when daylight savings occurs.  */
        localtime_r(&juneSolstice, &juneSol);
        localtime_r(&decemberSolstice, &decemberSol);

        int daylightType = U_DAYLIGHT_NONE;
        if (decemberSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_DECEMBER;
        } else if (juneSol.tm_isdst > 0) {
            daylightType = U_DAYLIGHT_JUNE;
        }

        tzid = remapShortTimeZone(U_TZNAME[0], U_TZNAME[1], daylightType, uprv_timezone());
        if (tzid != NULL) {
            return tzid;
        }
    }
    return U_TZNAME[n];
#endif
#else
    return "";
#endif
}
1260
1261
/* Get and set the ICU data directory --------------------------------------- */
1262
1263
static icu::UInitOnce gDataDirInitOnce = U_INITONCE_INITIALIZER;
1264
static char *gDataDirectory = NULL;
1265
1266
UInitOnce gTimeZoneFilesInitOnce = U_INITONCE_INITIALIZER;
1267
static CharString *gTimeZoneFilesDirectory = NULL;
1268
1269
#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
1270
 static const char *gCorrectedPOSIXLocale = NULL; /* Sometimes heap allocated */
1271
 static bool gCorrectedPOSIXLocaleHeapAllocated = false;
1272
#endif
1273
1274
/*
 * Cleanup callback registered with ucln_common_registerCleanup(): releases
 * the file-level state owned by this file and resets the related init-once
 * guards. Always returns TRUE.
 */
static UBool U_CALLCONV putil_cleanup(void)
{
    /* The data directory is heap-allocated only when it is non-empty. */
    if (gDataDirectory != NULL && *gDataDirectory != 0) {
        uprv_free(gDataDirectory);
    }
    gDataDirectory = NULL;
    gDataDirInitOnce.reset();

    /* Time zone files directory. */
    delete gTimeZoneFilesDirectory;
    gTimeZoneFilesDirectory = NULL;
    gTimeZoneFilesInitOnce.reset();

#ifdef SEARCH_TZFILE
    /* Result storage of searchForTZFile(). */
    delete gSearchTZFileResult;
    gSearchTZFileResult = NULL;
#endif

#if U_POSIX_LOCALE || U_PLATFORM_USES_ONLY_WIN32_API
    /* The corrected locale ID is released only when it was heap-allocated. */
    if (gCorrectedPOSIXLocaleHeapAllocated && gCorrectedPOSIXLocale != NULL) {
        uprv_free(const_cast<char *>(gCorrectedPOSIXLocale));
        gCorrectedPOSIXLocale = NULL;
        gCorrectedPOSIXLocaleHeapAllocated = false;
    }
#endif
    return TRUE;
}
1300
1301
/*
1302
 * Set the data directory.
1303
 *    Make a copy of the passed string, and set the global data dir to point to it.
1304
 */
1305
U_CAPI void U_EXPORT2
1306
0
u_setDataDirectory(const char *directory) {
1307
0
    char *newDataDir;
1308
0
    int32_t length;
1309
1310
0
    if(directory==NULL || *directory==0) {
1311
        /* A small optimization to prevent the malloc and copy when the
1312
        shared library is used, and this is a way to make sure that NULL
1313
        is never returned.
1314
        */
1315
0
        newDataDir = (char *)"";
1316
0
    }
1317
0
    else {
1318
0
        length=(int32_t)uprv_strlen(directory);
1319
0
        newDataDir = (char *)uprv_malloc(length + 2);
1320
        /* Exit out if newDataDir could not be created. */
1321
0
        if (newDataDir == NULL) {
1322
0
            return;
1323
0
        }
1324
0
        uprv_strcpy(newDataDir, directory);
1325
1326
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1327
        {
1328
            char *p;
1329
            while((p = uprv_strchr(newDataDir, U_FILE_ALT_SEP_CHAR)) != NULL) {
1330
                *p = U_FILE_SEP_CHAR;
1331
            }
1332
        }
1333
#endif
1334
0
    }
1335
1336
0
    if (gDataDirectory && *gDataDirectory) {
1337
0
        uprv_free(gDataDirectory);
1338
0
    }
1339
0
    gDataDirectory = newDataDir;
1340
0
    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1341
0
}
1342
1343
/*
 * Returns TRUE when path is absolute: it starts with U_FILE_SEP_CHAR, with
 * U_FILE_ALT_SEP_CHAR (when that is a different character), or, on
 * Windows-only builds, with an ASCII letter followed by ':'.
 * Returns FALSE for NULL, the empty string, and everything else.
 */
U_CAPI UBool U_EXPORT2
uprv_pathIsAbsolute(const char *path)
{
  if(path == NULL || *path == 0) {
    return FALSE;
  }

  UBool isAbsolute = FALSE;

  if(path[0] == U_FILE_SEP_CHAR) {
    isAbsolute = TRUE;
  }

#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
  if(path[0] == U_FILE_ALT_SEP_CHAR) {
    isAbsolute = TRUE;
  }
#endif

#if U_PLATFORM_USES_ONLY_WIN32_API
  /* Drive-letter prefix such as "C:". */
  const char drive = path[0];
  const bool driveIsLetter = (drive >= 'A' && drive <= 'Z') || (drive >= 'a' && drive <= 'z');
  if(driveIsLetter && path[1] == ':') {
    isAbsolute = TRUE;
  }
#endif

  return isAbsolute;
}
1370
1371
/* Backup setting of ICU_DATA_DIR_PREFIX_ENV_VAR
1372
   (needed for some Darwin ICU build environments) */
1373
#if U_PLATFORM_IS_DARWIN_BASED && defined(TARGET_OS_SIMULATOR) && TARGET_OS_SIMULATOR
1374
# if !defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
1375
#  define ICU_DATA_DIR_PREFIX_ENV_VAR "IPHONE_SIMULATOR_ROOT"
1376
# endif
1377
#endif
1378
1379
#if defined(ICU_DATA_DIR_WINDOWS)
1380
// Helper function to get the ICU Data Directory under the Windows directory location.
1381
static BOOL U_CALLCONV getIcuDataDirectoryUnderWindowsDirectory(char* directoryBuffer, UINT bufferLength)
1382
{
1383
    wchar_t windowsPath[MAX_PATH];
1384
    char windowsPathUtf8[MAX_PATH];
1385
1386
    UINT length = GetSystemWindowsDirectoryW(windowsPath, UPRV_LENGTHOF(windowsPath));
1387
    if ((length > 0) && (length < (UPRV_LENGTHOF(windowsPath) - 1))) {
1388
        // Convert UTF-16 to a UTF-8 string.
1389
        UErrorCode status = U_ZERO_ERROR;
1390
        int32_t windowsPathUtf8Len = 0;
1391
        u_strToUTF8(windowsPathUtf8, static_cast<int32_t>(UPRV_LENGTHOF(windowsPathUtf8)),
1392
            &windowsPathUtf8Len, reinterpret_cast<const UChar*>(windowsPath), -1, &status);
1393
1394
        if (U_SUCCESS(status) && (status != U_STRING_NOT_TERMINATED_WARNING) &&
1395
            (windowsPathUtf8Len < (UPRV_LENGTHOF(windowsPathUtf8) - 1))) {
1396
            // Ensure it always has a separator, so we can append the ICU data path.
1397
            if (windowsPathUtf8[windowsPathUtf8Len - 1] != U_FILE_SEP_CHAR) {
1398
                windowsPathUtf8[windowsPathUtf8Len++] = U_FILE_SEP_CHAR;
1399
                windowsPathUtf8[windowsPathUtf8Len] = '\0';
1400
            }
1401
            // Check if the concatenated string will fit.
1402
            if ((windowsPathUtf8Len + UPRV_LENGTHOF(ICU_DATA_DIR_WINDOWS)) < bufferLength) {
1403
                uprv_strcpy(directoryBuffer, windowsPathUtf8);
1404
                uprv_strcat(directoryBuffer, ICU_DATA_DIR_WINDOWS);
1405
                return TRUE;
1406
            }
1407
        }
1408
    }
1409
1410
    return FALSE;
1411
}
1412
#endif
1413
1414
0
/*
 * Init-once function for the data directory: picks a default location and
 * installs it with u_setDataDirectory(). Does nothing when a directory has
 * already been set.
 *
 * Candidates, later ones overriding earlier ones where noted:
 *   1. the ICU_DATA environment variable (unless user overrides or file I/O
 *      are compiled out, or the platform is Windows UWP);
 *   2. if that is unset or empty, the compile-time ICU_DATA_DIR or
 *      U_ICU_DATA_DEFAULT_DIR, optionally prefixed with the value of the
 *      ICU_DATA_DIR_PREFIX_ENV_VAR environment variable;
 *   3. with ICU_DATA_DIR_WINDOWS, the directory under the Windows directory,
 *      which replaces any earlier choice when it can be built;
 *   4. the empty string when nothing else produced a path.
 */
static void U_CALLCONV dataDirectoryInitFn() {
    /* Will happen if the user called u_setDataDirectory() first. */
    if (gDataDirectory != NULL) {
        return;
    }

    const char *path = NULL;
#if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
    char datadir_path_buffer[PATH_MAX];
#endif

    /*
    When ICU_NO_USER_DATA_OVERRIDE is defined, users aren't allowed to
    override ICU's data with the ICU_DATA environment variable. This prevents
    problems where multiple custom copies of ICU's specific version of data
    are installed on a system. Either the application must define the data
    directory with u_setDataDirectory, define ICU_DATA_DIR when compiling
    ICU, set the data with udata_setCommonData or trust that all of the
    required data is contained in ICU's data library that contains
    the entry point defined by U_ICUDATA_ENTRY_POINT.

    There may also be some platforms where environment variables
    are not allowed.
    */
#   if !defined(ICU_NO_USER_DATA_OVERRIDE) && !UCONFIG_NO_FILE_IO
    /* First try to get the environment variable */
#     if U_PLATFORM_HAS_WINUWP_API == 0  // Windows UWP does not support getenv
        path=getenv("ICU_DATA");
#     endif
#   endif

    /* ICU_DATA_DIR may be set as a compile option.
     * U_ICU_DATA_DEFAULT_DIR is provided and is set by ICU at compile time
     * and is used only when data is built in archive mode eliminating the need
     * for ICU_DATA_DIR to be set. U_ICU_DATA_DEFAULT_DIR is set to the installation
     * directory of the data dat file. Users should use ICU_DATA_DIR if they want to
     * set their own path.
     */
#if defined(ICU_DATA_DIR) || defined(U_ICU_DATA_DEFAULT_DIR)
    if(path==NULL || *path==0) {
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        const char *prefix = getenv(ICU_DATA_DIR_PREFIX_ENV_VAR);
# endif
# ifdef ICU_DATA_DIR
        path=ICU_DATA_DIR;
# else
        path=U_ICU_DATA_DEFAULT_DIR;
# endif
# if defined(ICU_DATA_DIR_PREFIX_ENV_VAR)
        if (prefix != NULL) {
            snprintf(datadir_path_buffer, PATH_MAX, "%s%s", prefix, path);
            path=datadir_path_buffer;
        }
# endif
    }
#endif

#if defined(ICU_DATA_DIR_WINDOWS)
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        path = datadir_path_buffer;
    }
#endif

    /* It looks really bad if nothing was found; set it to something. */
    u_setDataDirectory(path != NULL ? path : "");
}
1488
1489
/*
 * Returns the current data directory, running dataDirectoryInitFn() through
 * the init-once guard on first use to choose a default.
 */
U_CAPI const char * U_EXPORT2
u_getDataDirectory(void)
{
    umtx_initOnce(gDataDirInitOnce, &dataDirectoryInitFn);

    const char *currentDataDir = gDataDirectory;
    return currentDataDir;
}
1494
1495
0
static void setTimeZoneFilesDir(const char *path, UErrorCode &status) {
1496
0
    if (U_FAILURE(status)) {
1497
0
        return;
1498
0
    }
1499
0
    gTimeZoneFilesDirectory->clear();
1500
0
    gTimeZoneFilesDirectory->append(path, status);
1501
#if (U_FILE_SEP_CHAR != U_FILE_ALT_SEP_CHAR)
1502
    char *p = gTimeZoneFilesDirectory->data();
1503
    while ((p = uprv_strchr(p, U_FILE_ALT_SEP_CHAR)) != NULL) {
1504
        *p = U_FILE_SEP_CHAR;
1505
    }
1506
#endif
1507
0
}
1508
1509
#define TO_STRING(x) TO_STRING_2(x)
1510
#define TO_STRING_2(x) #x
1511
1512
0
/*
 * Init-once function for the time zone files directory: allocates
 * gTimeZoneFilesDirectory, registers putil_cleanup(), and stores the initial
 * directory via setTimeZoneFilesDir().
 *
 * The initial directory comes from the ICU_TIMEZONE_FILES_DIR environment
 * variable (on Windows UWP: from the directory under the Windows directory,
 * when ICU_DATA_DIR_WINDOWS is defined), then from the build-time
 * U_TIMEZONE_FILES_DIR, and is otherwise empty. When
 * ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR is defined and that environment
 * variable is set, its value is prepended.
 *
 * Sets status to U_MEMORY_ALLOCATION_ERROR if the allocation fails.
 */
static void U_CALLCONV TimeZoneDataDirInitFn(UErrorCode &status) {
    U_ASSERT(gTimeZoneFilesDirectory == NULL);
    ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
    gTimeZoneFilesDirectory = new CharString();
    if (gTimeZoneFilesDirectory == NULL) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }

    const char *tzFilesDir = "";

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    char timezonefilesdir_path_buffer[PATH_MAX];
    const char *dirPrefix = getenv(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR);
#endif

#if U_PLATFORM_HAS_WINUWP_API == 1
// The UWP version does not support the environment variable setting.

# if defined(ICU_DATA_DIR_WINDOWS)
    // When using the Windows system data, we can possibly pick up time zone data from the Windows directory.
    char datadir_path_buffer[MAX_PATH];
    if (getIcuDataDirectoryUnderWindowsDirectory(datadir_path_buffer, UPRV_LENGTHOF(datadir_path_buffer))) {
        tzFilesDir = datadir_path_buffer;
    }
# endif

#else
    tzFilesDir = getenv("ICU_TIMEZONE_FILES_DIR");
#endif // U_PLATFORM_HAS_WINUWP_API

    if (tzFilesDir == NULL) {
#if defined(U_TIMEZONE_FILES_DIR)
        // Build time configuration setting.
        tzFilesDir = TO_STRING(U_TIMEZONE_FILES_DIR);
#else
        tzFilesDir = "";
#endif
    }

#if defined(ICU_TIMEZONE_FILES_DIR_PREFIX_ENV_VAR)
    if (dirPrefix != NULL) {
        snprintf(timezonefilesdir_path_buffer, PATH_MAX, "%s%s", dirPrefix, tzFilesDir);
        tzFilesDir = timezonefilesdir_path_buffer;
    }
#endif

    setTimeZoneFilesDir(tzFilesDir, status);
}
1563
1564
1565
/*
 * Returns the time zone files directory, initializing it on first use.
 * Returns "" when *status indicates a failure after initialization.
 */
U_CAPI const char * U_EXPORT2
u_getTimeZoneFilesDirectory(UErrorCode *status) {
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, *status);
    if (U_SUCCESS(*status)) {
        return gTimeZoneFilesDirectory->data();
    }
    return "";
}
1570
1571
/*
 * Sets the time zone files directory to path.
 *
 * The init-once function runs first so that the backing storage exists. That
 * means the value is first set from the environment and then immediately
 * replaced with path. The logic is simpler that way, and performance should
 * not be an issue.
 */
U_CAPI void U_EXPORT2
u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status) {
    UErrorCode &errorCode = *status;
    umtx_initOnce(gTimeZoneFilesInitOnce, &TimeZoneDataDirInitFn, errorCode);
    setTimeZoneFilesDir(path, errorCode);
}
1580
1581
1582
#if U_POSIX_LOCALE
1583
/* A helper function used by uprv_getPOSIXIDForDefaultLocale and
1584
 * uprv_getPOSIXIDForDefaultCodepage. Returns the posix locale id for
1585
 * LC_CTYPE and LC_MESSAGES. It doesn't support other locale categories.
1586
 */
1587
static const char *uprv_getPOSIXIDForCategory(int category)
1588
0
{
1589
0
    const char* posixID = NULL;
1590
0
    if (category == LC_MESSAGES || category == LC_CTYPE) {
1591
        /*
1592
        * On Solaris two different calls to setlocale can result in
1593
        * different values. Only get this value once.
1594
        *
1595
        * We must check this first because an application can set this.
1596
        *
1597
        * LC_ALL can't be used because it's platform dependent. The LANG
1598
        * environment variable seems to affect LC_CTYPE variable by default.
1599
        * Here is what setlocale(LC_ALL, NULL) can return.
1600
        * HPUX can return 'C C C C C C C'
1601
        * Solaris can return /en_US/C/C/C/C/C on the second try.
1602
        * Linux can return LC_CTYPE=C;LC_NUMERIC=C;...
1603
        *
1604
        * The default codepage detection also needs to use LC_CTYPE.
1605
        *
1606
        * Do not call setlocale(LC_*, "")! Using an empty string instead
1607
        * of NULL, will modify the libc behavior.
1608
        */
1609
0
        posixID = setlocale(category, NULL);
1610
0
        if ((posixID == 0)
1611
0
            || (uprv_strcmp("C", posixID) == 0)
1612
0
            || (uprv_strcmp("POSIX", posixID) == 0))
1613
0
        {
1614
            /* Maybe we got some garbage.  Try something more reasonable */
1615
0
            posixID = getenv("LC_ALL");
1616
            /* Solaris speaks POSIX -  See IEEE Std 1003.1-2008
1617
             * This is needed to properly handle empty env. variables
1618
             */
1619
#if U_PLATFORM == U_PF_SOLARIS
1620
            if ((posixID == 0) || (posixID[0] == '\0')) {
1621
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1622
                if ((posixID == 0) || (posixID[0] == '\0')) {
1623
#else
1624
0
            if (posixID == 0) {
1625
0
                posixID = getenv(category == LC_MESSAGES ? "LC_MESSAGES" : "LC_CTYPE");
1626
0
                if (posixID == 0) {
1627
0
#endif
1628
0
                    posixID = getenv("LANG");
1629
0
                }
1630
0
            }
1631
0
        }
1632
0
    }
1633
0
    if ((posixID==0)
1634
0
        || (uprv_strcmp("C", posixID) == 0)
1635
0
        || (uprv_strcmp("POSIX", posixID) == 0))
1636
0
    {
1637
        /* Nothing worked.  Give it a nice POSIX default value. */
1638
0
        posixID = "en_US_POSIX";
1639
        // Note: this test will not catch 'C.UTF-8',
1640
        // that will be handled in uprv_getDefaultLocaleID().
1641
        // Leave this mapping here for the uprv_getPOSIXIDForDefaultCodepage()
1642
        // caller which expects to see "en_US_POSIX" in many branches.
1643
0
    }
1644
0
    return posixID;
1645
0
}
1646
1647
/* Return just the POSIX id for the default locale, whatever happens to be in
1648
 * it. It gets the value from LC_MESSAGES and indirectly from LC_ALL and LANG.
1649
 */
1650
static const char *uprv_getPOSIXIDForDefaultLocale(void)
1651
0
{
1652
0
    static const char* posixID = NULL;
1653
0
    if (posixID == 0) {
1654
0
        posixID = uprv_getPOSIXIDForCategory(LC_MESSAGES);
1655
0
    }
1656
0
    return posixID;
1657
0
}
1658
1659
#if !U_CHARSET_IS_UTF8
1660
/* Return just the POSIX id for the default codepage, whatever happens to be in
1661
 * it. It gets the value from LC_CTYPE and indirectly from LC_ALL and LANG.
1662
 */
1663
static const char *uprv_getPOSIXIDForDefaultCodepage(void)
1664
{
1665
    static const char* posixID = NULL;
1666
    if (posixID == 0) {
1667
        posixID = uprv_getPOSIXIDForCategory(LC_CTYPE);
1668
    }
1669
    return posixID;
1670
}
1671
#endif
1672
#endif
1673
1674
/* NOTE: The caller should handle thread safety */
1675
U_CAPI const char* U_EXPORT2
1676
uprv_getDefaultLocaleID()
1677
0
{
1678
0
#if U_POSIX_LOCALE
1679
/*
1680
  Note that:  (a '!' means the ID is improper somehow)
1681
     LC_ALL  ---->     default_loc          codepage
1682
--------------------------------------------------------
1683
     ab.CD             ab                   CD
1684
     ab@CD             ab__CD               -
1685
     ab@CD.EF          ab__CD               EF
1686
1687
     ab_CD.EF@GH       ab_CD_GH             EF
1688
1689
Some 'improper' ways to do the same as above:
1690
  !  ab_CD@GH.EF       ab_CD_GH             EF
1691
  !  ab_CD.EF@GH.IJ    ab_CD_GH             EF
1692
  !  ab_CD@ZZ.EF@GH.IJ ab_CD_GH             EF
1693
1694
     _CD@GH            _CD_GH               -
1695
     _CD.EF@GH         _CD_GH               EF
1696
1697
The variant cannot have dots in it.
1698
The 'rightmost' variant (@xxx) wins.
1699
The leftmost codepage (.xxx) wins.
1700
*/
1701
0
    const char* posixID = uprv_getPOSIXIDForDefaultLocale();
1702
1703
    /* Format: (no spaces)
1704
    ll [ _CC ] [ . MM ] [ @ VV]
1705
1706
      l = lang, C = ctry, M = charmap, V = variant
1707
    */
1708
1709
0
    if (gCorrectedPOSIXLocale != nullptr) {
1710
0
        return gCorrectedPOSIXLocale;
1711
0
    }
1712
1713
    // Copy the ID into owned memory.
1714
    // Over-allocate in case we replace "C" with "en_US_POSIX" (+10), + null termination
1715
0
    char *correctedPOSIXLocale = static_cast<char *>(uprv_malloc(uprv_strlen(posixID) + 10 + 1));
1716
0
    if (correctedPOSIXLocale == nullptr) {
1717
0
        return nullptr;
1718
0
    }
1719
0
    uprv_strcpy(correctedPOSIXLocale, posixID);
1720
1721
0
    char *limit;
1722
0
    if ((limit = uprv_strchr(correctedPOSIXLocale, '.')) != nullptr) {
1723
0
        *limit = 0;
1724
0
    }
1725
0
    if ((limit = uprv_strchr(correctedPOSIXLocale, '@')) != nullptr) {
1726
0
        *limit = 0;
1727
0
    }
1728
1729
0
    if ((uprv_strcmp("C", correctedPOSIXLocale) == 0) // no @ variant
1730
0
        || (uprv_strcmp("POSIX", correctedPOSIXLocale) == 0)) {
1731
      // Raw input was C.* or POSIX.*, Give it a nice POSIX default value.
1732
      // (The "C"/"POSIX" case is handled in uprv_getPOSIXIDForCategory())
1733
0
      uprv_strcpy(correctedPOSIXLocale, "en_US_POSIX");
1734
0
    }
1735
1736
    /* Note that we scan the *uncorrected* ID. */
1737
0
    const char *p;
1738
0
    if ((p = uprv_strrchr(posixID, '@')) != nullptr) {
1739
0
        p++;
1740
1741
        /* Take care of any special cases here.. */
1742
0
        if (!uprv_strcmp(p, "nynorsk")) {
1743
0
            p = "NY";
1744
            /* Don't worry about no__NY. In practice, it won't appear. */
1745
0
        }
1746
1747
0
        if (uprv_strchr(correctedPOSIXLocale,'_') == nullptr) {
1748
0
            uprv_strcat(correctedPOSIXLocale, "__"); /* aa@b -> aa__b (note this can make the new locale 1 char longer) */
1749
0
        }
1750
0
        else {
1751
0
            uprv_strcat(correctedPOSIXLocale, "_"); /* aa_CC@b -> aa_CC_b */
1752
0
        }
1753
1754
0
        const char *q;
1755
0
        if ((q = uprv_strchr(p, '.')) != nullptr) {
1756
            /* How big will the resulting string be? */
1757
0
            int32_t len = (int32_t)(uprv_strlen(correctedPOSIXLocale) + (q-p));
1758
0
            uprv_strncat(correctedPOSIXLocale, p, q-p); // do not include charset
1759
0
            correctedPOSIXLocale[len] = 0;
1760
0
        }
1761
0
        else {
1762
            /* Anything following the @ sign */
1763
0
            uprv_strcat(correctedPOSIXLocale, p);
1764
0
        }
1765
1766
        /* Should there be a map from 'no@nynorsk' -> no_NO_NY here?
1767
         * How about 'russian' -> 'ru'?
1768
         * Many of the other locales using ISO codes will be handled by the
1769
         * canonicalization functions in uloc_getDefault.
1770
         */
1771
0
    }
1772
1773
0
    if (gCorrectedPOSIXLocale == nullptr) {
1774
0
        gCorrectedPOSIXLocale = correctedPOSIXLocale;
1775
0
        gCorrectedPOSIXLocaleHeapAllocated = true;
1776
0
        ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1777
0
        correctedPOSIXLocale = nullptr;
1778
0
    }
1779
0
    posixID = gCorrectedPOSIXLocale;
1780
1781
0
    if (correctedPOSIXLocale != nullptr) {  /* Was already set - clean up. */
1782
0
        uprv_free(correctedPOSIXLocale);
1783
0
    }
1784
1785
0
    return posixID;
1786
1787
#elif U_PLATFORM_USES_ONLY_WIN32_API
1788
#define POSIX_LOCALE_CAPACITY 64
1789
    UErrorCode status = U_ZERO_ERROR;
1790
    char *correctedPOSIXLocale = nullptr;
1791
1792
    // If we have already figured this out just use the cached value
1793
    if (gCorrectedPOSIXLocale != nullptr) {
1794
        return gCorrectedPOSIXLocale;
1795
    }
1796
1797
    // No cached value, need to determine the current value
1798
    static WCHAR windowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1799
    int length = GetLocaleInfoEx(LOCALE_NAME_USER_DEFAULT, LOCALE_SNAME, windowsLocale, LOCALE_NAME_MAX_LENGTH);
1800
1801
    // Now we should have a Windows locale name that needs converted to the POSIX style.
1802
    if (length > 0) // If length is 0, then the GetLocaleInfoEx failed.
1803
    {
1804
        // First we need to go from UTF-16 to char (and also convert from _ to - while we're at it.)
1805
        char modifiedWindowsLocale[LOCALE_NAME_MAX_LENGTH] = {};
1806
1807
        int32_t i;
1808
        for (i = 0; i < UPRV_LENGTHOF(modifiedWindowsLocale); i++)
1809
        {
1810
            if (windowsLocale[i] == '_')
1811
            {
1812
                modifiedWindowsLocale[i] = '-';
1813
            }
1814
            else
1815
            {
1816
                modifiedWindowsLocale[i] = static_cast<char>(windowsLocale[i]);
1817
            }
1818
1819
            if (modifiedWindowsLocale[i] == '\0')
1820
            {
1821
                break;
1822
            }
1823
        }
1824
1825
        if (i >= UPRV_LENGTHOF(modifiedWindowsLocale))
1826
        {
1827
            // Ran out of room, can't really happen, maybe we'll be lucky about a matching
1828
            // locale when tags are dropped
1829
            modifiedWindowsLocale[UPRV_LENGTHOF(modifiedWindowsLocale) - 1] = '\0';
1830
        }
1831
1832
        // Now normalize the resulting name
1833
        correctedPOSIXLocale = static_cast<char *>(uprv_malloc(POSIX_LOCALE_CAPACITY + 1));
1834
        /* TODO: Should we just exit on memory allocation failure? */
1835
        if (correctedPOSIXLocale)
1836
        {
1837
            int32_t posixLen = uloc_canonicalize(modifiedWindowsLocale, correctedPOSIXLocale, POSIX_LOCALE_CAPACITY, &status);
1838
            if (U_SUCCESS(status))
1839
            {
1840
                *(correctedPOSIXLocale + posixLen) = 0;
1841
                gCorrectedPOSIXLocale = correctedPOSIXLocale;
1842
                gCorrectedPOSIXLocaleHeapAllocated = true;
1843
                ucln_common_registerCleanup(UCLN_COMMON_PUTIL, putil_cleanup);
1844
            }
1845
            else
1846
            {
1847
                uprv_free(correctedPOSIXLocale);
1848
            }
1849
        }
1850
    }
1851
1852
    // If unable to find a locale we can agree upon, use en-US by default
1853
    if (gCorrectedPOSIXLocale == nullptr) {
1854
        gCorrectedPOSIXLocale = "en_US";
1855
    }
1856
    return gCorrectedPOSIXLocale;
1857
1858
#elif U_PLATFORM == U_PF_OS400
1859
    /* locales are process scoped and are by definition thread safe */
1860
    static char correctedLocale[64];
1861
    const  char *localeID = getenv("LC_ALL");
1862
           char *p;
1863
1864
    if (localeID == NULL)
1865
        localeID = getenv("LANG");
1866
    if (localeID == NULL)
1867
        localeID = setlocale(LC_ALL, NULL);
1868
    /* Make sure we have something... */
1869
    if (localeID == NULL)
1870
        return "en_US_POSIX";
1871
1872
    /* Extract the locale name from the path. */
1873
    if((p = uprv_strrchr(localeID, '/')) != NULL)
1874
    {
1875
        /* Increment p to start of locale name. */
1876
        p++;
1877
        localeID = p;
1878
    }
1879
1880
    /* Copy to work location. */
1881
    uprv_strcpy(correctedLocale, localeID);
1882
1883
    /* Strip off the '.locale' extension. */
1884
    if((p = uprv_strchr(correctedLocale, '.')) != NULL) {
1885
        *p = 0;
1886
    }
1887
1888
    /* Upper case the locale name. */
1889
    T_CString_toUpperCase(correctedLocale);
1890
1891
    /* See if we are using the POSIX locale.  Any of the
1892
    * following are equivalent and use the same QLGPGCMA
1893
    * (POSIX) locale.
1894
    * QLGPGCMA2 means UCS2
1895
    * QLGPGCMA_4 means UTF-32
1896
    * QLGPGCMA_8 means UTF-8
1897
    */
1898
    if ((uprv_strcmp("C", correctedLocale) == 0) ||
1899
        (uprv_strcmp("POSIX", correctedLocale) == 0) ||
1900
        (uprv_strncmp("QLGPGCMA", correctedLocale, 8) == 0))
1901
    {
1902
        uprv_strcpy(correctedLocale, "en_US_POSIX");
1903
    }
1904
    else
1905
    {
1906
        int16_t LocaleLen;
1907
1908
        /* Lower case the lang portion. */
1909
        for(p = correctedLocale; *p != 0 && *p != '_'; p++)
1910
        {
1911
            *p = uprv_tolower(*p);
1912
        }
1913
1914
        /* Adjust for Euro.  After '_E' add 'URO'. */
1915
        LocaleLen = uprv_strlen(correctedLocale);
1916
        if (correctedLocale[LocaleLen - 2] == '_' &&
1917
            correctedLocale[LocaleLen - 1] == 'E')
1918
        {
1919
            uprv_strcat(correctedLocale, "URO");
1920
        }
1921
1922
        /* If using Lotus-based locale then convert to
1923
         * equivalent non Lotus.
1924
         */
1925
        else if (correctedLocale[LocaleLen - 2] == '_' &&
1926
            correctedLocale[LocaleLen - 1] == 'L')
1927
        {
1928
            correctedLocale[LocaleLen - 2] = 0;
1929
        }
1930
1931
        /* There are separate simplified and traditional
1932
         * locales called zh_HK_S and zh_HK_T.
1933
         */
1934
        else if (uprv_strncmp(correctedLocale, "zh_HK", 5) == 0)
1935
        {
1936
            uprv_strcpy(correctedLocale, "zh_HK");
1937
        }
1938
1939
        /* A special zh_CN_GBK locale...
1940
        */
1941
        else if (uprv_strcmp(correctedLocale, "zh_CN_GBK") == 0)
1942
        {
1943
            uprv_strcpy(correctedLocale, "zh_CN");
1944
        }
1945
1946
    }
1947
1948
    return correctedLocale;
1949
#endif
1950
1951
0
}
1952
1953
#if !U_CHARSET_IS_UTF8
1954
#if U_POSIX_LOCALE
1955
/*
1956
Due to various platform differences, one platform may specify a charset,
1957
when they really mean a different charset. Remap the names so that they are
1958
compatible with ICU. Only conflicting/ambiguous aliases should be resolved
1959
here. Before adding anything to this function, please consider adding unique
1960
names to the ICU alias table in the data directory.
1961
*/
1962
static const char*
1963
remapPlatformDependentCodepage(const char *locale, const char *name) {
1964
    if (locale != NULL && *locale == 0) {
1965
        /* Make sure that an empty locale is handled the same way. */
1966
        locale = NULL;
1967
    }
1968
    if (name == NULL) {
1969
        return NULL;
1970
    }
1971
#if U_PLATFORM == U_PF_AIX
1972
    if (uprv_strcmp(name, "IBM-943") == 0) {
1973
        /* Use the ASCII compatible ibm-943 */
1974
        name = "Shift-JIS";
1975
    }
1976
    else if (uprv_strcmp(name, "IBM-1252") == 0) {
1977
        /* Use the windows-1252 that contains the Euro */
1978
        name = "IBM-5348";
1979
    }
1980
#elif U_PLATFORM == U_PF_SOLARIS
1981
    if (locale != NULL && uprv_strcmp(name, "EUC") == 0) {
1982
        /* Solaris underspecifies the "EUC" name. */
1983
        if (uprv_strcmp(locale, "zh_CN") == 0) {
1984
            name = "EUC-CN";
1985
        }
1986
        else if (uprv_strcmp(locale, "zh_TW") == 0) {
1987
            name = "EUC-TW";
1988
        }
1989
        else if (uprv_strcmp(locale, "ko_KR") == 0) {
1990
            name = "EUC-KR";
1991
        }
1992
    }
1993
    else if (uprv_strcmp(name, "eucJP") == 0) {
1994
        /*
1995
        ibm-954 is the best match.
1996
        ibm-33722 is the default for eucJP (similar to Windows).
1997
        */
1998
        name = "eucjis";
1999
    }
2000
    else if (uprv_strcmp(name, "646") == 0) {
2001
        /*
2002
         * The default codepage given by Solaris is 646 but the C library routines treat it as if it was
2003
         * ISO-8859-1 instead of US-ASCII(646).
2004
         */
2005
        name = "ISO-8859-1";
2006
    }
2007
#elif U_PLATFORM_IS_DARWIN_BASED
2008
    if (locale == NULL && *name == 0) {
2009
        /*
2010
        No locale was specified, and an empty name was passed in.
2011
        This usually indicates that nl_langinfo didn't return valid information.
2012
        Mac OS X uses UTF-8 by default (especially the locale data and console).
2013
        */
2014
        name = "UTF-8";
2015
    }
2016
    else if (uprv_strcmp(name, "CP949") == 0) {
2017
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2018
        name = "EUC-KR";
2019
    }
2020
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 && uprv_strcmp(name, "US-ASCII") == 0) {
2021
        /*
2022
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2023
         */
2024
        name = "UTF-8";
2025
    }
2026
#elif U_PLATFORM == U_PF_BSD
2027
    if (uprv_strcmp(name, "CP949") == 0) {
2028
        /* Remap CP949 to a similar codepage to avoid issues with backslash and won symbol. */
2029
        name = "EUC-KR";
2030
    }
2031
#elif U_PLATFORM == U_PF_HPUX
2032
    if (locale != NULL && uprv_strcmp(locale, "zh_HK") == 0 && uprv_strcmp(name, "big5") == 0) {
2033
        /* HP decided to extend big5 as hkbig5 even though it's not compatible :-( */
2034
        /* zh_TW.big5 is not the same charset as zh_HK.big5! */
2035
        name = "hkbig5";
2036
    }
2037
    else if (uprv_strcmp(name, "eucJP") == 0) {
2038
        /*
2039
        ibm-1350 is the best match, but unavailable.
2040
        ibm-954 is mostly a superset of ibm-1350.
2041
        ibm-33722 is the default for eucJP (similar to Windows).
2042
        */
2043
        name = "eucjis";
2044
    }
2045
#elif U_PLATFORM == U_PF_LINUX
2046
    if (locale != NULL && uprv_strcmp(name, "euc") == 0) {
2047
        /* Linux underspecifies the "EUC" name. */
2048
        if (uprv_strcmp(locale, "korean") == 0) {
2049
            name = "EUC-KR";
2050
        }
2051
        else if (uprv_strcmp(locale, "japanese") == 0) {
2052
            /* See comment below about eucJP */
2053
            name = "eucjis";
2054
        }
2055
    }
2056
    else if (uprv_strcmp(name, "eucjp") == 0) {
2057
        /*
2058
        ibm-1350 is the best match, but unavailable.
2059
        ibm-954 is mostly a superset of ibm-1350.
2060
        ibm-33722 is the default for eucJP (similar to Windows).
2061
        */
2062
        name = "eucjis";
2063
    }
2064
    else if (locale != NULL && uprv_strcmp(locale, "en_US_POSIX") != 0 &&
2065
            (uprv_strcmp(name, "ANSI_X3.4-1968") == 0 || uprv_strcmp(name, "US-ASCII") == 0)) {
2066
        /*
2067
         * For non C/POSIX locale, default the code page to UTF-8 instead of US-ASCII.
2068
         */
2069
        name = "UTF-8";
2070
    }
2071
    /*
2072
     * Linux returns ANSI_X3.4-1968 for C/POSIX, but the call site takes care of
2073
     * it by falling back to 'US-ASCII' when NULL is returned from this
2074
     * function. So, we don't have to worry about it here.
2075
     */
2076
#endif
2077
    /* return NULL when "" is passed in */
2078
    if (*name == 0) {
2079
        name = NULL;
2080
    }
2081
    return name;
2082
}
2083
2084
static const char*
2085
getCodepageFromPOSIXID(const char *localeName, char * buffer, int32_t buffCapacity)
2086
{
2087
    char localeBuf[100];
2088
    const char *name = NULL;
2089
    char *variant = NULL;
2090
2091
    if (localeName != NULL && (name = (uprv_strchr(localeName, '.'))) != NULL) {
2092
        size_t localeCapacity = uprv_min(sizeof(localeBuf), (name-localeName)+1);
2093
        uprv_strncpy(localeBuf, localeName, localeCapacity);
2094
        localeBuf[localeCapacity-1] = 0; /* ensure NULL termination */
2095
        name = uprv_strncpy(buffer, name+1, buffCapacity);
2096
        buffer[buffCapacity-1] = 0; /* ensure NULL termination */
2097
        if ((variant = const_cast<char *>(uprv_strchr(name, '@'))) != NULL) {
2098
            *variant = 0;
2099
        }
2100
        name = remapPlatformDependentCodepage(localeBuf, name);
2101
    }
2102
    return name;
2103
}
2104
#endif
2105
2106
static const char*
2107
int_getDefaultCodepage()
2108
{
2109
#if U_PLATFORM == U_PF_OS400
2110
    uint32_t ccsid = 37; /* Default to ibm-37 */
2111
    static char codepage[64];
2112
    Qwc_JOBI0400_t jobinfo;
2113
    Qus_EC_t error = { sizeof(Qus_EC_t) }; /* SPI error code */
2114
2115
    EPT_CALL(QUSRJOBI)(&jobinfo, sizeof(jobinfo), "JOBI0400",
2116
        "*                         ", "                ", &error);
2117
2118
    if (error.Bytes_Available == 0) {
2119
        if (jobinfo.Coded_Char_Set_ID != 0xFFFF) {
2120
            ccsid = (uint32_t)jobinfo.Coded_Char_Set_ID;
2121
        }
2122
        else if (jobinfo.Default_Coded_Char_Set_Id != 0xFFFF) {
2123
            ccsid = (uint32_t)jobinfo.Default_Coded_Char_Set_Id;
2124
        }
2125
        /* else use the default */
2126
    }
2127
    sprintf(codepage,"ibm-%d", ccsid);
2128
    return codepage;
2129
2130
#elif U_PLATFORM == U_PF_OS390
2131
    static char codepage[64];
2132
2133
    strncpy(codepage, nl_langinfo(CODESET),63-strlen(UCNV_SWAP_LFNL_OPTION_STRING));
2134
    strcat(codepage,UCNV_SWAP_LFNL_OPTION_STRING);
2135
    codepage[63] = 0; /* NULL terminate */
2136
2137
    return codepage;
2138
2139
#elif U_PLATFORM_USES_ONLY_WIN32_API
2140
    static char codepage[64];
2141
    DWORD codepageNumber = 0;
2142
2143
#if U_PLATFORM_HAS_WINUWP_API == 1
2144
    // UWP doesn't have a direct API to get the default ACP as Microsoft would rather
2145
    // have folks use Unicode than a "system" code page, however this is the same
2146
    // codepage as the system default locale codepage.  (FWIW, the system locale is
2147
    // ONLY used for codepage, it should never be used for anything else)
2148
    GetLocaleInfoEx(LOCALE_NAME_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
2149
        (LPWSTR)&codepageNumber, sizeof(codepageNumber) / sizeof(WCHAR));
2150
#else
2151
    // Win32 apps can call GetACP
2152
    codepageNumber = GetACP();
2153
#endif
2154
    // Special case for UTF-8
2155
    if (codepageNumber == 65001)
2156
    {
2157
        return "UTF-8";
2158
    }
2159
    // Windows codepages can look like windows-1252, so format the found number
2160
    // the numbers are eclectic, however all valid system code pages, besides UTF-8
2161
    // are between 3 and 19999
2162
    if (codepageNumber > 0 && codepageNumber < 20000)
2163
    {
2164
        sprintf(codepage, "windows-%ld", codepageNumber);
2165
        return codepage;
2166
    }
2167
    // If the codepage number call failed then return UTF-8
2168
    return "UTF-8";
2169
2170
#elif U_POSIX_LOCALE
2171
    static char codesetName[100];
2172
    const char *localeName = NULL;
2173
    const char *name = NULL;
2174
2175
    localeName = uprv_getPOSIXIDForDefaultCodepage();
2176
    uprv_memset(codesetName, 0, sizeof(codesetName));
2177
    /* On Solaris nl_langinfo returns C locale values unless setlocale
2178
     * was called earlier.
2179
     */
2180
#if (U_HAVE_NL_LANGINFO_CODESET && U_PLATFORM != U_PF_SOLARIS)
2181
    /* When available, check nl_langinfo first because it usually gives more
2182
       useful names. It depends on LC_CTYPE.
2183
       nl_langinfo may use the same buffer as setlocale. */
2184
    {
2185
        const char *codeset = nl_langinfo(U_NL_LANGINFO_CODESET);
2186
#if U_PLATFORM_IS_DARWIN_BASED || U_PLATFORM_IS_LINUX_BASED
2187
        /*
2188
         * On Linux and MacOSX, ensure that default codepage for non C/POSIX locale is UTF-8
2189
         * instead of ASCII.
2190
         */
2191
        if (uprv_strcmp(localeName, "en_US_POSIX") != 0) {
2192
            codeset = remapPlatformDependentCodepage(localeName, codeset);
2193
        } else
2194
#endif
2195
        {
2196
            codeset = remapPlatformDependentCodepage(NULL, codeset);
2197
        }
2198
2199
        if (codeset != NULL) {
2200
            uprv_strncpy(codesetName, codeset, sizeof(codesetName));
2201
            codesetName[sizeof(codesetName)-1] = 0;
2202
            return codesetName;
2203
        }
2204
    }
2205
#endif
2206
2207
    /* Use setlocale in a nice way, and then check some environment variables.
2208
       Maybe the application used setlocale already.
2209
    */
2210
    uprv_memset(codesetName, 0, sizeof(codesetName));
2211
    name = getCodepageFromPOSIXID(localeName, codesetName, sizeof(codesetName));
2212
    if (name) {
2213
        /* if we can find the codeset name from setlocale, return that. */
2214
        return name;
2215
    }
2216
2217
    if (*codesetName == 0)
2218
    {
2219
        /* Everything failed. Return US ASCII (ISO 646). */
2220
        (void)uprv_strcpy(codesetName, "US-ASCII");
2221
    }
2222
    return codesetName;
2223
#else
2224
    return "US-ASCII";
2225
#endif
2226
}
2227
2228
2229
/**
 * Returns the name of the default codepage.
 * The name is computed by int_getDefaultCodepage() on the first call, between
 * umtx_lock(NULL) and umtx_unlock(NULL), and is cached in a function-local
 * static for all later calls.
 */
U_CAPI const char*  U_EXPORT2
uprv_getDefaultCodepage()
{
    static const char *cachedCodepage = NULL;

    umtx_lock(NULL);
    if (!cachedCodepage) {
        cachedCodepage = int_getDefaultCodepage();
    }
    umtx_unlock(NULL);

    return cachedCodepage;
}
2240
#endif  /* !U_CHARSET_IS_UTF8 */
2241
2242
2243
/* end of platform-specific implementation -------------- */
2244
2245
/* version handling --------------------------------------------------------- */
2246
2247
/**
 * Parses a version string of decimal fields separated by U_VERSION_DELIMITER
 * into versionArray.
 *
 * At most U_MAX_VERSION_LENGTH fields are read; parsing stops at the first
 * field that has no digits or that is not followed by the delimiter. Each
 * parsed value is stored after conversion to uint8_t. All fields that were
 * not parsed are set to 0. Nothing happens when versionArray is NULL; a NULL
 * versionString yields an all-zero version.
 */
U_CAPI void U_EXPORT2
u_versionFromString(UVersionInfo versionArray, const char *versionString) {
    if(versionArray==NULL) {
        return;
    }

    uint16_t filled=0;
    if(versionString!=NULL) {
        const char *cursor=versionString;
        for(;;) {
            char *stop;
            versionArray[filled]=(uint8_t)uprv_strtoul(cursor, &stop, 10);
            if(stop==cursor) {
                /* no digits here: this field is not counted and gets zeroed below */
                break;
            }
            ++filled;
            if(filled==U_MAX_VERSION_LENGTH) {
                break;
            }
            if(*stop!=U_VERSION_DELIMITER) {
                break;
            }
            cursor=stop+1;
        }
    }

    /* zero all fields that were not set from the string */
    for(; filled<U_MAX_VERSION_LENGTH; ++filled) {
        versionArray[filled]=0;
    }
}
2270
2271
/**
 * Parses a version given as a UChar string into versionArray.
 *
 * At most U_MAX_VERSION_STRING_LENGTH characters of versionString are
 * converted with u_UCharsToChars() and then handed to u_versionFromString().
 * Nothing happens when either argument is NULL.
 */
U_CAPI void U_EXPORT2
u_versionFromUString(UVersionInfo versionArray, const UChar *versionString) {
    if(versionArray==NULL || versionString==NULL) {
        return;
    }

    char narrowVersion[U_MAX_VERSION_STRING_LENGTH+1];
    int32_t fullLength = u_strlen(versionString);
    /* longer input is truncated so that it fits the local buffer */
    int32_t copyLength = (fullLength>U_MAX_VERSION_STRING_LENGTH) ? U_MAX_VERSION_STRING_LENGTH : fullLength;

    u_UCharsToChars(versionString, narrowVersion, copyLength);
    narrowVersion[copyLength]=0;
    u_versionFromString(versionArray, narrowVersion);
}
2284
2285
/**
 * Writes versionArray as a NUL-terminated string of decimal fields separated
 * by U_VERSION_DELIMITER.
 *
 * Trailing fields that are 0 are omitted, but at least two fields are always
 * written. Nothing happens when versionString is NULL; a NULL versionArray
 * produces an empty string.
 *
 * @param versionArray   the version fields to format, may be NULL
 * @param versionString  destination buffer supplied by the caller; it must be
 *                       large enough for the formatted fields, the delimiters
 *                       and the terminating NUL
 */
U_CAPI void U_EXPORT2
u_versionToString(const UVersionInfo versionArray, char *versionString) {
    if(versionString==NULL) {
        return;
    }

    if(versionArray==NULL) {
        versionString[0]=0;
        return;
    }

    /* count how many fields need to be written */
    uint16_t count=U_MAX_VERSION_LENGTH;
    while(count>0 && versionArray[count-1]==0) {
        --count;
    }
    if(count<=1) {
        count=2;
    }

    for(uint16_t part=0; part<count; ++part) {
        /* every field except the first one is preceded by the delimiter */
        if(part>0) {
            *versionString++=U_VERSION_DELIMITER;
        }

        /* write the decimal field value */
        uint8_t field=versionArray[part];
        if(field>=100) {
            *versionString++=(char)('0'+field/100);
            field%=100;
            /* after a hundreds digit the tens digit must be written even
             * when it is 0, otherwise 105 would come out as "15" */
            *versionString++=(char)('0'+field/10);
            field%=10;
        } else if(field>=10) {
            *versionString++=(char)('0'+field/10);
            field%=10;
        }
        *versionString++=(char)('0'+field);
    }

    /* NUL-terminate */
    *versionString=0;
}
2341
2342
/**
 * Fills versionArray with the version obtained by parsing U_ICU_VERSION
 * with u_versionFromString().
 */
U_CAPI void U_EXPORT2
u_getVersion(UVersionInfo versionArray) {
    // Reference the copyright variable so that clang does not warn about it being unused.
    static_cast<void>(copyright);
    u_versionFromString(versionArray, U_ICU_VERSION);
}
2347
2348
/**
2349
 * icucfg.h dependent code
2350
 */
2351
2352
#if U_ENABLE_DYLOAD && HAVE_DLOPEN && !U_PLATFORM_USES_ONLY_WIN32_API
2353
2354
#if HAVE_DLFCN_H
2355
#ifdef __MVS__
2356
#ifndef __SUSV3
2357
#define __SUSV3 1
2358
#endif
2359
#endif
2360
#include <dlfcn.h>
2361
#endif /* HAVE_DLFCN_H */
2362
2363
/**
 * Loads the shared library libName with dlopen(RTLD_NOW|RTLD_GLOBAL).
 *
 * @param libName  name of the library to load
 * @param status   in/out error code; nothing is done when it already holds a
 *                 failure, and it is set to U_MISSING_RESOURCE_ERROR when
 *                 dlopen() fails
 * @return the handle returned by dlopen(), or NULL on failure
 */
U_CAPI void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
  if(U_FAILURE(*status)) {
    return NULL;
  }

  void *handle = dlopen(libName, RTLD_NOW|RTLD_GLOBAL);
  if(handle == NULL) {
#ifdef U_TRACE_DYLOAD
    printf("dlerror on dlopen(%s): %s\n", libName, dlerror());
#endif
    *status = U_MISSING_RESOURCE_ERROR;
  }
  return handle;
}
2376
2377
/**
 * Closes a library handle with dlclose(). The result of dlclose() is not
 * inspected. Nothing is done when *status already holds a failure.
 */
U_CAPI void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
  if(U_SUCCESS(*status)) {
    dlclose(lib);
  }
}
2382
2383
/**
 * Looks up the symbol sym in the library lib with dlsym().
 *
 * @param lib     library handle
 * @param sym     name of the symbol
 * @param status  in/out error code; nothing is done when it already holds a
 *                failure, and it is set to U_MISSING_RESOURCE_ERROR when
 *                dlsym() returns NULL
 * @return the symbol as a function pointer, or NULL on failure
 */
U_CAPI UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
  /* dlsym() returns a void pointer; it is stored through one union member
   * and read back through the function-pointer member. */
  union {
      UVoidFunction *fp;
      void *vp;
  } symbol;
  symbol.fp = NULL;

  if(U_SUCCESS(*status)) {
    symbol.vp = dlsym(lib, sym);
    if(symbol.vp == NULL) {
#ifdef U_TRACE_DYLOAD
      printf("dlerror on dlsym(%p,%s): %s\n", lib,sym, dlerror());
#endif
      *status = U_MISSING_RESOURCE_ERROR;
    }
  }
  return symbol.fp;
}
2400
2401
#elif U_ENABLE_DYLOAD && U_PLATFORM_USES_ONLY_WIN32_API && !U_PLATFORM_HAS_WINUWP_API
2402
2403
/* Windows API implementation. */
2404
// Note: UWP does not expose/allow these APIs, so the UWP version gets the null implementation.
2405
2406
/**
 * Loads the library libName with LoadLibraryA().
 *
 * @param libName  name of the library to load
 * @param status   in/out error code; nothing is done when it already holds a
 *                 failure, and it is set to U_MISSING_RESOURCE_ERROR when
 *                 LoadLibraryA() returns NULL
 * @return the module handle as a void pointer, or NULL on failure
 */
U_CAPI void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
  if(U_FAILURE(*status)) {
    return NULL;
  }

  HMODULE module = LoadLibraryA(libName);
  if(module == NULL) {
    *status = U_MISSING_RESOURCE_ERROR;
  }
  return (void*)module;
}
2420
2421
/**
 * Releases a library handle with FreeLibrary(). The result of FreeLibrary()
 * is not inspected. Nothing is done when *status already holds a failure.
 */
U_CAPI void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
  if(U_SUCCESS(*status)) {
    FreeLibrary((HMODULE)lib);
  }
}
2430
2431
/**
 * Looks up the symbol sym in the library lib with GetProcAddress().
 *
 * @param lib     library handle; NULL yields NULL without touching status
 * @param sym     name of the symbol
 * @param status  in/out error code; nothing is done when it already holds a
 *                failure. When the lookup fails it is set to
 *                U_MISSING_RESOURCE_ERROR if GetLastError() reports
 *                ERROR_PROC_NOT_FOUND, and to U_UNSUPPORTED_ERROR otherwise
 * @return the symbol as a function pointer, or NULL on failure
 */
U_CAPI UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
  if(U_FAILURE(*status) || lib==NULL) {
    return NULL;
  }

  UVoidFunction* function = (UVoidFunction*)GetProcAddress((HMODULE)lib, sym);
  if(function == NULL) {
    DWORD lastError = GetLastError();
    /* anything but "procedure not found" is reported as an unknown error */
    *status = (lastError == ERROR_PROC_NOT_FOUND) ? U_MISSING_RESOURCE_ERROR
                                                  : U_UNSUPPORTED_ERROR;
  }
  return function;
}
2451
2452
#else
2453
2454
/* No dynamic loading, null (nonexistent) implementation. */
2455
2456
/**
 * Null implementation for builds without dynamic loading: always returns
 * NULL, and sets *status to U_UNSUPPORTED_ERROR unless it already holds a
 * failure.
 */
U_CAPI void * U_EXPORT2
uprv_dl_open(const char *libName, UErrorCode *status) {
    (void)libName;
    if(U_SUCCESS(*status)) {
        *status = U_UNSUPPORTED_ERROR;
    }
    return NULL;
}
2463
2464
/**
 * Null implementation for builds without dynamic loading: sets *status to
 * U_UNSUPPORTED_ERROR unless it already holds a failure.
 */
U_CAPI void U_EXPORT2
uprv_dl_close(void *lib, UErrorCode *status) {
    (void)lib;
    if(U_SUCCESS(*status)) {
        *status = U_UNSUPPORTED_ERROR;
    }
}
2471
2472
/**
 * Null implementation for builds without dynamic loading: always returns
 * NULL, and sets *status to U_UNSUPPORTED_ERROR unless it already holds a
 * failure.
 */
U_CAPI UVoidFunction* U_EXPORT2
uprv_dlsym_func(void *lib, const char* sym, UErrorCode *status) {
  (void)lib;
  (void)sym;
  if(U_FAILURE(*status)) {
    return (UVoidFunction*)NULL;
  }
  *status = U_UNSUPPORTED_ERROR;
  return (UVoidFunction*)NULL;
}
2481
2482
#endif
2483
2484
/*
2485
 * Hey, Emacs, please set the following:
2486
 *
2487
 * Local Variables:
2488
 * indent-tabs-mode: nil
2489
 * End:
2490
 *
2491
 */