Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/umapfile.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1999-2013, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************/
10
11
12
/*----------------------------------------------------------------------------
13
 *
14
 *       Memory mapped file wrappers for use by the ICU Data Implementation
15
 *       All of the platform-specific implementation for mapping data files
16
 *         is here.  The rest of the ICU Data implementation uses only the
17
 *         wrapper functions.
18
 *
19
 *----------------------------------------------------------------------------*/
20
/* Defines _XOPEN_SOURCE for access to POSIX functions.
21
 * Must be before any other #includes. */
22
#include "uposixdefs.h"
23
24
#include "unicode/putil.h"
25
#include "unicode/ustring.h"
26
#include "udatamem.h"
27
#include "umapfile.h"
28
29
/* memory-mapping base definitions ------------------------------------------ */
30
31
#if MAP_IMPLEMENTATION==MAP_WIN32
32
#ifndef WIN32_LEAN_AND_MEAN
33
#   define WIN32_LEAN_AND_MEAN
34
#endif
35
#   define VC_EXTRALEAN
36
#   define NOUSER
37
#   define NOSERVICE
38
#   define NOIME
39
#   define NOMCX
40
41
#   if U_PLATFORM_HAS_WINUWP_API == 1
42
        // Some previous versions of the Windows 10 SDK don't expose various APIs for UWP applications
43
        // to use, even though UWP apps are allowed to call and use them.  Temporarily change the
44
        // WINAPI family partition below to Desktop, so that function declarations are visible for UWP.
45
#       include <winapifamily.h>
46
#       if !(WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP | WINAPI_PARTITION_SYSTEM))
47
#           pragma push_macro("WINAPI_PARTITION_DESKTOP")
48
#           undef WINAPI_PARTITION_DESKTOP
49
#           define WINAPI_PARTITION_DESKTOP 1
50
#           define CHANGED_WINAPI_PARTITION_DESKTOP_VALUE
51
#       endif
52
#   endif
53
54
#   include <windows.h>
55
56
#   if U_PLATFORM_HAS_WINUWP_API == 1 && defined(CHANGED_WINAPI_PARTITION_DESKTOP_VALUE)
57
#       pragma pop_macro("WINAPI_PARTITION_DESKTOP")
58
#   endif
59
60
#   include "cmemory.h"
61
62
typedef HANDLE MemoryMap;
63
64
#   define IS_MAP(map) ((map)!=nullptr)
65
66
#elif MAP_IMPLEMENTATION==MAP_POSIX || MAP_IMPLEMENTATION==MAP_390DLL
67
    typedef size_t MemoryMap;
68
69
#   define IS_MAP(map) ((map)!=0)
70
71
#   include <unistd.h>
72
#   include <sys/mman.h>
73
#   include <sys/stat.h>
74
#   include <fcntl.h>
75
76
#   ifndef MAP_FAILED
77
#       define MAP_FAILED ((void*)-1)
78
#   endif
79
80
#   if MAP_IMPLEMENTATION==MAP_390DLL
81
        /*   No memory mapping for 390 batch mode.  Fake it using dll loading.  */
82
#       include <dll.h>
83
#       include "cstring.h"
84
#       include "cmemory.h"
85
#       include "unicode/udata.h"
86
#       define LIB_PREFIX "lib"
87
#       define LIB_SUFFIX ".dll"
88
        /* This is inconvenient until we figure out what to do with U_ICUDATA_NAME in utypes.h */
89
#       define U_ICUDATA_ENTRY_NAME "icudt" U_ICU_VERSION_SHORT U_LIB_SUFFIX_C_NAME_STRING "_dat"
90
#   endif
91
#elif MAP_IMPLEMENTATION==MAP_STDIO
92
#   include <stdio.h>
93
#   include "cmemory.h"
94
95
    typedef void *MemoryMap;
96
97
#   define IS_MAP(map) ((map)!=nullptr)
98
#endif
99
100
/*----------------------------------------------------------------------------*
101
 *                                                                            *
102
 *   Memory Mapped File support.  Platform dependent implementation of        *
103
 *                           functions used by the rest of the implementation.*
104
 *                                                                            *
105
 *----------------------------------------------------------------------------*/
106
#if MAP_IMPLEMENTATION==MAP_NONE
107
    /*
     * MAP_NONE variant: this configuration has no file access at all, so the
     * function never maps anything.
     *
     * pData   Output struct; cleared when *status is not a failure on entry.
     * path    Ignored.
     * status  In/out error code; never modified here.
     *
     * Always returns FALSE.
     */
    U_CFUNC UBool
    uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
        if (!U_FAILURE(*status)) {
            /* Hand back a cleared output struct. */
            UDataMemory_init(pData);
        }
        /* no file access */
        return FALSE;
    }
115
116
    /* MAP_NONE variant: uprv_mapFile() never maps anything, so there is nothing to release. */
    U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
        (void)pData; /* intentionally unused */
    }
119
#elif MAP_IMPLEMENTATION==MAP_WIN32
120
    /*
     * MAP_WIN32 variant: opens the file at `path`, creates an unnamed read-only
     * file-mapping object for it and maps a view of the whole file.
     *
     * On success pData->pHeader points at the mapped view, pData->map holds the
     * mapping handle, and TRUE is returned.  On any failure FALSE is returned;
     * *status is set only for out-of-memory conditions and (UWP only) for a
     * path that cannot be converted into the fixed-size UTF-16 buffer.
     */
    U_CFUNC UBool
    uprv_mapFile(
         UDataMemory *pData,    /* Receives the mapping info; prior contents are cleared. */
         const char *path,      /* Path of the file to open and map.                      */
         UErrorCode *status     /* In/out error code, used to report out-of-memory.       */
         )
    {
        if (U_FAILURE(*status)) {
            return FALSE;
        }

        HANDLE mapping = nullptr;
        HANDLE fileHandle = INVALID_HANDLE_VALUE;

        /* Start from a clean output struct. */
        UDataMemory_init(pData);

        /* Step 1: open the input file for reading. */
#if U_PLATFORM_HAS_WINUWP_API == 0
        // Note: In the non-UWP code path (i.e. Win32), the value of the path variable might have
        // come from the CRT 'getenv' function, and would therefore be encoded in the default ANSI
        // code page. This means that we can't call the *W version of the API here, whereas in the
        // UWP code path there is no 'getenv' call, and thus the string will contain only
        // UTF-8/invariant characters.
        fileHandle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ, nullptr,
            OPEN_EXISTING,
            FILE_ATTRIBUTE_NORMAL|FILE_FLAG_RANDOM_ACCESS, nullptr);
#else
        // UWP: convert the UTF-8 path into a UTF-16 string for the wide-character API.
        wchar_t widePath[MAX_PATH];
        int32_t widePathLen = 0;
        u_strFromUTF8(reinterpret_cast<UChar*>(widePath), static_cast<int32_t>(UPRV_LENGTHOF(widePath)), &widePathLen, path, -1, status);

        if (U_FAILURE(*status)) {
            return FALSE;
        }
        if (*status == U_STRING_NOT_TERMINATED_WARNING) {
            // The converted path filled the buffer without a terminator;
            // report this as an error instead of a warning.
            *status = U_BUFFER_OVERFLOW_ERROR;
            return FALSE;
        }

        fileHandle = CreateFileW(widePath, GENERIC_READ, FILE_SHARE_READ, nullptr,
            OPEN_EXISTING,
            FILE_ATTRIBUTE_NORMAL | FILE_FLAG_RANDOM_ACCESS, nullptr);
#endif
        if (fileHandle == INVALID_HANDLE_VALUE) {
            // An out-of-memory failure while opening the file is reported back to the caller;
            // every other open failure just yields FALSE.
            if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
                *status = U_MEMORY_ALLOCATION_ERROR;
            }
            return FALSE;
        }

        // Step 2: create an unnamed Windows file-mapping object for the file.
        // Note: nullptr is passed for the lpAttributes parameter, which means the handle
        // cannot be inherited and the mapping gets the default security descriptor.
        mapping = CreateFileMappingW(fileHandle, nullptr, PAGE_READONLY, 0, 0, nullptr);

        // The file handle is closed here regardless of whether the mapping was created.
        CloseHandle(fileHandle);
        if (mapping == nullptr) {
            // An out-of-memory failure while creating the mapping is reported back to the caller.
            if (HRESULT_FROM_WIN32(GetLastError()) == E_OUTOFMEMORY) {
                *status = U_MEMORY_ALLOCATION_ERROR;
            }
            return FALSE;
        }

        /* Step 3: map a view of the whole file into our address space. */
        pData->pHeader = reinterpret_cast<const DataHeader *>(MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, 0));
        if (pData->pHeader == nullptr) {
            CloseHandle(mapping);
            return FALSE;
        }

        /* Keep the mapping handle so that uprv_unmapFile() can close it later. */
        pData->map = mapping;
        return TRUE;
    }
198
199
    /*
     * MAP_WIN32 variant: unmaps the view at pData->pHeader, closes the mapping
     * handle in pData->map and resets both fields.  Does nothing when pData is
     * null or holds no mapping handle.
     */
    U_CFUNC void
    uprv_unmapFile(UDataMemory *pData) {
        if (pData == nullptr || pData->map == nullptr) {
            return; /* nothing is mapped */
        }

        UnmapViewOfFile(pData->pHeader);
        CloseHandle(pData->map);

        pData->pHeader = nullptr;
        pData->map = nullptr;
    }
208
209
210
211
#elif MAP_IMPLEMENTATION==MAP_POSIX
212
    /*
     * MAP_POSIX variant: maps the whole file at `path` read-only with mmap().
     *
     * pData   Output struct; cleared first, then filled in on success:
     *           pHeader / mapAddr = start of the mapping,
     *           map               = one past the end of the mapping
     *                               (uprv_unmapFile() derives the length from it).
     * path    Path of the file to map.
     * status  In/out error code; the function returns FALSE immediately if it
     *         already indicates a failure, and never modifies it.
     *
     * Returns TRUE on success, FALSE if the file is missing, empty, too large
     * to be addressed, cannot be opened, or cannot be mapped.
     */
    U_CFUNC UBool
    uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
        int fd;
        size_t length;
        struct stat mystat;
        void *data;

        if (U_FAILURE(*status)) {
            return FALSE;
        }

        UDataMemory_init(pData); /* Clear the output struct.        */

        /* determine the length of the file */
        if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
            return FALSE;
        }
        /*
         * st_size is an off_t, which can be wider than int (and, on some
         * platforms, wider than size_t).  Keep the length in a size_t and
         * refuse files whose size does not survive the conversion, instead of
         * silently mapping a truncated length.
         */
        length=(size_t)mystat.st_size;
        if((off_t)length!=mystat.st_size) {
            return FALSE;
        }

        /* open the file */
        fd=open(path, O_RDONLY);
        if(fd==-1) {
            return FALSE;
        }

        /* get a view of the mapping */
#if U_PLATFORM != U_PF_HPUX
        data=mmap(0, length, PROT_READ, MAP_SHARED,  fd, 0);
#else
        data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
#endif
        close(fd); /* no longer needed */
        if(data==MAP_FAILED) {
            // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
            return FALSE;
        }

        /* map stores the END of the mapping; mapAddr stores its start. */
        pData->map = (char *)data + length;
        pData->pHeader=(const DataHeader *)data;
        pData->mapAddr = data;
#if U_PLATFORM == U_PF_IPHONE
        posix_madvise(data, length, POSIX_MADV_RANDOM);
#endif
        return TRUE;
    }
257
258
    /*
     * MAP_POSIX variant: releases the mapping recorded in pData and resets its
     * pHeader, map and mapAddr fields.  Does nothing when pData is null or
     * holds no mapping.
     */
    U_CFUNC void
    uprv_unmapFile(UDataMemory *pData) {
        if(pData==nullptr || pData->map==nullptr) {
            return;
        }

        /* map points one past the end of the mapping and mapAddr at its start,
         * so their difference is the mapped length. */
        char *mapEnd = (char *)pData->map;
        char *mapStart = (char *)pData->mapAddr;
        size_t mappedBytes = mapEnd - mapStart;

        /* A munmap() failure is deliberately ignored. */
        (void)munmap(pData->mapAddr, mappedBytes);

        pData->pHeader=nullptr;
        pData->map=nullptr;
        pData->mapAddr=nullptr;
    }
269
270
271
272
#elif MAP_IMPLEMENTATION==MAP_STDIO
273
    /*
     * Derived from the filestrm.c/T_FileStream_size() implementation.
     *
     * Returns the size in bytes of the open stream f, determined by seeking to
     * the end of the stream.  The stream position that was current on entry is
     * restored before returning.
     *
     * Returns -1 when the position cannot be queried or changed, or when the
     * size does not fit into an int32_t.  ftell() reports positions as long,
     * which can be wider than 32 bits, so the value is range-checked rather
     * than truncated.
     */
    static int32_t
    umap_fsize(FILE *f) {
        long savedPos = ftell(f);
        long endPos;

        if (savedPos < 0) {
            return -1;
        }

        /* Go to the end of the file and read the position there. */
        if (fseek(f, 0, SEEK_END) != 0) {
            return -1;
        }
        endPos = ftell(f);

        /* Put the stream back where the caller left it. */
        if (fseek(f, savedPos, SEEK_SET) != 0) {
            return -1;
        }

        if (endPos < 0 || endPos > INT32_MAX) {
            return -1;
        }
        return (int32_t)endPos;
    }
286
287
    /*
     * MAP_STDIO variant: there is no real memory mapping; the whole file is
     * read into a buffer obtained from uprv_malloc().
     *
     * pData   Output struct; cleared first.  On success map, mapAddr and
     *         pHeader all point at the buffer holding the file contents.
     * path    Path of the file to load.
     * status  In/out error code; set to U_MEMORY_ALLOCATION_ERROR when the
     *         buffer cannot be allocated.
     *
     * Returns TRUE on success, FALSE otherwise.
     */
    U_CFUNC UBool
    uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
        if (U_FAILURE(*status)) {
            return FALSE;
        }

        UDataMemory_init(pData); /* start from a cleared output struct */

        FILE *in = fopen(path, "rb");
        if (in == nullptr) {
            return FALSE;
        }

        UBool loaded = FALSE;
        void *buffer = nullptr;

        /* Files of 20 bytes or fewer are rejected outright
         * (NOTE(review): presumably too small to be a valid data file -- confirm). */
        int32_t byteCount = umap_fsize(in);
        if (!ferror(in) && byteCount > 20) {
            buffer = uprv_malloc(byteCount);
            if (buffer == nullptr) {
                *status = U_MEMORY_ALLOCATION_ERROR;
            } else if (byteCount == fread(buffer, 1, byteCount, in)) {
                loaded = TRUE;
            } else {
                /* short read: discard the partially filled buffer */
                uprv_free(buffer);
            }
        }

        /* The stream is closed on every path once it has been opened. */
        fclose(in);

        if (!loaded) {
            return FALSE;
        }

        pData->map = buffer;
        pData->pHeader = (const DataHeader *)buffer;
        pData->mapAddr = buffer;
        return TRUE;
    }
332
333
    /*
     * MAP_STDIO variant: frees the buffer that uprv_mapFile() stored in
     * pData->map and resets map, mapAddr and pHeader.  Does nothing when pData
     * is null or holds no buffer.
     */
    U_CFUNC void
    uprv_unmapFile(UDataMemory *pData) {
        if(pData==nullptr || pData->map==nullptr) {
            return;
        }

        uprv_free(pData->map);

        pData->pHeader = nullptr;
        pData->mapAddr = nullptr;
        pData->map     = nullptr;
    }
342
343
344
#elif MAP_IMPLEMENTATION==MAP_390DLL
345
    /*  390 specific Library Loading.
346
     *  This is the only platform left that dynamically loads an ICU Data Library.
347
     *  All other platforms use .data files when dynamic loading is required, but
348
     *  this turns out to be awkward to support in 390 batch mode.
349
     *
350
     *  The idea here is to hide the fact that 390 is using dll loading from the
351
     *   rest of ICU, and make it look like there is file loading happening.
352
     *
353
     */
354
355
    /*
     * Copies the NUL-terminated string src (terminator included) into dest and
     * returns a pointer to the terminator that was written into dest, so that
     * a caller can continue appending at that position.
     * No bounds checking is done; dest must be large enough.
     */
    static char *strcpy_returnEnd(char *dest, const char *src)
    {
        for (;;) {
            char c = *src;
            *dest = c;
            if (c == 0) {
                return dest;
            }
            ++src;
            ++dest;
        }
    }
363
    
364
    /*------------------------------------------------------------------------------
365
     *                                                                              
366
     *  computeDirPath   given a user-supplied path of an item to be opened,             
367
     *                         compute and return 
368
     *                            - the full directory path to be used 
369
     *                              when opening the file.
370
     *                            - Pointer to null at end of above returned path    
371
     *
372
     *                       Parameters:
373
     *                          path:        input path.  Buffer is not altered.
374
     *                          pathBuffer:  Output buffer.  Any contents are overwritten.
375
     *
376
     *                       Returns:
377
     *                          Pointer to null termination in returned pathBuffer.
378
     *
379
     *                    TODO:  This works the way ICU historically has, but the
380
     *                           whole data fallback search path is so complicated that
381
     *                           probably almost no one will ever really understand it,
382
     *                           the potential for confusion is large.  (It's not just 
383
     *                           this one function, but the whole scheme.)
384
     *                            
385
     *------------------------------------------------------------------------------*/
386
    static char *uprv_computeDirPath(const char *path, char *pathBuffer)
387
    {
388
        char   *finalSlash;       /* Ptr to last dir separator in input path, or null if none. */
389
        int32_t pathLen;          /* Length of the returned directory path                     */
390
        
391
        finalSlash = 0;
392
        if (path != 0) {
393
            finalSlash = uprv_strrchr(path, U_FILE_SEP_CHAR);
394
        }
395
        
396
        *pathBuffer = 0;
397
        if (finalSlash == 0) {
398
        /* No user-supplied path.  
399
            * Copy the ICU_DATA path to the path buffer and return that*/
400
            const char *icuDataDir;
401
            icuDataDir=u_getDataDirectory();
402
            if(icuDataDir!=nullptr && *icuDataDir!=0) {
403
                return strcpy_returnEnd(pathBuffer, icuDataDir);
404
            } else {
405
                /* there is no icuDataDir either.  Just return the empty pathBuffer. */
406
                return pathBuffer;
407
            }
408
        } 
409
        
410
        /* User supplied path did contain a directory portion.
411
        * Copy it to the output path buffer */
412
        pathLen = (int32_t)(finalSlash - path + 1);
413
        uprv_memcpy(pathBuffer, path, pathLen);
414
        *(pathBuffer+pathLen) = 0;
415
        return pathBuffer+pathLen;
416
    }
417
    
418
419
#   define DATA_TYPE "dat"
420
421
    /*
     * MAP_390DLL variant.
     *
     * If the base name of `path` is anything other than U_ICUDATA_NAME".dat",
     * the file is mapped read-only with mmap(), and pData is filled in with
     *     pHeader / mapAddr = start of the mapping,
     *     map               = one past the end of the mapping.
     * Otherwise the ICU data DLL is loaded with dllload() and pData->pHeader
     * is set to the address of its U_ICUDATA_ENTRY_NAME variable; map and
     * mapAddr are left cleared in that case.
     *
     * pData   Output struct (cleared before being filled in).
     * path    Path of the requested data file.
     * status  In/out error code; the function returns FALSE immediately if it
     *         already indicates a failure, and never modifies it.
     *
     * Returns TRUE on success, FALSE otherwise.
     */
    U_CFUNC UBool uprv_mapFile(UDataMemory *pData, const char *path, UErrorCode *status) {
        const char *inBasename;
        char *basename;
        char pathBuffer[1024];
        dllhandle *handle;
        void *val=0;

        if (U_FAILURE(*status)) {
            return FALSE;
        }

        /* Isolate the file name part of the path. */
        inBasename=uprv_strrchr(path, U_FILE_SEP_CHAR);
        if(inBasename==nullptr) {
            inBasename = path;
        } else {
            inBasename++;
        }
        /* pathBuffer receives the directory part; basename points at its end. */
        basename=uprv_computeDirPath(path, pathBuffer);
        if(uprv_strcmp(inBasename, U_ICUDATA_NAME".dat") != 0) {
            /* must mmap file... for build */
            int fd;
            size_t length;
            struct stat mystat;
            void *data;
            UDataMemory_init(pData); /* Clear the output struct. */

            /* determine the length of the file */
            if(stat(path, &mystat)!=0 || mystat.st_size<=0) {
                return FALSE;
            }
            /*
             * st_size is an off_t, which can be wider than int.  Keep the
             * length in a size_t and refuse files whose size does not survive
             * the conversion, instead of silently mapping a truncated length.
             */
            length=(size_t)mystat.st_size;
            if((off_t)length!=mystat.st_size) {
                return FALSE;
            }

            /* open the file */
            fd=open(path, O_RDONLY);
            if(fd==-1) {
                return FALSE;
            }

            /* get a view of the mapping */
            data=mmap(0, length, PROT_READ, MAP_PRIVATE, fd, 0);
            close(fd); /* no longer needed */
            if(data==MAP_FAILED) {
                // Possibly check the errno value for ENOMEM, and report U_MEMORY_ALLOCATION_ERROR?
                return FALSE;
            }
            /* map stores the END of the mapping; mapAddr stores its start. */
            pData->map = (char *)data + length;
            pData->pHeader=(const DataHeader *)data;
            pData->mapAddr = data;
            return TRUE;
        }

#       ifdef OS390BATCH
            /* ### hack: we still need to get u_getDataDirectory() fixed
            for OS/390 (batch mode - always return "//"? )
            and this here straightened out with LIB_PREFIX and LIB_SUFFIX (both empty?!)
            This is probably due to the strange file system on OS/390.  It's more like
            a database with short entry names than a typical file system. */
            /* U_ICUDATA_NAME should always have the correct name */
            /* BUT FOR BATCH MODE IT IS AN EXCEPTION BECAUSE */
            /* THE FIRST THREE LETTERS ARE PREASSIGNED TO THE */
            /* PROJECT!!!!! */
            uprv_strcpy(pathBuffer, "IXMI" U_ICU_VERSION_SHORT "DA");
#       else
            /* set up the library name: it is appended after the directory part in pathBuffer */
            uprv_strcpy(basename, LIB_PREFIX U_LIBICUDATA_NAME U_ICU_VERSION_SHORT LIB_SUFFIX);
#       endif

#       ifdef UDATA_DEBUG
             fprintf(stderr, "dllload: %s ", pathBuffer);
#       endif

        handle=dllload(pathBuffer);

#       ifdef UDATA_DEBUG
               fprintf(stderr, " -> %08X\n", handle );
#       endif

        if(handle != nullptr) {
               /* we have a data DLL - what kind of lookup do we need here? */
               /* try to find the Table of Contents */
               UDataMemory_init(pData); /* Clear the output struct.        */
               val=dllqueryvar((dllhandle*)handle, U_ICUDATA_ENTRY_NAME);
               if(val == 0) {
                    /* failed... so keep looking */
                    return FALSE;
               }
#              ifdef UDATA_DEBUG
                    fprintf(stderr, "dllqueryvar(%08X, %s) -> %08X\n", handle, U_ICUDATA_ENTRY_NAME, val);
#              endif

               pData->pHeader=(const DataHeader *)val;
               return TRUE;
         } else {
               return FALSE; /* no handle */
         }
    }
518
519
    /*
     * MAP_390DLL variant: releases a mapping recorded in pData and resets its
     * map, mapAddr and pHeader fields.
     *
     * In this configuration uprv_mapFile() only sets pData->map on its mmap()
     * path, where map is one past the end of the mapping and mapAddr is its
     * start.  Such a region must be released with munmap(); it was never
     * obtained from uprv_malloc(), and map is not even the start of the
     * region, so it must not be passed to uprv_free().
     * (Assumes map/mapAddr are only ever set by uprv_mapFile() -- TODO confirm
     * no other code stores an allocated buffer in pData->map.)
     *
     * Does nothing when pData is null or holds no mapping; data obtained from
     * the ICU data DLL leaves map cleared and is therefore not touched here.
     */
    U_CFUNC void uprv_unmapFile(UDataMemory *pData) {
        if(pData!=nullptr && pData->map!=nullptr) {
            if(pData->mapAddr!=nullptr) {
                size_t dataLen = (char *)pData->map - (char *)pData->mapAddr;
                /* A munmap() failure is deliberately ignored. */
                (void)munmap(pData->mapAddr, dataLen);
            }
            pData->map     = nullptr;
            pData->mapAddr = nullptr;
            pData->pHeader = nullptr;
        }
    }
527
528
#else
529
#   error MAP_IMPLEMENTATION is set incorrectly
530
#endif