Coverage Report

Created: 2025-07-04 06:49

/src/cpython/Python/fileutils.c
Line
Count
Source (jump to first uncovered line)
1
#include "Python.h"
2
#include "pycore_fileutils.h"     // fileutils definitions
3
#include "pycore_runtime.h"       // _PyRuntime
4
#include "pycore_pystate.h"       // _Py_AssertHoldsTstate()
5
#include "osdefs.h"               // SEP
6
7
#include <stdlib.h>               // mbstowcs()
8
#ifdef HAVE_UNISTD_H
9
#  include <unistd.h>             // getcwd()
10
#endif
11
12
#ifdef MS_WINDOWS
13
#  include <malloc.h>
14
#  include <windows.h>
15
#  include <winioctl.h>             // FILE_DEVICE_* constants
16
#  include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION
17
#  if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
18
#    define PATHCCH_ALLOW_LONG_PATHS 0x01
19
#  else
20
#    include <pathcch.h>            // PathCchCombineEx
21
#  endif
22
extern int winerror_to_errno(int);
23
#endif
24
25
#ifdef HAVE_LANGINFO_H
26
#  include <langinfo.h>           // nl_langinfo(CODESET)
27
#endif
28
29
#ifdef HAVE_SYS_IOCTL_H
30
#include <sys/ioctl.h>
31
#endif
32
33
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
34
#  include <iconv.h>              // iconv_open()
35
#endif
36
37
#ifdef HAVE_FCNTL_H
38
#  include <fcntl.h>              // fcntl(F_GETFD)
39
#endif
40
41
#ifdef O_CLOEXEC
42
/* Does open() support the O_CLOEXEC flag? Possible values:
43
44
   -1: unknown
45
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
46
    1: open() supports O_CLOEXEC flag, close-on-exec is set
47
48
   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
49
   and os.open(). */
50
int _Py_open_cloexec_works = -1;
51
#endif
52
53
// The value must be the same in unicodeobject.c.
54
363k
#define MAX_UNICODE 0x10ffff
55
56
// mbstowcs() and mbrtowc() errors
57
static const size_t DECODE_ERROR = ((size_t)-1);
58
#ifdef HAVE_MBRTOWC
59
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
60
#endif
61
62
63
static int
64
get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
65
18.0k
{
66
18.0k
    switch (errors)
67
18.0k
    {
68
0
    case _Py_ERROR_STRICT:
69
0
        *surrogateescape = 0;
70
0
        return 0;
71
18.0k
    case _Py_ERROR_SURROGATEESCAPE:
72
18.0k
        *surrogateescape = 1;
73
18.0k
        return 0;
74
0
    default:
75
0
        return -1;
76
18.0k
    }
77
18.0k
}
78
79
80
PyObject *
81
_Py_device_encoding(int fd)
82
0
{
83
0
    int valid;
84
0
    Py_BEGIN_ALLOW_THREADS
85
0
    _Py_BEGIN_SUPPRESS_IPH
86
0
    valid = isatty(fd);
87
0
    _Py_END_SUPPRESS_IPH
88
0
    Py_END_ALLOW_THREADS
89
0
    if (!valid)
90
0
        Py_RETURN_NONE;
91
92
#ifdef MS_WINDOWS
93
#ifdef HAVE_WINDOWS_CONSOLE_IO
94
    UINT cp;
95
    if (fd == 0)
96
        cp = GetConsoleCP();
97
    else if (fd == 1 || fd == 2)
98
        cp = GetConsoleOutputCP();
99
    else
100
        cp = 0;
101
    /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
102
       has no console */
103
    if (cp == 0) {
104
        Py_RETURN_NONE;
105
    }
106
107
    return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
108
#else
109
    Py_RETURN_NONE;
110
#endif /* HAVE_WINDOWS_CONSOLE_IO */
111
#else
112
0
    if (_PyRuntime.preconfig.utf8_mode) {
113
0
        _Py_DECLARE_STR(utf_8, "utf-8");
114
0
        return &_Py_STR(utf_8);
115
0
    }
116
0
    return _Py_GetLocaleEncodingObject();
117
0
#endif
118
0
}
119
120
121
/* Return 1 if ch is acceptable as a decoded wide character, 0 otherwise.

   Lone surrogates are rejected, and so are values above MAX_UNICODE when
   wchar_t is wider than 16 bits.  When wchar_t does not hold Unicode code
   points, every value is accepted. */
static int
is_valid_wide_char(wchar_t ch)
{
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
    /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
       for non-Unicode locales, which makes values higher than MAX_UNICODE
       possibly valid. */
    return 1;
#else
    // Reject lone surrogate characters
    if (Py_UNICODE_IS_SURROGATE(ch)) {
        return 0;
    }
#  if SIZEOF_WCHAR_T > 2
    // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
    // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
    // it creates characters outside the [U+0000; U+10ffff] range:
    // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
    if (ch > MAX_UNICODE) {
        return 0;
    }
#  endif
    return 1;
#endif
}
145
146
147
static size_t
148
_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
149
36.9k
{
150
36.9k
    size_t count = mbstowcs(dest, src, n);
151
36.9k
    if (dest != NULL && count != DECODE_ERROR) {
152
381k
        for (size_t i=0; i < count; i++) {
153
363k
            wchar_t ch = dest[i];
154
363k
            if (!is_valid_wide_char(ch)) {
155
0
                return DECODE_ERROR;
156
0
            }
157
363k
        }
158
17.4k
    }
159
36.9k
    return count;
160
36.9k
}
161
162
163
#ifdef HAVE_MBRTOWC
164
static size_t
165
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
166
0
{
167
0
    assert(pwc != NULL);
168
0
    size_t count = mbrtowc(pwc, str, len, pmbs);
169
0
    if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
170
0
        if (!is_valid_wide_char(*pwc)) {
171
0
            return DECODE_ERROR;
172
0
        }
173
0
    }
174
0
    return count;
175
0
}
176
#endif
177
178
179
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
180
181
#define USE_FORCE_ASCII
182
183
extern int _Py_normalize_encoding(const char *, char *, size_t);
184
185
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
186
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
187
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
188
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
189
   locale.getpreferredencoding() codec. For example, if command line arguments
190
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
191
   UnicodeEncodeError instead of retrieving the original byte string.
192
193
   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
194
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
195
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
196
   workaround is also enabled on error, for example if getting the locale
197
   failed.
198
199
   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
200
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
201
   ASCII encoding in this case.
202
203
   Values of force_ascii:
204
205
       1: the workaround is used: Py_EncodeLocale() uses
206
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
207
          decode_ascii()
208
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
209
          Py_DecodeLocale() uses mbstowcs()
210
      -1: unknown, need to call check_force_ascii() to get the value
211
*/
212
11.6k
#define force_ascii (_PyRuntime.fileutils.force_ascii)
213
214
/* Decide whether the ASCII workaround must be used for the current LC_CTYPE
   locale.

   Return 1 (force ASCII) if:
   - the locale or the codeset cannot be retrieved or normalized;
   - nl_langinfo(CODESET) is not available;
   - the locale is "C" or "POSIX", the codeset is announced as ASCII (or an
     alias of ASCII), but mbstowcs() is able to decode at least one byte in
     the range 0x80-0xff;
   - on HP-UX, the locale is "C" or "POSIX", the codeset is announced as
     "roman8", but mbstowcs() decodes 0xA7 as Latin1.

   Return 0 otherwise.

   The probe buffers passed to _Py_mbstowcs() are NUL-terminated so that the C
   library never reads past the buffer, even if it treats the probed byte as
   the lead byte of a multibyte sequence. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* one probe byte followed by a NUL terminator */
        char ch[2];
        wchar_t wch;
        size_t res;

        ch[0] = (char)(unsigned char)0xA7;
        ch[1] = '\0';
        res = _Py_mbstowcs(&wch, ch, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char *const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char *const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* one probe byte followed by a NUL terminator */
        char ch[2];
        wchar_t wch[1];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = _Py_mbstowcs(wch, ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
313
314
315
int
316
_Py_GetForceASCII(void)
317
16
{
318
16
    if (force_ascii == -1) {
319
16
        force_ascii = check_force_ascii();
320
16
    }
321
16
    return force_ascii;
322
16
}
323
324
325
void
326
_Py_ResetForceASCII(void)
327
32
{
328
32
    force_ascii = -1;
329
32
}
330
331
332
static int
333
encode_ascii(const wchar_t *text, char **str,
334
             size_t *error_pos, const char **reason,
335
             int raw_malloc, _Py_error_handler errors)
336
0
{
337
0
    char *result = NULL, *out;
338
0
    size_t len, i;
339
0
    wchar_t ch;
340
341
0
    int surrogateescape;
342
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
343
0
        return -3;
344
0
    }
345
346
0
    len = wcslen(text);
347
348
    /* +1 for NULL byte */
349
0
    if (raw_malloc) {
350
0
        result = PyMem_RawMalloc(len + 1);
351
0
    }
352
0
    else {
353
0
        result = PyMem_Malloc(len + 1);
354
0
    }
355
0
    if (result == NULL) {
356
0
        return -1;
357
0
    }
358
359
0
    out = result;
360
0
    for (i=0; i<len; i++) {
361
0
        ch = text[i];
362
363
0
        if (ch <= 0x7f) {
364
            /* ASCII character */
365
0
            *out++ = (char)ch;
366
0
        }
367
0
        else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
368
            /* UTF-8b surrogate */
369
0
            *out++ = (char)(ch - 0xdc00);
370
0
        }
371
0
        else {
372
0
            if (raw_malloc) {
373
0
                PyMem_RawFree(result);
374
0
            }
375
0
            else {
376
0
                PyMem_Free(result);
377
0
            }
378
0
            if (error_pos != NULL) {
379
0
                *error_pos = i;
380
0
            }
381
0
            if (reason) {
382
0
                *reason = "encoding error";
383
0
            }
384
0
            return -2;
385
0
        }
386
0
    }
387
0
    *out = '\0';
388
0
    *str = result;
389
0
    return 0;
390
0
}
391
#else
392
/* The ASCII workaround does not exist in this configuration: always
   report that it is not used. */
int
_Py_GetForceASCII(void)
{
    return 0;
}
397
398
/* The ASCII workaround does not exist in this configuration: there is no
   cached value to reset. */
void
_Py_ResetForceASCII(void)
{
    /* nothing to do */
}
403
#endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
404
405
406
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
407
static int
408
decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
409
             const char **reason, _Py_error_handler errors)
410
0
{
411
0
    wchar_t *res;
412
0
    unsigned char *in;
413
0
    wchar_t *out;
414
0
    size_t argsize = strlen(arg) + 1;
415
416
0
    int surrogateescape;
417
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
418
0
        return -3;
419
0
    }
420
421
0
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
422
0
        return -1;
423
0
    }
424
0
    res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
425
0
    if (!res) {
426
0
        return -1;
427
0
    }
428
429
0
    out = res;
430
0
    for (in = (unsigned char*)arg; *in; in++) {
431
0
        unsigned char ch = *in;
432
0
        if (ch < 128) {
433
0
            *out++ = ch;
434
0
        }
435
0
        else {
436
0
            if (!surrogateescape) {
437
0
                PyMem_RawFree(res);
438
0
                if (wlen) {
439
0
                    *wlen = in - (unsigned char*)arg;
440
0
                }
441
0
                if (reason) {
442
0
                    *reason = "decoding error";
443
0
                }
444
0
                return -2;
445
0
            }
446
0
            *out++ = 0xdc00 + ch;
447
0
        }
448
0
    }
449
0
    *out = 0;
450
451
0
    if (wlen != NULL) {
452
0
        *wlen = out - res;
453
0
    }
454
0
    *wstr = res;
455
0
    return 0;
456
0
}
457
#endif   /* !HAVE_MBRTOWC */
458
459
static int
460
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
461
                      const char **reason, _Py_error_handler errors)
462
17.4k
{
463
17.4k
    wchar_t *res;
464
17.4k
    size_t argsize;
465
17.4k
    size_t count;
466
17.4k
#ifdef HAVE_MBRTOWC
467
17.4k
    unsigned char *in;
468
17.4k
    wchar_t *out;
469
17.4k
    mbstate_t mbs;
470
17.4k
#endif
471
472
17.4k
    int surrogateescape;
473
17.4k
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
474
0
        return -3;
475
0
    }
476
477
#ifdef HAVE_BROKEN_MBSTOWCS
478
    /* Some platforms have a broken implementation of
479
     * mbstowcs which does not count the characters that
480
     * would result from conversion.  Use an upper bound.
481
     */
482
    argsize = strlen(arg);
483
#else
484
17.4k
    argsize = _Py_mbstowcs(NULL, arg, 0);
485
17.4k
#endif
486
17.4k
    if (argsize != DECODE_ERROR) {
487
17.4k
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
488
0
            return -1;
489
0
        }
490
17.4k
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
491
17.4k
        if (!res) {
492
0
            return -1;
493
0
        }
494
495
17.4k
        count = _Py_mbstowcs(res, arg, argsize + 1);
496
17.4k
        if (count != DECODE_ERROR) {
497
17.4k
            *wstr = res;
498
17.4k
            if (wlen != NULL) {
499
17.4k
                *wlen = count;
500
17.4k
            }
501
17.4k
            return 0;
502
17.4k
        }
503
0
        PyMem_RawFree(res);
504
0
    }
505
506
    /* Conversion failed. Fall back to escaping with surrogateescape. */
507
0
#ifdef HAVE_MBRTOWC
508
    /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
509
510
    /* Overallocate; as multi-byte characters are in the argument, the
511
       actual output could use less memory. */
512
0
    argsize = strlen(arg) + 1;
513
0
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
514
0
        return -1;
515
0
    }
516
0
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
517
0
    if (!res) {
518
0
        return -1;
519
0
    }
520
521
0
    in = (unsigned char*)arg;
522
0
    out = res;
523
0
    memset(&mbs, 0, sizeof mbs);
524
0
    while (argsize) {
525
0
        size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
526
0
        if (converted == 0) {
527
            /* Reached end of string; null char stored. */
528
0
            break;
529
0
        }
530
531
0
        if (converted == DECODE_ERROR || converted == INCOMPLETE_CHARACTER) {
532
0
            if (!surrogateescape) {
533
0
                goto decode_error;
534
0
            }
535
536
            /* Decoding error. Escape as UTF-8b, and start over in the initial
537
               shift state. */
538
0
            *out++ = 0xdc00 + *in++;
539
0
            argsize--;
540
0
            memset(&mbs, 0, sizeof mbs);
541
0
            continue;
542
0
        }
543
544
        // _Py_mbrtowc() reject lone surrogate characters
545
0
        assert(!Py_UNICODE_IS_SURROGATE(*out));
546
547
        /* successfully converted some bytes */
548
0
        in += converted;
549
0
        argsize -= converted;
550
0
        out++;
551
0
    }
552
0
    if (wlen != NULL) {
553
0
        *wlen = out - res;
554
0
    }
555
0
    *wstr = res;
556
0
    return 0;
557
558
0
decode_error:
559
0
    PyMem_RawFree(res);
560
0
    if (wlen) {
561
0
        *wlen = in - (unsigned char*)arg;
562
0
    }
563
0
    if (reason) {
564
0
        *reason = "decoding error";
565
0
    }
566
0
    return -2;
567
#else   /* HAVE_MBRTOWC */
568
    /* Cannot use C locale for escaping; manually escape as if charset
569
       is ASCII (i.e. escape all bytes > 128. This will still roundtrip
570
       correctly in the locale's charset, which must be an ASCII superset. */
571
    return decode_ascii(arg, wstr, wlen, reason, errors);
572
#endif   /* HAVE_MBRTOWC */
573
0
}
574
575
576
/* Decode a byte string from the locale encoding.
577
578
   Use the strict error handler if 'errors' is _Py_ERROR_STRICT.  Use the
579
   surrogateescape error handler if 'errors' is _Py_ERROR_SURROGATEESCAPE: undecodable
580
   bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
581
   can be decoded as a surrogate character, escape the bytes using the
582
   surrogateescape error handler instead of decoding them.
583
584
   On success, return 0 and write the newly allocated wide character string into
585
   *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
586
   the number of wide characters excluding the null character into *wlen.
587
588
   On memory allocation failure, return -1.
589
590
   On decoding error, return -2. If wlen is not NULL, write the start of
591
   invalid byte sequence in the input string into *wlen. If reason is not NULL,
592
   write the decoding error message into *reason.
593
594
   Return -3 if the error handler 'errors' is not supported.
595
596
   Use the _Py_EncodeLocaleEx() function to encode the character string back to
597
   a byte string. */
598
int
599
_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
600
                   const char **reason,
601
                   int current_locale, _Py_error_handler errors)
602
17.4k
{
603
17.4k
    if (current_locale) {
604
#ifdef _Py_FORCE_UTF8_LOCALE
605
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
606
                                errors);
607
#else
608
12.2k
        return decode_current_locale(arg, wstr, wlen, reason, errors);
609
12.2k
#endif
610
12.2k
    }
611
612
#ifdef _Py_FORCE_UTF8_FS_ENCODING
613
    return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
614
                            errors);
615
#else
616
5.23k
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
617
#ifdef MS_WINDOWS
618
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
619
#endif
620
5.23k
    if (use_utf8) {
621
0
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
622
0
                                errors);
623
0
    }
624
625
5.23k
#ifdef USE_FORCE_ASCII
626
5.23k
    if (force_ascii == -1) {
627
0
        force_ascii = check_force_ascii();
628
0
    }
629
630
5.23k
    if (force_ascii) {
631
        /* force ASCII encoding to workaround mbstowcs() issue */
632
0
        return decode_ascii(arg, wstr, wlen, reason, errors);
633
0
    }
634
5.23k
#endif
635
636
5.23k
    return decode_current_locale(arg, wstr, wlen, reason, errors);
637
5.23k
#endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
638
5.23k
}
639
640
641
/* Decode a byte string from the locale encoding with the
642
   surrogateescape error handler: undecodable bytes are decoded as characters
643
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
644
   character, escape the bytes using the surrogateescape error handler instead
645
   of decoding them.
646
647
   Return a pointer to a newly allocated wide character string, use
648
   PyMem_RawFree() to free the memory. If wlen is not NULL, write the number of
649
   wide characters excluding the null character into *wlen.
650
651
   Return NULL on decoding error or memory allocation error. If wlen is not
652
   NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on
653
   decoding error.
654
655
   Decoding errors should never happen, unless there is a bug in the C
656
   library.
657
658
   Use the Py_EncodeLocale() function to encode the character string back to a
659
   byte string. */
660
wchar_t*
661
Py_DecodeLocale(const char* arg, size_t *wlen)
662
112
{
663
112
    wchar_t *wstr;
664
112
    int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
665
112
                                 NULL, 0,
666
112
                                 _Py_ERROR_SURROGATEESCAPE);
667
112
    if (res != 0) {
668
0
        assert(res != -3);
669
0
        if (wlen != NULL) {
670
0
            *wlen = (size_t)res;
671
0
        }
672
0
        return NULL;
673
0
    }
674
112
    return wstr;
675
112
}
676
677
678
static int
679
encode_current_locale(const wchar_t *text, char **str,
680
                      size_t *error_pos, const char **reason,
681
                      int raw_malloc, _Py_error_handler errors)
682
568
{
683
568
    const size_t len = wcslen(text);
684
568
    char *result = NULL, *bytes = NULL;
685
568
    size_t i, size, converted;
686
568
    wchar_t c, buf[2];
687
688
568
    int surrogateescape;
689
568
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
690
0
        return -3;
691
0
    }
692
693
    /* The function works in two steps:
694
       1. compute the length of the output buffer in bytes (size)
695
       2. outputs the bytes */
696
568
    size = 0;
697
568
    buf[1] = 0;
698
1.13k
    while (1) {
699
82.1k
        for (i=0; i < len; i++) {
700
81.0k
            c = text[i];
701
81.0k
            if (c >= 0xdc80 && c <= 0xdcff) {
702
0
                if (!surrogateescape) {
703
0
                    goto encode_error;
704
0
                }
705
                /* UTF-8b surrogate */
706
0
                if (bytes != NULL) {
707
0
                    *bytes++ = c - 0xdc00;
708
0
                    size--;
709
0
                }
710
0
                else {
711
0
                    size++;
712
0
                }
713
0
                continue;
714
0
            }
715
81.0k
            else {
716
81.0k
                buf[0] = c;
717
81.0k
                if (bytes != NULL) {
718
40.5k
                    converted = wcstombs(bytes, buf, size);
719
40.5k
                }
720
40.5k
                else {
721
40.5k
                    converted = wcstombs(NULL, buf, 0);
722
40.5k
                }
723
81.0k
                if (converted == DECODE_ERROR) {
724
0
                    goto encode_error;
725
0
                }
726
81.0k
                if (bytes != NULL) {
727
40.5k
                    bytes += converted;
728
40.5k
                    size -= converted;
729
40.5k
                }
730
40.5k
                else {
731
40.5k
                    size += converted;
732
40.5k
                }
733
81.0k
            }
734
81.0k
        }
735
1.13k
        if (result != NULL) {
736
568
            *bytes = '\0';
737
568
            break;
738
568
        }
739
740
568
        size += 1; /* nul byte at the end */
741
568
        if (raw_malloc) {
742
568
            result = PyMem_RawMalloc(size);
743
568
        }
744
0
        else {
745
0
            result = PyMem_Malloc(size);
746
0
        }
747
568
        if (result == NULL) {
748
0
            return -1;
749
0
        }
750
568
        bytes = result;
751
568
    }
752
568
    *str = result;
753
568
    return 0;
754
755
0
encode_error:
756
0
    if (raw_malloc) {
757
0
        PyMem_RawFree(result);
758
0
    }
759
0
    else {
760
0
        PyMem_Free(result);
761
0
    }
762
0
    if (error_pos != NULL) {
763
0
        *error_pos = i;
764
0
    }
765
0
    if (reason) {
766
0
        *reason = "encoding error";
767
0
    }
768
0
    return -2;
769
568
}
770
771
772
/* Encode a string to the locale encoding.
773
774
   Parameters:
775
776
   * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
777
     of PyMem_Malloc().
778
   * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
779
     Python filesystem encoding.
780
   * errors: error handler like "strict" or "surrogateescape".
781
782
   Return value:
783
784
    0: success, *str is set to a newly allocated encoded string.
785
   -1: memory allocation failure
786
   -2: encoding error, set *error_pos and *reason (if set).
787
   -3: the error handler 'errors' is not supported.
788
 */
789
static int
790
encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
791
                 const char **reason,
792
                 int raw_malloc, int current_locale, _Py_error_handler errors)
793
568
{
794
568
    if (current_locale) {
795
#ifdef _Py_FORCE_UTF8_LOCALE
796
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
797
                                raw_malloc, errors);
798
#else
799
0
        return encode_current_locale(text, str, error_pos, reason,
800
0
                                     raw_malloc, errors);
801
0
#endif
802
0
    }
803
804
#ifdef _Py_FORCE_UTF8_FS_ENCODING
805
    return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
806
                            raw_malloc, errors);
807
#else
808
568
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
809
#ifdef MS_WINDOWS
810
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
811
#endif
812
568
    if (use_utf8) {
813
0
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
814
0
                                raw_malloc, errors);
815
0
    }
816
817
568
#ifdef USE_FORCE_ASCII
818
568
    if (force_ascii == -1) {
819
0
        force_ascii = check_force_ascii();
820
0
    }
821
822
568
    if (force_ascii) {
823
0
        return encode_ascii(text, str, error_pos, reason,
824
0
                            raw_malloc, errors);
825
0
    }
826
568
#endif
827
828
568
    return encode_current_locale(text, str, error_pos, reason,
829
568
                                 raw_malloc, errors);
830
568
#endif   /* _Py_FORCE_UTF8_FS_ENCODING */
831
568
}
832
833
static char*
834
encode_locale(const wchar_t *text, size_t *error_pos,
835
              int raw_malloc, int current_locale)
836
160
{
837
160
    char *str;
838
160
    int res = encode_locale_ex(text, &str, error_pos, NULL,
839
160
                               raw_malloc, current_locale,
840
160
                               _Py_ERROR_SURROGATEESCAPE);
841
160
    if (res != -2 && error_pos) {
842
0
        *error_pos = (size_t)-1;
843
0
    }
844
160
    if (res != 0) {
845
0
        return NULL;
846
0
    }
847
160
    return str;
848
160
}
849
850
/* Encode a wide character string to the locale encoding with the
851
   surrogateescape error handler: surrogate characters in the range
852
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
853
854
   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
855
   the memory. Return NULL on encoding or memory allocation error.
856
857
   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
858
   to the index of the invalid character on encoding error.
859
860
   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
861
   character string. */
862
/* Call encode_locale() with raw_malloc=0 (the result is allocated by
   PyMem_Malloc()) and current_locale=0. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
867
868
869
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
870
   instead of PyMem_Free(). */
871
/* Call encode_locale() with raw_malloc=1 (the result is allocated by
   PyMem_RawMalloc()) and current_locale=0. */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
876
877
878
int
879
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
880
                   size_t *error_pos, const char **reason,
881
                   int current_locale, _Py_error_handler errors)
882
408
{
883
408
    return encode_locale_ex(text, str, error_pos, reason, 1,
884
408
                            current_locale, errors);
885
408
}
886
887
888
// Get the current locale encoding name:
889
//
890
// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
891
// - Return "utf-8" if the UTF-8 Mode is enabled
892
// - On Windows, return the ANSI code page (ex: "cp1250")
893
// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
894
// - Otherwise, return nl_langinfo(CODESET).
895
//
896
// Return NULL on memory allocation failure.
897
//
898
// See also config_get_locale_encoding()
899
wchar_t*
900
_Py_GetLocaleEncoding(void)
901
32
{
902
#ifdef _Py_FORCE_UTF8_LOCALE
903
    // On Android langinfo.h and CODESET are missing,
904
    // and UTF-8 is always used in mbstowcs() and wcstombs().
905
    return _PyMem_RawWcsdup(L"utf-8");
906
#else
907
908
#ifdef MS_WINDOWS
909
    wchar_t encoding[23];
910
    unsigned int ansi_codepage = GetACP();
911
    swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
912
    encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
913
    return _PyMem_RawWcsdup(encoding);
914
#else
915
32
    const char *encoding = nl_langinfo(CODESET);
916
32
    if (!encoding || encoding[0] == '\0') {
917
        // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
918
        // macOS if the LC_CTYPE locale is not supported.
919
0
        return _PyMem_RawWcsdup(L"utf-8");
920
0
    }
921
922
32
    wchar_t *wstr;
923
32
    int res = decode_current_locale(encoding, &wstr, NULL,
924
32
                                    NULL, _Py_ERROR_SURROGATEESCAPE);
925
32
    if (res < 0) {
926
0
        return NULL;
927
0
    }
928
32
    return wstr;
929
32
#endif  // !MS_WINDOWS
930
931
32
#endif  // !_Py_FORCE_UTF8_LOCALE
932
32
}
933
934
935
/* Return the result of _Py_GetLocaleEncoding() as a str object.

   If _Py_GetLocaleEncoding() fails, raise MemoryError and return NULL.
   Otherwise return the result of PyUnicode_FromWideChar(). */
PyObject *
_Py_GetLocaleEncodingObject(void)
{
    wchar_t *name = _Py_GetLocaleEncoding();
    if (name == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    PyObject *result = PyUnicode_FromWideChar(name, -1);
    /* the wide string is no longer needed, whether the conversion worked
       or not */
    PyMem_RawFree(name);
    return result;
}
948
949
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
950
951
/* Check whether current locale uses Unicode as internal wchar_t form. */
952
/* Return 1 if nl_langinfo(CODESET) is neither "UTF-8" nor "646", 0 if it is
   one of them or if nl_langinfo() returns NULL. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    char *codeset = nl_langinfo(CODESET);
    if (codeset == NULL) {
        return 0;
    }
    if (strcmp(codeset, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(codeset, "646") == 0) {
        return 0;
    }
    return 1;
}
964
965
static wchar_t *
966
_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
967
                     const char *tocode, const char *fromcode)
968
{
969
    static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
970
971
    /* Ensure we won't overflow the size. */
972
    if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
973
        PyErr_NoMemory();
974
        return NULL;
975
    }
976
977
    /* the string doesn't have to be NULL terminated */
978
    wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
979
    if (target == NULL) {
980
        PyErr_NoMemory();
981
        return NULL;
982
    }
983
984
    iconv_t cd = iconv_open(tocode, fromcode);
985
    if (cd == (iconv_t)-1) {
986
        PyErr_Format(PyExc_ValueError, "iconv_open() failed");
987
        PyMem_Free(target);
988
        return NULL;
989
    }
990
991
    char *inbuf = (char *) source;
992
    char *outbuf = (char *) target;
993
    size_t inbytesleft = sizeof(wchar_t) * size;
994
    size_t outbytesleft = inbytesleft;
995
996
    size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
997
    if (ret == DECODE_ERROR) {
998
        PyErr_Format(PyExc_ValueError, "iconv() failed");
999
        PyMem_Free(target);
1000
        iconv_close(cd);
1001
        return NULL;
1002
    }
1003
1004
    iconv_close(cd);
1005
    return target;
1006
}
1007
1008
/* Convert a wide character string to the UCS-4 encoded string. This
1009
   is necessary on systems where internal form of wchar_t are not Unicode
1010
   code points (e.g. Oracle Solaris).
1011
1012
   Return a pointer to a newly allocated string, use PyMem_Free() to free
1013
   the memory. Return NULL and raise exception on conversion or memory
1014
   allocation error. */
1015
wchar_t *
1016
_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1017
{
1018
    return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1019
}
1020
1021
/* Convert a UCS-4 encoded string to native wide character string. This
1022
   is necessary on systems where internal form of wchar_t are not Unicode
1023
   code points (e.g. Oracle Solaris).
1024
1025
   The conversion is done in place. This can be done because both wchar_t
1026
   and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1027
   to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1028
   which is currently the only system using these functions; it doesn't have
1029
   to be for other systems).
1030
1031
   Return 0 on success. Return -1 and raise exception on conversion
1032
   or memory allocation error. */
1033
int
1034
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1035
{
1036
    wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1037
    if (!result) {
1038
        return -1;
1039
    }
1040
    memcpy(unicode, result, size * sizeof(wchar_t));
1041
    PyMem_Free(result);
1042
    return 0;
1043
}
1044
#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1045
1046
#ifdef MS_WINDOWS
1047
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1048
1049
static void
1050
FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1051
{
1052
    /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1053
    /* Cannot simply cast and dereference in_ptr,
1054
       since it might not be aligned properly */
1055
    __int64 in;
1056
    memcpy(&in, in_ptr, sizeof(in));
1057
    *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1058
    *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1059
}
1060
1061
static void
1062
LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out)
1063
{
1064
    *nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1065
    *time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t);
1066
}
1067
1068
void
1069
_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1070
{
1071
    /* XXX endianness */
1072
    __int64 out;
1073
    out = time_in + secs_between_epochs;
1074
    out = out * 10000000 + nsec_in / 100;
1075
    memcpy(out_ptr, &out, sizeof(out));
1076
}
1077
1078
/* Below, we *know* that ugo+r is 0444 */
1079
#if _S_IREAD != 0400
1080
#error Unsupported C library
1081
#endif
1082
static int
1083
attributes_to_mode(DWORD attr)
1084
{
1085
    int m = 0;
1086
    if (attr & FILE_ATTRIBUTE_DIRECTORY)
1087
        m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1088
    else
1089
        m |= _S_IFREG;
1090
    if (attr & FILE_ATTRIBUTE_READONLY)
1091
        m |= 0444;
1092
    else
1093
        m |= 0666;
1094
    return m;
1095
}
1096
1097
1098
typedef union {
1099
    FILE_ID_128 id;
1100
    struct {
1101
        uint64_t st_ino;
1102
        uint64_t st_ino_high;
1103
    };
1104
} id_128_to_ino;
1105
1106
1107
void
1108
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1109
                           FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info,
1110
                           struct _Py_stat_struct *result)
1111
{
1112
    memset(result, 0, sizeof(*result));
1113
    result->st_mode = attributes_to_mode(info->dwFileAttributes);
1114
    result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1115
    result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber;
1116
    result->st_rdev = 0;
1117
    /* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */
1118
    if (basic_info) {
1119
        LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1120
        LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1121
        LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1122
        LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1123
    } else {
1124
        FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1125
        FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1126
        FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1127
    }
1128
    result->st_nlink = info->nNumberOfLinks;
1129
1130
    if (id_info) {
1131
        id_128_to_ino file_id;
1132
        file_id.id = id_info->FileId;
1133
        result->st_ino = file_id.st_ino;
1134
        result->st_ino_high = file_id.st_ino_high;
1135
    }
1136
    if (!result->st_ino && !result->st_ino_high) {
1137
        /* should only occur for DirEntry_from_find_data, in which case the
1138
           index is likely to be zero anyway. */
1139
        result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1140
    }
1141
1142
    /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1143
       open other name surrogate reparse points without traversing them. To
1144
       detect/handle these, check st_file_attributes and st_reparse_tag. */
1145
    result->st_reparse_tag = reparse_tag;
1146
    if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1147
        reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1148
        /* set the bits that make this a symlink */
1149
        result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1150
    }
1151
    result->st_file_attributes = info->dwFileAttributes;
1152
}
1153
1154
void
1155
_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info,
1156
                            struct _Py_stat_struct *result)
1157
{
1158
    memset(result, 0, sizeof(*result));
1159
    result->st_mode = attributes_to_mode(info->FileAttributes);
1160
    result->st_size = info->EndOfFile.QuadPart;
1161
    LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1162
    LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1163
    LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1164
    LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1165
    result->st_nlink = info->NumberOfLinks;
1166
    result->st_dev = info->VolumeSerialNumber.QuadPart;
1167
    /* File systems with less than 128-bits zero pad into this field */
1168
    id_128_to_ino file_id;
1169
    file_id.id = info->FileId128;
1170
    result->st_ino = file_id.st_ino;
1171
    result->st_ino_high = file_id.st_ino_high;
1172
    /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1173
       open other name surrogate reparse points without traversing them. To
1174
       detect/handle these, check st_file_attributes and st_reparse_tag. */
1175
    result->st_reparse_tag = info->ReparseTag;
1176
    if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1177
        info->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
1178
        /* set the bits that make this a symlink */
1179
        result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1180
    }
1181
    result->st_file_attributes = info->FileAttributes;
1182
    switch (info->DeviceType) {
1183
    case FILE_DEVICE_DISK:
1184
    case FILE_DEVICE_VIRTUAL_DISK:
1185
    case FILE_DEVICE_DFS:
1186
    case FILE_DEVICE_CD_ROM:
1187
    case FILE_DEVICE_CONTROLLER:
1188
    case FILE_DEVICE_DATALINK:
1189
        break;
1190
    case FILE_DEVICE_DISK_FILE_SYSTEM:
1191
    case FILE_DEVICE_CD_ROM_FILE_SYSTEM:
1192
    case FILE_DEVICE_NETWORK_FILE_SYSTEM:
1193
        result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */
1194
        break;
1195
    case FILE_DEVICE_CONSOLE:
1196
    case FILE_DEVICE_NULL:
1197
    case FILE_DEVICE_KEYBOARD:
1198
    case FILE_DEVICE_MODEM:
1199
    case FILE_DEVICE_MOUSE:
1200
    case FILE_DEVICE_PARALLEL_PORT:
1201
    case FILE_DEVICE_PRINTER:
1202
    case FILE_DEVICE_SCREEN:
1203
    case FILE_DEVICE_SERIAL_PORT:
1204
    case FILE_DEVICE_SOUND:
1205
        result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR;
1206
        break;
1207
    case FILE_DEVICE_NAMED_PIPE:
1208
        result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO;
1209
        break;
1210
    default:
1211
        if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
1212
            result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR;
1213
        }
1214
        break;
1215
    }
1216
}
1217
1218
#endif
1219
1220
/* Return information about the file behind a file descriptor, without
   raising a Python exception.

   On POSIX, this is a plain call to fstat().

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152. For handles that are not disk files, only st_mode is filled
   (character device or FIFO; left at zero for other types).

   On Windows, set the last Windows error and return nonzero on error. On
   POSIX, set errno and return nonzero on error. Fill status and return 0 on
   success. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    switch (file_type) {
    case FILE_TYPE_DISK:
        /* regular path: query the detailed information below */
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION handle_info;
    FILE_BASIC_INFO basic_info;
    if (!GetFileInformationByHandle(handle, &handle_info) ||
        !GetFileInformationByHandleEx(handle, FileBasicInfo,
                                      &basic_info, sizeof(basic_info))) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    FILE_ID_INFO id_info;
    FILE_ID_INFO *id_info_ptr = &id_info;
    if (!GetFileInformationByHandleEx(handle, FileIdInfo,
                                      &id_info, sizeof(id_info))) {
        /* Failed to get FileIdInfo, so do not pass it along */
        id_info_ptr = NULL;
    }

    _Py_attribute_data_to_stat(&handle_info, 0, &basic_info, id_info_ptr, status);
    return 0;
#else
    return fstat(fd, status);
#endif
}
1290
1291
/* Return information about a file.
1292
1293
   On POSIX, use fstat().
1294
1295
   On Windows, use GetFileType() and GetFileInformationByHandle() which support
1296
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1297
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
1298
   #23152.
1299
1300
   Raise an exception and return -1 on error. On Windows, set the last Windows
1301
   error on error. On POSIX, set errno on error. Fill status and return 0 on
1302
   success.
1303
1304
   Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1305
   to call fstat(). The caller must hold the GIL. */
1306
int
1307
_Py_fstat(int fd, struct _Py_stat_struct *status)
1308
0
{
1309
0
    int res;
1310
1311
0
    _Py_AssertHoldsTstate();
1312
1313
0
    Py_BEGIN_ALLOW_THREADS
1314
0
    res = _Py_fstat_noraise(fd, status);
1315
0
    Py_END_ALLOW_THREADS
1316
1317
0
    if (res != 0) {
1318
#ifdef MS_WINDOWS
1319
        PyErr_SetFromWindowsErr(0);
1320
#else
1321
0
        PyErr_SetFromErrno(PyExc_OSError);
1322
0
#endif
1323
0
        return -1;
1324
0
    }
1325
0
    return 0;
1326
0
}
1327
1328
/* Like _Py_stat() but with a raw wide character filename.

   On Windows, call _wstat() and copy only st_mode into *buf on success.
   Elsewhere, encode the path with _Py_EncodeLocaleRaw() and call stat(); if
   the encoding fails, set errno to EINVAL and return -1.

   Return the result of _wstat() / stat() otherwise. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
#ifdef MS_WINDOWS
    struct _stat native;
    int rc = _wstat(path, &native);
    if (rc == 0) {
        buf->st_mode = native.st_mode;
    }
    return rc;
#else
    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return -1;
    }

    int rc = stat(encoded, buf);
    PyMem_RawFree(encoded);
    return rc;
#endif
}
1351
1352
1353
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1354
   call stat() otherwise. Only fill st_mode attribute on Windows.
1355
1356
   Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1357
   raised. */
1358
1359
int
1360
_Py_stat(PyObject *path, struct stat *statbuf)
1361
0
{
1362
#ifdef MS_WINDOWS
1363
    int err;
1364
1365
    wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1366
    if (wpath == NULL)
1367
        return -2;
1368
1369
    err = _Py_wstat(wpath, statbuf);
1370
    PyMem_Free(wpath);
1371
    return err;
1372
#else
1373
0
    int ret;
1374
0
    PyObject *bytes;
1375
0
    char *cpath;
1376
1377
0
    bytes = PyUnicode_EncodeFSDefault(path);
1378
0
    if (bytes == NULL)
1379
0
        return -2;
1380
1381
    /* check for embedded null bytes */
1382
0
    if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1383
0
        Py_DECREF(bytes);
1384
0
        return -2;
1385
0
    }
1386
1387
0
    ret = stat(cpath, statbuf);
1388
0
    Py_DECREF(bytes);
1389
0
    return ret;
1390
0
#endif
1391
0
}
1392
1393
#ifdef MS_WINDOWS
1394
// For some Windows API partitions, SetHandleInformation() is declared
1395
// but none of the handle flags are defined.
1396
#ifndef HANDLE_FLAG_INHERIT
1397
#define HANDLE_FLAG_INHERIT 0x00000001
1398
#endif
1399
#endif
1400
1401
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1402
static int
1403
get_inheritable(int fd, int raise)
1404
16
{
1405
#ifdef MS_WINDOWS
1406
    HANDLE handle;
1407
    DWORD flags;
1408
1409
    handle = _Py_get_osfhandle_noraise(fd);
1410
    if (handle == INVALID_HANDLE_VALUE) {
1411
        if (raise)
1412
            PyErr_SetFromErrno(PyExc_OSError);
1413
        return -1;
1414
    }
1415
1416
    if (!GetHandleInformation(handle, &flags)) {
1417
        if (raise)
1418
            PyErr_SetFromWindowsErr(0);
1419
        return -1;
1420
    }
1421
1422
    return (flags & HANDLE_FLAG_INHERIT);
1423
#else
1424
16
    int flags;
1425
1426
16
    flags = fcntl(fd, F_GETFD, 0);
1427
16
    if (flags == -1) {
1428
0
        if (raise)
1429
0
            PyErr_SetFromErrno(PyExc_OSError);
1430
0
        return -1;
1431
0
    }
1432
16
    return !(flags & FD_CLOEXEC);
1433
16
#endif
1434
16
}
1435
1436
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;
    return get_inheritable(fd, raise_on_error);
}
1444
1445
1446
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1447
static int
1448
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1449
1.16k
{
1450
#ifdef MS_WINDOWS
1451
    HANDLE handle;
1452
    DWORD flags;
1453
#else
1454
1.16k
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1455
1.16k
    static int ioctl_works = -1;
1456
1.16k
    int request;
1457
1.16k
    int err;
1458
1.16k
#endif
1459
1.16k
    int flags, new_flags;
1460
1.16k
    int res;
1461
1.16k
#endif
1462
1463
    /* atomic_flag_works can only be used to make the file descriptor
1464
       non-inheritable */
1465
1.16k
    assert(!(atomic_flag_works != NULL && inheritable));
1466
1467
1.16k
    if (atomic_flag_works != NULL && !inheritable) {
1468
1.16k
        if (_Py_atomic_load_int_relaxed(atomic_flag_works) == -1) {
1469
16
            int isInheritable = get_inheritable(fd, raise);
1470
16
            if (isInheritable == -1)
1471
0
                return -1;
1472
16
            _Py_atomic_store_int_relaxed(atomic_flag_works, !isInheritable);
1473
16
        }
1474
1475
1.16k
        if (_Py_atomic_load_int_relaxed(atomic_flag_works))
1476
1.16k
            return 0;
1477
1.16k
    }
1478
1479
#ifdef MS_WINDOWS
1480
    handle = _Py_get_osfhandle_noraise(fd);
1481
    if (handle == INVALID_HANDLE_VALUE) {
1482
        if (raise)
1483
            PyErr_SetFromErrno(PyExc_OSError);
1484
        return -1;
1485
    }
1486
1487
    if (inheritable)
1488
        flags = HANDLE_FLAG_INHERIT;
1489
    else
1490
        flags = 0;
1491
1492
    if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1493
        if (raise)
1494
            PyErr_SetFromWindowsErr(0);
1495
        return -1;
1496
    }
1497
    return 0;
1498
1499
#else
1500
1501
0
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1502
0
    if (raise != 0 && _Py_atomic_load_int_relaxed(&ioctl_works) != 0) {
1503
        /* fast-path: ioctl() only requires one syscall */
1504
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
1505
         * thus avoid using ioctl() so we skip the fast-path. */
1506
0
        if (inheritable)
1507
0
            request = FIONCLEX;
1508
0
        else
1509
0
            request = FIOCLEX;
1510
0
        err = ioctl(fd, request, NULL);
1511
0
        if (!err) {
1512
0
            if (_Py_atomic_load_int_relaxed(&ioctl_works) == -1) {
1513
0
                _Py_atomic_store_int_relaxed(&ioctl_works, 1);
1514
0
            }
1515
0
            return 0;
1516
0
        }
1517
1518
0
#ifdef O_PATH
1519
0
        if (errno == EBADF) {
1520
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1521
            // on O_PATH file descriptors. Fall through to the fcntl()
1522
            // implementation.
1523
0
        }
1524
0
        else
1525
0
#endif
1526
0
        if (errno != ENOTTY && errno != EACCES) {
1527
0
            if (raise)
1528
0
                PyErr_SetFromErrno(PyExc_OSError);
1529
0
            return -1;
1530
0
        }
1531
0
        else {
1532
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1533
               device". The ioctl is declared but not supported by the kernel.
1534
               Remember that ioctl() doesn't work. It is the case on
1535
               Illumos-based OS for example.
1536
1537
               Issue #27057: When SELinux policy disallows ioctl it will fail
1538
               with EACCES. While FIOCLEX is safe operation it may be
1539
               unavailable because ioctl was denied altogether.
1540
               This can be the case on Android. */
1541
0
            _Py_atomic_store_int_relaxed(&ioctl_works, 0);
1542
0
        }
1543
        /* fallback to fcntl() if ioctl() does not work */
1544
0
    }
1545
0
#endif
1546
1547
    /* slow-path: fcntl() requires two syscalls */
1548
0
    flags = fcntl(fd, F_GETFD);
1549
0
    if (flags < 0) {
1550
0
        if (raise)
1551
0
            PyErr_SetFromErrno(PyExc_OSError);
1552
0
        return -1;
1553
0
    }
1554
1555
0
    if (inheritable) {
1556
0
        new_flags = flags & ~FD_CLOEXEC;
1557
0
    }
1558
0
    else {
1559
0
        new_flags = flags | FD_CLOEXEC;
1560
0
    }
1561
1562
0
    if (new_flags == flags) {
1563
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
1564
0
        return 0;
1565
0
    }
1566
1567
0
    res = fcntl(fd, F_SETFD, new_flags);
1568
0
    if (res < 0) {
1569
0
        if (raise)
1570
0
            PyErr_SetFromErrno(PyExc_OSError);
1571
0
        return -1;
1572
0
    }
1573
0
    return 0;
1574
0
#endif
1575
0
}
1576
1577
/* Make the file descriptor non-inheritable.
1578
   Return 0 on success, set errno and return -1 on error. */
1579
static int
1580
make_non_inheritable(int fd)
1581
0
{
1582
0
    return set_inheritable(fd, 0, 0, NULL);
1583
0
}
1584
1585
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, query the descriptor: if it is already
      non-inheritable, set *atomic_flag_works to 1 and do nothing else;
      otherwise set *atomic_flag_works to 0 and make the descriptor
      non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1606
1607
/* Same as _Py_set_inheritable() but never raise an exception: errors are
   reported through the -1 return value (and errno) only.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1615
1616
static int
1617
_Py_open_impl(const char *pathname, int flags, int gil_held)
1618
0
{
1619
0
    int fd;
1620
0
    int async_err = 0;
1621
0
#ifndef MS_WINDOWS
1622
0
    int *atomic_flag_works;
1623
0
#endif
1624
1625
#ifdef MS_WINDOWS
1626
    flags |= O_NOINHERIT;
1627
#elif defined(O_CLOEXEC)
1628
    atomic_flag_works = &_Py_open_cloexec_works;
1629
0
    flags |= O_CLOEXEC;
1630
#else
1631
    atomic_flag_works = NULL;
1632
#endif
1633
1634
0
    if (gil_held) {
1635
0
        PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1636
0
        if (pathname_obj == NULL) {
1637
0
            return -1;
1638
0
        }
1639
0
        if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1640
0
            Py_DECREF(pathname_obj);
1641
0
            return -1;
1642
0
        }
1643
1644
0
        do {
1645
0
            Py_BEGIN_ALLOW_THREADS
1646
0
            fd = open(pathname, flags);
1647
0
            Py_END_ALLOW_THREADS
1648
0
        } while (fd < 0
1649
0
                 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1650
0
        if (async_err) {
1651
0
            Py_DECREF(pathname_obj);
1652
0
            return -1;
1653
0
        }
1654
0
        if (fd < 0) {
1655
0
            PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1656
0
            Py_DECREF(pathname_obj);
1657
0
            return -1;
1658
0
        }
1659
0
        Py_DECREF(pathname_obj);
1660
0
    }
1661
0
    else {
1662
0
        fd = open(pathname, flags);
1663
0
        if (fd < 0)
1664
0
            return -1;
1665
0
    }
1666
1667
0
#ifndef MS_WINDOWS
1668
0
    if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1669
0
        close(fd);
1670
0
        return -1;
1671
0
    }
1672
0
#endif
1673
1674
0
    return fd;
1675
0
}
1676
1677
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* The GIL-held variant of _Py_open_impl() is used below, so the caller
       must actually hold the GIL. */
    _Py_AssertHoldsTstate();

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1694
1695
/* Open a file with the specified flags (wrapper to open() function),
   without raising Python exceptions.
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   The call is not retried: if interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1706
1707
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
1708
   encoding and use fopen() otherwise.
1709
1710
   The file descriptor is created non-inheritable.
1711
1712
   If interrupted by a signal, fail with EINTR. */
1713
FILE *
1714
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1715
80
{
1716
80
    FILE *f;
1717
80
    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1718
0
        return NULL;
1719
0
    }
1720
80
#ifndef MS_WINDOWS
1721
80
    char *cpath;
1722
80
    char cmode[10];
1723
80
    size_t r;
1724
80
    r = wcstombs(cmode, mode, 10);
1725
80
    if (r == DECODE_ERROR || r >= 10) {
1726
0
        errno = EINVAL;
1727
0
        return NULL;
1728
0
    }
1729
80
    cpath = _Py_EncodeLocaleRaw(path, NULL);
1730
80
    if (cpath == NULL) {
1731
0
        return NULL;
1732
0
    }
1733
80
    f = fopen(cpath, cmode);
1734
80
    PyMem_RawFree(cpath);
1735
#else
1736
    f = _wfopen(path, mode);
1737
#endif
1738
80
    if (f == NULL)
1739
80
        return NULL;
1740
0
    if (make_non_inheritable(fileno(f)) < 0) {
1741
0
        fclose(f);
1742
0
        return NULL;
1743
0
    }
1744
0
    return f;
1745
0
}
1746
1747
1748
/* Open a file.
1749
1750
   On Windows, if 'path' is a Unicode string, call _wfopen(). Otherwise, encode
1751
   the path to the filesystem encoding and call fopen().
1752
1753
   Return the new file object on success. Raise an exception and return NULL
1754
   on error.
1755
1756
   The file descriptor is created non-inheritable.
1757
1758
   When interrupted by a signal (open() fails with EINTR), retry the syscall,
1759
   except if the Python signal handler raises an exception.
1760
1761
   Release the GIL to call _wfopen() or fopen(). The caller must hold
1762
   the GIL. */
1763
FILE*
1764
Py_fopen(PyObject *path, const char *mode)
1765
11.8k
{
1766
11.8k
    _Py_AssertHoldsTstate();
1767
1768
11.8k
    if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1769
0
        return NULL;
1770
0
    }
1771
1772
11.8k
    FILE *f;
1773
11.8k
    int async_err = 0;
1774
11.8k
    int saved_errno;
1775
#ifdef MS_WINDOWS
1776
    PyObject *unicode;
1777
    if (!PyUnicode_FSDecoder(path, &unicode)) {
1778
        return NULL;
1779
    }
1780
1781
    wchar_t *wpath = PyUnicode_AsWideCharString(unicode, NULL);
1782
    Py_DECREF(unicode);
1783
    if (wpath == NULL) {
1784
        return NULL;
1785
    }
1786
1787
    wchar_t wmode[10];
1788
    int usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1789
                                    wmode, Py_ARRAY_LENGTH(wmode));
1790
    if (usize == 0) {
1791
        PyErr_SetFromWindowsErr(0);
1792
        PyMem_Free(wpath);
1793
        return NULL;
1794
    }
1795
1796
    do {
1797
        Py_BEGIN_ALLOW_THREADS
1798
        _Py_BEGIN_SUPPRESS_IPH
1799
        f = _wfopen(wpath, wmode);
1800
        _Py_END_SUPPRESS_IPH
1801
        Py_END_ALLOW_THREADS
1802
    } while (f == NULL
1803
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1804
    saved_errno = errno;
1805
    PyMem_Free(wpath);
1806
#else
1807
11.8k
    PyObject *bytes;
1808
11.8k
    if (!PyUnicode_FSConverter(path, &bytes)) {
1809
0
        return NULL;
1810
0
    }
1811
11.8k
    const char *path_bytes = PyBytes_AS_STRING(bytes);
1812
1813
11.8k
    do {
1814
11.8k
        Py_BEGIN_ALLOW_THREADS
1815
11.8k
        f = fopen(path_bytes, mode);
1816
11.8k
        Py_END_ALLOW_THREADS
1817
11.8k
    } while (f == NULL
1818
11.8k
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1819
11.8k
    saved_errno = errno;
1820
11.8k
    Py_DECREF(bytes);
1821
11.8k
#endif
1822
1823
11.8k
    if (async_err) {
1824
0
        return NULL;
1825
0
    }
1826
1827
11.8k
    if (f == NULL) {
1828
11.8k
        errno = saved_errno;
1829
11.8k
        PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1830
11.8k
        return NULL;
1831
11.8k
    }
1832
1833
0
    if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1834
0
        fclose(f);
1835
0
        return NULL;
1836
0
    }
1837
0
    return f;
1838
0
}
1839
1840
1841
// Close a stream with fclose() and return its result.
//
// On Windows, files opened by Py_fopen() in the Python DLL must be closed by
// the Python DLL to use the same C runtime version. Otherwise, calling
// fclose() directly can cause undefined behavior.
int
Py_fclose(FILE *file)
{
    int status = fclose(file);
    return status;
}
1851
1852
1853
/* Read count bytes from fd into buf.
1854
1855
   On success, return the number of read bytes, it can be lower than count.
1856
   If the current file offset is at or past the end of file, no bytes are read,
1857
   and read() returns zero.
1858
1859
   On error, raise an exception, set errno and return -1.
1860
1861
   When interrupted by a signal (read() fails with EINTR), retry the syscall.
1862
   If the Python signal handler raises an exception, the function returns -1
1863
   (the syscall is not retried).
1864
1865
   Release the GIL to call read(). The caller must hold the GIL. */
1866
Py_ssize_t
1867
_Py_read(int fd, void *buf, size_t count)
1868
1.88k
{
1869
1.88k
    Py_ssize_t n;
1870
1.88k
    int err;
1871
1.88k
    int async_err = 0;
1872
1873
1.88k
    _Py_AssertHoldsTstate();
1874
1875
    /* _Py_read() must not be called with an exception set, otherwise the
1876
     * caller may think that read() was interrupted by a signal and the signal
1877
     * handler raised an exception. */
1878
1.88k
    assert(!PyErr_Occurred());
1879
1880
1.88k
    if (count > _PY_READ_MAX) {
1881
0
        count = _PY_READ_MAX;
1882
0
    }
1883
1884
1.88k
    _Py_BEGIN_SUPPRESS_IPH
1885
1.88k
    do {
1886
1.88k
        Py_BEGIN_ALLOW_THREADS
1887
1.88k
        errno = 0;
1888
#ifdef MS_WINDOWS
1889
        _doserrno = 0;
1890
        n = read(fd, buf, (int)count);
1891
        // read() on a non-blocking empty pipe fails with EINVAL, which is
1892
        // mapped from the Windows error code ERROR_NO_DATA.
1893
        if (n < 0 && errno == EINVAL) {
1894
            if (_doserrno == ERROR_NO_DATA) {
1895
                errno = EAGAIN;
1896
            }
1897
        }
1898
#else
1899
1.88k
        n = read(fd, buf, count);
1900
1.88k
#endif
1901
        /* save/restore errno because PyErr_CheckSignals()
1902
         * and PyErr_SetFromErrno() can modify it */
1903
1.88k
        err = errno;
1904
1.88k
        Py_END_ALLOW_THREADS
1905
1.88k
    } while (n < 0 && err == EINTR &&
1906
1.88k
            !(async_err = PyErr_CheckSignals()));
1907
1.88k
    _Py_END_SUPPRESS_IPH
1908
1909
1.88k
    if (async_err) {
1910
        /* read() was interrupted by a signal (failed with EINTR)
1911
         * and the Python signal handler raised an exception */
1912
0
        errno = err;
1913
0
        assert(errno == EINTR && PyErr_Occurred());
1914
0
        return -1;
1915
0
    }
1916
1.88k
    if (n < 0) {
1917
0
        PyErr_SetFromErrno(PyExc_OSError);
1918
0
        errno = err;
1919
0
        return -1;
1920
0
    }
1921
1922
1.88k
    return n;
1923
1.88k
}
1924
1925
static Py_ssize_t
1926
_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1927
216
{
1928
216
    Py_ssize_t n;
1929
216
    int err;
1930
216
    int async_err = 0;
1931
1932
216
    _Py_BEGIN_SUPPRESS_IPH
1933
#ifdef MS_WINDOWS
1934
    if (count > 32767) {
1935
        /* Issue #11395: the Windows console returns an error (12: not
1936
           enough space error) on writing into stdout if stdout mode is
1937
           binary and the length is greater than 66,000 bytes (or less,
1938
           depending on heap usage). */
1939
        if (gil_held) {
1940
            Py_BEGIN_ALLOW_THREADS
1941
            if (isatty(fd)) {
1942
                count = 32767;
1943
            }
1944
            Py_END_ALLOW_THREADS
1945
        } else {
1946
            if (isatty(fd)) {
1947
                count = 32767;
1948
            }
1949
        }
1950
    }
1951
1952
#endif
1953
216
    if (count > _PY_WRITE_MAX) {
1954
0
        count = _PY_WRITE_MAX;
1955
0
    }
1956
1957
216
    if (gil_held) {
1958
216
        do {
1959
216
            Py_BEGIN_ALLOW_THREADS
1960
216
            errno = 0;
1961
#ifdef MS_WINDOWS
1962
            // write() on a non-blocking pipe fails with ENOSPC on Windows if
1963
            // the pipe lacks available space for the entire buffer.
1964
            int c = (int)count;
1965
            do {
1966
                _doserrno = 0;
1967
                n = write(fd, buf, c);
1968
                if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1969
                    break;
1970
                }
1971
                errno = EAGAIN;
1972
                c /= 2;
1973
            } while (c > 0);
1974
#else
1975
216
            n = write(fd, buf, count);
1976
216
#endif
1977
            /* save/restore errno because PyErr_CheckSignals()
1978
             * and PyErr_SetFromErrno() can modify it */
1979
216
            err = errno;
1980
216
            Py_END_ALLOW_THREADS
1981
216
        } while (n < 0 && err == EINTR &&
1982
216
                !(async_err = PyErr_CheckSignals()));
1983
216
    }
1984
0
    else {
1985
0
        do {
1986
0
            errno = 0;
1987
#ifdef MS_WINDOWS
1988
            // write() on a non-blocking pipe fails with ENOSPC on Windows if
1989
            // the pipe lacks available space for the entire buffer.
1990
            int c = (int)count;
1991
            do {
1992
                _doserrno = 0;
1993
                n = write(fd, buf, c);
1994
                if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1995
                    break;
1996
                }
1997
                errno = EAGAIN;
1998
                c /= 2;
1999
            } while (c > 0);
2000
#else
2001
0
            n = write(fd, buf, count);
2002
0
#endif
2003
0
            err = errno;
2004
0
        } while (n < 0 && err == EINTR);
2005
0
    }
2006
216
    _Py_END_SUPPRESS_IPH
2007
2008
216
    if (async_err) {
2009
        /* write() was interrupted by a signal (failed with EINTR)
2010
           and the Python signal handler raised an exception (if gil_held is
2011
           nonzero). */
2012
0
        errno = err;
2013
0
        assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
2014
0
        return -1;
2015
0
    }
2016
216
    if (n < 0) {
2017
0
        if (gil_held)
2018
0
            PyErr_SetFromErrno(PyExc_OSError);
2019
0
        errno = err;
2020
0
        return -1;
2021
0
    }
2022
2023
216
    return n;
2024
216
}
2025
2026
/* Write count bytes of buf into fd.
2027
2028
   On success, return the number of written bytes, it can be lower than count
2029
   including 0. On error, raise an exception, set errno and return -1.
2030
2031
   When interrupted by a signal (write() fails with EINTR), retry the syscall.
2032
   If the Python signal handler raises an exception, the function returns -1
2033
   (the syscall is not retried).
2034
2035
   Release the GIL to call write(). The caller must hold the GIL. */
2036
Py_ssize_t
2037
_Py_write(int fd, const void *buf, size_t count)
2038
216
{
2039
216
    _Py_AssertHoldsTstate();
2040
2041
    /* _Py_write() must not be called with an exception set, otherwise the
2042
     * caller may think that write() was interrupted by a signal and the signal
2043
     * handler raised an exception. */
2044
216
    assert(!PyErr_Occurred());
2045
2046
216
    return _Py_write_impl(fd, buf, count, 1);
2047
216
}
2048
2049
/* Write count bytes of buf into fd.
2050
 *
2051
 * On success, return the number of written bytes, it can be lower than count
2052
 * including 0. On error, set errno and return -1.
2053
 *
2054
 * When interrupted by a signal (write() fails with EINTR), retry the syscall
2055
 * without calling the Python signal handler. */
2056
Py_ssize_t
2057
_Py_write_noraise(int fd, const void *buf, size_t count)
2058
0
{
2059
0
    return _Py_write_impl(fd, buf, count, 0);
2060
0
}
2061
2062
#ifdef HAVE_READLINK
2063
2064
/* Read value of symbolic link. Encode the path to the locale encoding, decode
2065
   the result from the locale encoding.
2066
2067
   Return -1 on encoding error, on readlink() error, if the internal buffer is
2068
   too short, on decoding error, or if 'buf' is too short. */
2069
int
2070
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
2071
32
{
2072
32
    char *cpath;
2073
32
    char cbuf[MAXPATHLEN];
2074
32
    size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
2075
32
    wchar_t *wbuf;
2076
32
    Py_ssize_t res;
2077
32
    size_t r1;
2078
2079
32
    cpath = _Py_EncodeLocaleRaw(path, NULL);
2080
32
    if (cpath == NULL) {
2081
0
        errno = EINVAL;
2082
0
        return -1;
2083
0
    }
2084
32
    res = readlink(cpath, cbuf, cbuf_len);
2085
32
    PyMem_RawFree(cpath);
2086
32
    if (res == -1) {
2087
16
        return -1;
2088
16
    }
2089
16
    if ((size_t)res == cbuf_len) {
2090
0
        errno = EINVAL;
2091
0
        return -1;
2092
0
    }
2093
16
    cbuf[res] = '\0'; /* buf will be null terminated */
2094
16
    wbuf = Py_DecodeLocale(cbuf, &r1);
2095
16
    if (wbuf == NULL) {
2096
0
        errno = EINVAL;
2097
0
        return -1;
2098
0
    }
2099
    /* wbuf must have space to store the trailing NUL character */
2100
16
    if (buflen <= r1) {
2101
0
        PyMem_RawFree(wbuf);
2102
0
        errno = EINVAL;
2103
0
        return -1;
2104
0
    }
2105
16
    wcsncpy(buf, wbuf, buflen);
2106
16
    PyMem_RawFree(wbuf);
2107
16
    return (int)r1;
2108
16
}
2109
#endif
2110
2111
#ifdef HAVE_REALPATH
2112
2113
/* Return the canonicalized absolute pathname. Encode path to the locale
2114
   encoding, decode the result from the locale encoding.
2115
2116
   Return NULL on encoding error, realpath() error, decoding error
2117
   or if 'resolved_path' is too short. */
2118
wchar_t*
2119
_Py_wrealpath(const wchar_t *path,
2120
              wchar_t *resolved_path, size_t resolved_path_len)
2121
0
{
2122
0
    char *cpath;
2123
0
    char cresolved_path[MAXPATHLEN];
2124
0
    wchar_t *wresolved_path;
2125
0
    char *res;
2126
0
    size_t r;
2127
0
    cpath = _Py_EncodeLocaleRaw(path, NULL);
2128
0
    if (cpath == NULL) {
2129
0
        errno = EINVAL;
2130
0
        return NULL;
2131
0
    }
2132
0
    res = realpath(cpath, cresolved_path);
2133
0
    PyMem_RawFree(cpath);
2134
0
    if (res == NULL)
2135
0
        return NULL;
2136
2137
0
    wresolved_path = Py_DecodeLocale(cresolved_path, &r);
2138
0
    if (wresolved_path == NULL) {
2139
0
        errno = EINVAL;
2140
0
        return NULL;
2141
0
    }
2142
    /* wresolved_path must have space to store the trailing NUL character */
2143
0
    if (resolved_path_len <= r) {
2144
0
        PyMem_RawFree(wresolved_path);
2145
0
        errno = EINVAL;
2146
0
        return NULL;
2147
0
    }
2148
0
    wcsncpy(resolved_path, wresolved_path, resolved_path_len);
2149
0
    PyMem_RawFree(wresolved_path);
2150
0
    return resolved_path;
2151
0
}
2152
#endif
2153
2154
2155
int
2156
_Py_isabs(const wchar_t *path)
2157
304
{
2158
#ifdef MS_WINDOWS
2159
    const wchar_t *tail;
2160
    HRESULT hr = PathCchSkipRoot(path, &tail);
2161
    if (FAILED(hr) || path == tail) {
2162
        return 0;
2163
    }
2164
    if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
2165
        // Exclude paths with leading SEP
2166
        return 0;
2167
    }
2168
    if (tail == &path[2] && path[1] == L':') {
2169
        // Exclude drive-relative paths (e.g. C:filename.ext)
2170
        return 0;
2171
    }
2172
    return 1;
2173
#else
2174
304
    return (path[0] == SEP);
2175
304
#endif
2176
304
}
2177
2178
2179
/* Get an absolute path.
2180
   On error (ex: fail to get the current directory), return -1.
2181
   On memory allocation failure, set *abspath_p to NULL and return 0.
2182
   On success, return a newly allocated to *abspath_p to and return 0.
2183
   The string must be freed by PyMem_RawFree(). */
2184
int
2185
_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2186
0
{
2187
0
    if (path[0] == '\0' || !wcscmp(path, L".")) {
2188
0
        wchar_t cwd[MAXPATHLEN + 1];
2189
0
        cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2190
0
        if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2191
            /* unable to get the current directory */
2192
0
            return -1;
2193
0
        }
2194
0
        *abspath_p = _PyMem_RawWcsdup(cwd);
2195
0
        return 0;
2196
0
    }
2197
2198
0
    if (_Py_isabs(path)) {
2199
0
        *abspath_p = _PyMem_RawWcsdup(path);
2200
0
        return 0;
2201
0
    }
2202
2203
#ifdef MS_WINDOWS
2204
    return _PyOS_getfullpathname(path, abspath_p);
2205
#else
2206
0
    wchar_t cwd[MAXPATHLEN + 1];
2207
0
    cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2208
0
    if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2209
        /* unable to get the current directory */
2210
0
        return -1;
2211
0
    }
2212
2213
0
    size_t cwd_len = wcslen(cwd);
2214
0
    size_t path_len = wcslen(path);
2215
0
    size_t len = cwd_len + 1 + path_len + 1;
2216
0
    if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2217
0
        *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2218
0
    }
2219
0
    else {
2220
0
        *abspath_p = NULL;
2221
0
    }
2222
0
    if (*abspath_p == NULL) {
2223
0
        return 0;
2224
0
    }
2225
2226
0
    wchar_t *abspath = *abspath_p;
2227
0
    memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2228
0
    abspath += cwd_len;
2229
2230
0
    *abspath = (wchar_t)SEP;
2231
0
    abspath++;
2232
2233
0
    memcpy(abspath, path, path_len * sizeof(wchar_t));
2234
0
    abspath += path_len;
2235
2236
0
    *abspath = 0;
2237
0
    return 0;
2238
0
#endif
2239
0
}
2240
2241
// The Windows Games API family implements the PathCch* APIs in the Xbox OS,
2242
// but does not expose them yet. Load them dynamically until
2243
// 1) they are officially exposed
2244
// 2) we stop supporting older versions of the GDK which do not expose them
2245
#if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
2246
HRESULT
2247
PathCchSkipRoot(const wchar_t *path, const wchar_t **rootEnd)
2248
{
2249
    static int initialized = 0;
2250
    typedef HRESULT(__stdcall *PPathCchSkipRoot) (PCWSTR pszPath,
2251
                                                  PCWSTR *ppszRootEnd);
2252
    static PPathCchSkipRoot _PathCchSkipRoot;
2253
2254
    if (initialized == 0) {
2255
        HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2256
                                         LOAD_LIBRARY_SEARCH_SYSTEM32);
2257
        if (pathapi) {
2258
            _PathCchSkipRoot = (PPathCchSkipRoot)GetProcAddress(
2259
                pathapi, "PathCchSkipRoot");
2260
        }
2261
        else {
2262
            _PathCchSkipRoot = NULL;
2263
        }
2264
        initialized = 1;
2265
    }
2266
2267
    if (!_PathCchSkipRoot) {
2268
        return E_NOINTERFACE;
2269
    }
2270
2271
    return _PathCchSkipRoot(path, rootEnd);
2272
}
2273
2274
static HRESULT
2275
PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
2276
                 const wchar_t *relfile, unsigned long flags)
2277
{
2278
    static int initialized = 0;
2279
    typedef HRESULT(__stdcall *PPathCchCombineEx) (PWSTR pszPathOut,
2280
                                                   size_t cchPathOut,
2281
                                                   PCWSTR pszPathIn,
2282
                                                   PCWSTR pszMore,
2283
                                                   unsigned long dwFlags);
2284
    static PPathCchCombineEx _PathCchCombineEx;
2285
2286
    if (initialized == 0) {
2287
        HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2288
                                         LOAD_LIBRARY_SEARCH_SYSTEM32);
2289
        if (pathapi) {
2290
            _PathCchCombineEx = (PPathCchCombineEx)GetProcAddress(
2291
                pathapi, "PathCchCombineEx");
2292
        }
2293
        else {
2294
            _PathCchCombineEx = NULL;
2295
        }
2296
        initialized = 1;
2297
    }
2298
2299
    if (!_PathCchCombineEx) {
2300
        return E_NOINTERFACE;
2301
    }
2302
2303
    return _PathCchCombineEx(buffer, bufsize, dirname, relfile, flags);
2304
}
2305
2306
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
2307
2308
void
2309
_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
2310
             Py_ssize_t *rootsize)
2311
224
{
2312
224
    assert(drvsize);
2313
224
    assert(rootsize);
2314
224
#ifndef MS_WINDOWS
2315
672
#define IS_SEP(x) (*(x) == SEP)
2316
224
    *drvsize = 0;
2317
224
    if (!IS_SEP(&path[0])) {
2318
        // Relative path, e.g.: 'foo'
2319
0
        *rootsize = 0;
2320
0
    }
2321
224
    else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) {
2322
        // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
2323
224
        *rootsize = 1;
2324
224
    }
2325
0
    else {
2326
        // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
2327
        // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
2328
0
        *rootsize = 2;
2329
0
    }
2330
224
#undef IS_SEP
2331
#else
2332
    const wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2333
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2334
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2335
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2336
    if (IS_SEP(&path[0])) {
2337
        if (IS_SEP(&path[1])) {
2338
            // Device drives, e.g. \\.\device or \\?\device
2339
            // UNC drives, e.g. \\server\share or \\?\UNC\server\share
2340
            Py_ssize_t idx;
2341
            if (path[2] == L'?' && IS_SEP(&path[3]) &&
2342
                (path[4] == L'U' || path[4] == L'u') &&
2343
                (path[5] == L'N' || path[5] == L'n') &&
2344
                (path[6] == L'C' || path[6] == L'c') &&
2345
                IS_SEP(&path[7]))
2346
            {
2347
                idx = 8;
2348
            }
2349
            else {
2350
                idx = 2;
2351
            }
2352
            while (!SEP_OR_END(&path[idx])) {
2353
                idx++;
2354
            }
2355
            if (IS_END(&path[idx])) {
2356
                *drvsize = idx;
2357
                *rootsize = 0;
2358
            }
2359
            else {
2360
                idx++;
2361
                while (!SEP_OR_END(&path[idx])) {
2362
                    idx++;
2363
                }
2364
                *drvsize = idx;
2365
                if (IS_END(&path[idx])) {
2366
                    *rootsize = 0;
2367
                }
2368
                else {
2369
                    *rootsize = 1;
2370
                }
2371
            }
2372
        }
2373
        else {
2374
            // Relative path with root, e.g. \Windows
2375
            *drvsize = 0;
2376
            *rootsize = 1;
2377
        }
2378
    }
2379
    else if (!IS_END(&path[0]) && path[1] == L':') {
2380
        *drvsize = 2;
2381
        if (IS_SEP(&path[2])) {
2382
            // Absolute drive-letter path, e.g. X:\Windows
2383
            *rootsize = 1;
2384
        }
2385
        else {
2386
            // Relative path with drive, e.g. X:Windows
2387
            *rootsize = 0;
2388
        }
2389
    }
2390
    else {
2391
        // Relative path, e.g. Windows
2392
        *drvsize = 0;
2393
        *rootsize = 0;
2394
    }
2395
#undef SEP_OR_END
2396
#undef IS_SEP
2397
#undef IS_END
2398
#endif
2399
224
}
2400
2401
// The caller must ensure "buffer" is big enough.
2402
static int
2403
join_relfile(wchar_t *buffer, size_t bufsize,
2404
             const wchar_t *dirname, const wchar_t *relfile)
2405
144
{
2406
#ifdef MS_WINDOWS
2407
    if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2408
        PATHCCH_ALLOW_LONG_PATHS))) {
2409
        return -1;
2410
    }
2411
#else
2412
144
    assert(!_Py_isabs(relfile));
2413
144
    size_t dirlen = wcslen(dirname);
2414
144
    size_t rellen = wcslen(relfile);
2415
144
    size_t maxlen = bufsize - 1;
2416
144
    if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
2417
0
        return -1;
2418
0
    }
2419
144
    if (dirlen == 0) {
2420
        // We do not add a leading separator.
2421
0
        wcscpy(buffer, relfile);
2422
0
    }
2423
144
    else {
2424
144
        if (dirname != buffer) {
2425
0
            wcscpy(buffer, dirname);
2426
0
        }
2427
144
        size_t relstart = dirlen;
2428
144
        if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
2429
144
            buffer[dirlen] = SEP;
2430
144
            relstart += 1;
2431
144
        }
2432
144
        wcscpy(&buffer[relstart], relfile);
2433
144
    }
2434
144
#endif
2435
144
    return 0;
2436
144
}
2437
2438
/* Join the two paths together, like os.path.join().  Return NULL
2439
   if memory could not be allocated.  The caller is responsible
2440
   for calling PyMem_RawFree() on the result. */
2441
wchar_t *
2442
_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2443
0
{
2444
0
    assert(dirname != NULL && relfile != NULL);
2445
0
#ifndef MS_WINDOWS
2446
0
    assert(!_Py_isabs(relfile));
2447
0
#endif
2448
0
    size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2449
0
    size_t bufsize = maxlen + 1;
2450
0
    wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2451
0
    if (filename == NULL) {
2452
0
        return NULL;
2453
0
    }
2454
0
    assert(wcslen(dirname) < MAXPATHLEN);
2455
0
    assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2456
0
    if (join_relfile(filename, bufsize, dirname, relfile) < 0) {
2457
0
        PyMem_RawFree(filename);
2458
0
        return NULL;
2459
0
    }
2460
0
    return filename;
2461
0
}
2462
2463
/* Join the two paths together in place, like os.path.join().
     dirname: the target buffer with the dirname already in place,
              including trailing NUL
     relfile: this must be a relative path
     bufsize: total allocated size of the buffer
   Return 0 on success, -1 if anything is wrong with the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);
    /* The dirname buffer is both the source directory and the destination. */
    wchar_t *target = dirname;
    return join_relfile(target, bufsize, dirname, relfile);
}
2476
2477
2478
size_t
2479
_Py_find_basename(const wchar_t *filename)
2480
0
{
2481
0
    for (size_t i = wcslen(filename); i > 0; --i) {
2482
0
        if (filename[i] == SEP) {
2483
0
            return i + 1;
2484
0
        }
2485
0
    }
2486
0
    return 0;
2487
0
}
2488
2489
/* In-place path normalisation. Returns the start of the normalized
2490
   path, which will be within the original buffer. Guaranteed to not
2491
   make the path longer, and will not fail. 'size' is the length of
2492
   the path, if known. If -1, the first null character will be assumed
2493
   to be the end of the path. 'normsize' will be set to contain the
2494
   length of the resulting normalized path. */
2495
wchar_t *
2496
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
2497
224
{
2498
224
    assert(path != NULL);
2499
224
    if ((size < 0 && !path[0]) || size == 0) {
2500
0
        *normsize = 0;
2501
0
        return path;
2502
0
    }
2503
224
    wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2504
224
    wchar_t *p1 = path;     // sequentially scanned address in the path
2505
224
    wchar_t *p2 = path;     // destination of a scanned character to be ljusted
2506
224
    wchar_t *minP2 = path;  // the beginning of the destination range
2507
224
    wchar_t lastC = L'\0';  // the last ljusted character, p2[-1] in most cases
2508
2509
11.4k
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2510
#ifdef ALTSEP
2511
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2512
#else
2513
224
#define IS_SEP(x) (*(x) == SEP)
2514
224
#endif
2515
224
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2516
2517
224
    Py_ssize_t drvsize, rootsize;
2518
224
    _Py_skiproot(path, size, &drvsize, &rootsize);
2519
224
    if (drvsize || rootsize) {
2520
        // Skip past root and update minP2
2521
224
        p1 = &path[drvsize + rootsize];
2522
224
#ifndef ALTSEP
2523
224
        p2 = p1;
2524
#else
2525
        for (; p2 < p1; ++p2) {
2526
            if (*p2 == ALTSEP) {
2527
                *p2 = SEP;
2528
            }
2529
        }
2530
#endif
2531
224
        minP2 = p2 - 1;
2532
224
        lastC = *minP2;
2533
#ifdef MS_WINDOWS
2534
        if (lastC != SEP) {
2535
            minP2++;
2536
        }
2537
#endif
2538
224
    }
2539
224
    if (p1[0] == L'.' && SEP_OR_END(&p1[1])) {
2540
        // Skip leading '.\'
2541
0
        lastC = *++p1;
2542
#ifdef ALTSEP
2543
        if (lastC == ALTSEP) {
2544
            lastC = SEP;
2545
        }
2546
#endif
2547
0
        while (IS_SEP(p1)) {
2548
0
            p1++;
2549
0
        }
2550
0
    }
2551
2552
    /* if pEnd is specified, check that. Else, check for null terminator */
2553
11.4k
    for (; !IS_END(p1); ++p1) {
2554
11.2k
        wchar_t c = *p1;
2555
#ifdef ALTSEP
2556
        if (c == ALTSEP) {
2557
            c = SEP;
2558
        }
2559
#endif
2560
11.2k
        if (lastC == SEP) {
2561
1.16k
            if (c == L'.') {
2562
0
                int sep_at_1 = SEP_OR_END(&p1[1]);
2563
0
                int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
2564
0
                if (sep_at_2 && p1[1] == L'.') {
2565
0
                    wchar_t *p3 = p2;
2566
0
                    while (p3 != minP2 && *--p3 == SEP) { }
2567
0
                    while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
2568
0
                    if (p2 == minP2
2569
0
                        || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
2570
0
                    {
2571
                        // Previous segment is also ../, so append instead.
2572
                        // Relative path does not absorb ../ at minP2 as well.
2573
0
                        *p2++ = L'.';
2574
0
                        *p2++ = L'.';
2575
0
                        lastC = L'.';
2576
0
                    } else if (p3[0] == SEP) {
2577
                        // Absolute path, so absorb segment
2578
0
                        p2 = p3 + 1;
2579
0
                    } else {
2580
0
                        p2 = p3;
2581
0
                    }
2582
0
                    p1 += 1;
2583
0
                } else if (sep_at_1) {
2584
0
                } else {
2585
0
                    *p2++ = lastC = c;
2586
0
                }
2587
1.16k
            } else if (c == SEP) {
2588
1.16k
            } else {
2589
1.16k
                *p2++ = lastC = c;
2590
1.16k
            }
2591
10.0k
        } else {
2592
10.0k
            *p2++ = lastC = c;
2593
10.0k
        }
2594
11.2k
    }
2595
224
    *p2 = L'\0';
2596
224
    if (p2 != minP2) {
2597
224
        while (--p2 != minP2 && *p2 == SEP) {
2598
0
            *p2 = L'\0';
2599
0
        }
2600
224
    } else {
2601
0
        --p2;
2602
0
    }
2603
224
    *normsize = p2 - path + 1;
2604
224
#undef SEP_OR_END
2605
224
#undef IS_SEP
2606
224
#undef IS_END
2607
224
    return path;
2608
224
}
2609
2610
/* In-place path normalisation. Returns the start of the normalized
2611
   path, which will be within the original buffer. Guaranteed to not
2612
   make the path longer, and will not fail. 'size' is the length of
2613
   the path, if known. If -1, the first null character will be assumed
2614
   to be the end of the path. */
2615
wchar_t *
2616
_Py_normpath(wchar_t *path, Py_ssize_t size)
2617
144
{
2618
144
    Py_ssize_t norm_length;
2619
144
    return _Py_normpath_and_size(path, size, &norm_length);
2620
144
}
2621
2622
2623
/* Get the current directory. buflen is the buffer size in wide characters
2624
   including the null character. Decode the path from the locale encoding.
2625
2626
   Return NULL on getcwd() error, on decoding error, or if 'buf' is
2627
   too short. */
2628
wchar_t*
2629
_Py_wgetcwd(wchar_t *buf, size_t buflen)
2630
0
{
2631
#ifdef MS_WINDOWS
2632
    int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2633
    return _wgetcwd(buf, ibuflen);
2634
#else
2635
0
    char fname[MAXPATHLEN];
2636
0
    wchar_t *wname;
2637
0
    size_t len;
2638
2639
0
    if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2640
0
        return NULL;
2641
0
    wname = Py_DecodeLocale(fname, &len);
2642
0
    if (wname == NULL)
2643
0
        return NULL;
2644
    /* wname must have space to store the trailing NUL character */
2645
0
    if (buflen <= len) {
2646
0
        PyMem_RawFree(wname);
2647
0
        return NULL;
2648
0
    }
2649
0
    wcsncpy(buf, wname, buflen);
2650
0
    PyMem_RawFree(wname);
2651
0
    return buf;
2652
0
#endif
2653
0
}
2654
2655
/* Duplicate a file descriptor. The new file descriptor is created as
2656
   non-inheritable. Return a new file descriptor on success, raise an OSError
2657
   exception and return -1 on error.
2658
2659
   The GIL is released to call dup(). The caller must hold the GIL. */
2660
int
2661
_Py_dup(int fd)
2662
0
{
2663
#ifdef MS_WINDOWS
2664
    HANDLE handle;
2665
#endif
2666
2667
0
    _Py_AssertHoldsTstate();
2668
2669
#ifdef MS_WINDOWS
2670
    handle = _Py_get_osfhandle(fd);
2671
    if (handle == INVALID_HANDLE_VALUE)
2672
        return -1;
2673
2674
    Py_BEGIN_ALLOW_THREADS
2675
    _Py_BEGIN_SUPPRESS_IPH
2676
    fd = dup(fd);
2677
    _Py_END_SUPPRESS_IPH
2678
    Py_END_ALLOW_THREADS
2679
    if (fd < 0) {
2680
        PyErr_SetFromErrno(PyExc_OSError);
2681
        return -1;
2682
    }
2683
2684
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2685
        _Py_BEGIN_SUPPRESS_IPH
2686
        close(fd);
2687
        _Py_END_SUPPRESS_IPH
2688
        return -1;
2689
    }
2690
#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2691
0
    Py_BEGIN_ALLOW_THREADS
2692
0
    _Py_BEGIN_SUPPRESS_IPH
2693
0
    fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2694
0
    _Py_END_SUPPRESS_IPH
2695
0
    Py_END_ALLOW_THREADS
2696
0
    if (fd < 0) {
2697
0
        PyErr_SetFromErrno(PyExc_OSError);
2698
0
        return -1;
2699
0
    }
2700
2701
#elif HAVE_DUP
2702
    Py_BEGIN_ALLOW_THREADS
2703
    _Py_BEGIN_SUPPRESS_IPH
2704
    fd = dup(fd);
2705
    _Py_END_SUPPRESS_IPH
2706
    Py_END_ALLOW_THREADS
2707
    if (fd < 0) {
2708
        PyErr_SetFromErrno(PyExc_OSError);
2709
        return -1;
2710
    }
2711
2712
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2713
        _Py_BEGIN_SUPPRESS_IPH
2714
        close(fd);
2715
        _Py_END_SUPPRESS_IPH
2716
        return -1;
2717
    }
2718
#else
2719
    errno = ENOTSUP;
2720
    PyErr_SetFromErrno(PyExc_OSError);
2721
    return -1;
2722
#endif
2723
0
    return fd;
2724
0
}
2725
2726
#ifndef MS_WINDOWS
2727
/* Get the blocking mode of the file descriptor.
2728
   Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2729
   raise an exception and return -1 on error. */
2730
int
2731
_Py_get_blocking(int fd)
2732
0
{
2733
0
    int flags;
2734
0
    _Py_BEGIN_SUPPRESS_IPH
2735
0
    flags = fcntl(fd, F_GETFL, 0);
2736
0
    _Py_END_SUPPRESS_IPH
2737
0
    if (flags < 0) {
2738
0
        PyErr_SetFromErrno(PyExc_OSError);
2739
0
        return -1;
2740
0
    }
2741
2742
0
    return !(flags & O_NONBLOCK);
2743
0
}
2744
2745
/* Set the blocking mode of the specified file descriptor.
2746
2747
   Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2748
   otherwise.
2749
2750
   Return 0 on success, raise an exception and return -1 on error. */
2751
int
2752
_Py_set_blocking(int fd, int blocking)
2753
0
{
2754
/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2755
   Use fcntl() instead. */
2756
0
#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2757
0
    int arg = !blocking;
2758
0
    if (ioctl(fd, FIONBIO, &arg) < 0)
2759
0
        goto error;
2760
#else
2761
    int flags, res;
2762
2763
    _Py_BEGIN_SUPPRESS_IPH
2764
    flags = fcntl(fd, F_GETFL, 0);
2765
    if (flags >= 0) {
2766
        if (blocking)
2767
            flags = flags & (~O_NONBLOCK);
2768
        else
2769
            flags = flags | O_NONBLOCK;
2770
2771
        res = fcntl(fd, F_SETFL, flags);
2772
    } else {
2773
        res = -1;
2774
    }
2775
    _Py_END_SUPPRESS_IPH
2776
2777
    if (res < 0)
2778
        goto error;
2779
#endif
2780
0
    return 0;
2781
2782
0
error:
2783
0
    PyErr_SetFromErrno(PyExc_OSError);
2784
0
    return -1;
2785
0
}
2786
#else   /* MS_WINDOWS */
2787
2788
// The Windows Games API family doesn't expose GetNamedPipeHandleStateW so attempt
2789
// to load it directly from the Kernel32.dll
2790
#if !defined(MS_WINDOWS_APP) && !defined(MS_WINDOWS_SYSTEM)
2791
BOOL
2792
GetNamedPipeHandleStateW(HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount,
2793
                         LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize)
2794
{
2795
    static int initialized = 0;
2796
    typedef BOOL(__stdcall* PGetNamedPipeHandleStateW) (
2797
        HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount,
2798
        LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize);
2799
    static PGetNamedPipeHandleStateW _GetNamedPipeHandleStateW;
2800
2801
    if (initialized == 0) {
2802
        HMODULE api = LoadLibraryExW(L"Kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
2803
        if (api) {
2804
            _GetNamedPipeHandleStateW = (PGetNamedPipeHandleStateW)GetProcAddress(
2805
                api, "GetNamedPipeHandleStateW");
2806
        }
2807
        else {
2808
            _GetNamedPipeHandleStateW = NULL;
2809
        }
2810
        initialized = 1;
2811
    }
2812
2813
    if (!_GetNamedPipeHandleStateW) {
2814
        SetLastError(E_NOINTERFACE);
2815
        return FALSE;
2816
    }
2817
2818
    return _GetNamedPipeHandleStateW(
2819
        hNamedPipe, lpState, lpCurInstances, lpMaxCollectionCount, lpCollectDataTimeout, lpUserName, nMaxUserNameSize
2820
    );
2821
}
2822
#endif /* !MS_WINDOWS_APP && !MS_WINDOWS_SYSTEM */
2823
2824
int
2825
_Py_get_blocking(int fd)
2826
{
2827
    HANDLE handle;
2828
    DWORD mode;
2829
    BOOL success;
2830
2831
    handle = _Py_get_osfhandle(fd);
2832
    if (handle == INVALID_HANDLE_VALUE) {
2833
        return -1;
2834
    }
2835
2836
    Py_BEGIN_ALLOW_THREADS
2837
    success = GetNamedPipeHandleStateW(handle, &mode,
2838
                                       NULL, NULL, NULL, NULL, 0);
2839
    Py_END_ALLOW_THREADS
2840
2841
    if (!success) {
2842
        PyErr_SetFromWindowsErr(0);
2843
        return -1;
2844
    }
2845
2846
    return !(mode & PIPE_NOWAIT);
2847
}
2848
2849
int
2850
_Py_set_blocking(int fd, int blocking)
2851
{
2852
    HANDLE handle;
2853
    DWORD mode;
2854
    BOOL success;
2855
2856
    handle = _Py_get_osfhandle(fd);
2857
    if (handle == INVALID_HANDLE_VALUE) {
2858
        return -1;
2859
    }
2860
2861
    Py_BEGIN_ALLOW_THREADS
2862
    success = GetNamedPipeHandleStateW(handle, &mode,
2863
                                       NULL, NULL, NULL, NULL, 0);
2864
    if (success) {
2865
        if (blocking) {
2866
            mode &= ~PIPE_NOWAIT;
2867
        }
2868
        else {
2869
            mode |= PIPE_NOWAIT;
2870
        }
2871
        success = SetNamedPipeHandleState(handle, &mode, NULL, NULL);
2872
    }
2873
    Py_END_ALLOW_THREADS
2874
2875
    if (!success) {
2876
        PyErr_SetFromWindowsErr(0);
2877
        return -1;
2878
    }
2879
    return 0;
2880
}
2881
2882
void*
2883
_Py_get_osfhandle_noraise(int fd)
2884
{
2885
    void *handle;
2886
    _Py_BEGIN_SUPPRESS_IPH
2887
    handle = (void*)_get_osfhandle(fd);
2888
    _Py_END_SUPPRESS_IPH
2889
    return handle;
2890
}
2891
2892
void*
2893
_Py_get_osfhandle(int fd)
2894
{
2895
    void *handle = _Py_get_osfhandle_noraise(fd);
2896
    if (handle == INVALID_HANDLE_VALUE)
2897
        PyErr_SetFromErrno(PyExc_OSError);
2898
2899
    return handle;
2900
}
2901
2902
int
2903
_Py_open_osfhandle_noraise(void *handle, int flags)
2904
{
2905
    int fd;
2906
    _Py_BEGIN_SUPPRESS_IPH
2907
    fd = _open_osfhandle((intptr_t)handle, flags);
2908
    _Py_END_SUPPRESS_IPH
2909
    return fd;
2910
}
2911
2912
int
2913
_Py_open_osfhandle(void *handle, int flags)
2914
{
2915
    int fd = _Py_open_osfhandle_noraise(handle, flags);
2916
    if (fd == -1)
2917
        PyErr_SetFromErrno(PyExc_OSError);
2918
2919
    return fd;
2920
}
2921
#endif  /* MS_WINDOWS */
2922
2923
int
2924
_Py_GetLocaleconvNumeric(struct lconv *lc,
2925
                         PyObject **decimal_point, PyObject **thousands_sep)
2926
0
{
2927
0
    assert(decimal_point != NULL);
2928
0
    assert(thousands_sep != NULL);
2929
2930
0
#ifndef MS_WINDOWS
2931
0
    int change_locale = 0;
2932
0
    if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2933
0
        change_locale = 1;
2934
0
    }
2935
0
    if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2936
0
        change_locale = 1;
2937
0
    }
2938
2939
    /* Keep a copy of the LC_CTYPE locale */
2940
0
    char *oldloc = NULL, *loc = NULL;
2941
0
    if (change_locale) {
2942
0
        oldloc = setlocale(LC_CTYPE, NULL);
2943
0
        if (!oldloc) {
2944
0
            PyErr_SetString(PyExc_RuntimeWarning,
2945
0
                            "failed to get LC_CTYPE locale");
2946
0
            return -1;
2947
0
        }
2948
2949
0
        oldloc = _PyMem_Strdup(oldloc);
2950
0
        if (!oldloc) {
2951
0
            PyErr_NoMemory();
2952
0
            return -1;
2953
0
        }
2954
2955
0
        loc = setlocale(LC_NUMERIC, NULL);
2956
0
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
2957
0
            loc = NULL;
2958
0
        }
2959
2960
0
        if (loc != NULL) {
2961
            /* Only set the locale temporarily the LC_CTYPE locale
2962
               if LC_NUMERIC locale is different than LC_CTYPE locale and
2963
               decimal_point and/or thousands_sep are non-ASCII or longer than
2964
               1 byte */
2965
0
            setlocale(LC_CTYPE, loc);
2966
0
        }
2967
0
    }
2968
2969
0
#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2970
#else /* MS_WINDOWS */
2971
/* Use _W_* fields of Windows strcut lconv */
2972
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2973
#endif /* MS_WINDOWS */
2974
2975
0
    int res = -1;
2976
2977
0
    *decimal_point = GET_LOCALE_STRING(decimal_point);
2978
0
    if (*decimal_point == NULL) {
2979
0
        goto done;
2980
0
    }
2981
2982
0
    *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2983
0
    if (*thousands_sep == NULL) {
2984
0
        goto done;
2985
0
    }
2986
2987
0
    res = 0;
2988
2989
0
done:
2990
0
#ifndef MS_WINDOWS
2991
0
    if (loc != NULL) {
2992
0
        setlocale(LC_CTYPE, oldloc);
2993
0
    }
2994
0
    PyMem_Free(oldloc);
2995
0
#endif
2996
0
    return res;
2997
2998
0
#undef GET_LOCALE_STRING
2999
0
}
3000
3001
/* Our selection logic for which function to use is as follows:
3002
 * 1. If close_range(2) is available, always prefer that; it's better for
3003
 *    contiguous ranges like this than fdwalk(3) which entails iterating over
3004
 *    the entire fd space and simply doing nothing for those outside the range.
3005
 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
3006
 *    closing up to sysconf(_SC_OPEN_MAX).
3007
 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
3008
 *    as that will be more performant if the range happens to have any chunk of
3009
 *    non-opened fd in the middle.
3010
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
3011
 */
3012
#ifdef HAVE_CLOSEFROM
3013
#  define USE_CLOSEFROM
3014
#endif /* HAVE_CLOSEFROM */
3015
3016
#ifdef HAVE_FDWALK
3017
#  define USE_FDWALK
3018
#endif /* HAVE_FDWALK */
3019
3020
#ifdef USE_FDWALK
3021
/* fdwalk(3) callback. 'lohi' points to two ints: {lo, hi}.

   Return 1 as soon as fd >= hi. Otherwise close 'fd' if fd >= lo (errors
   are ignored) and return 0. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int lower = bounds[0];
    const int upper = bounds[1];

    if (fd >= upper) {
        return 1;
    }
    if (fd >= lower) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
3036
#endif /* USE_FDWALK */
3037
3038
/* Closes all file descriptors in [first, last], ignoring errors. */
3039
void
3040
_Py_closerange(int first, int last)
3041
0
{
3042
0
    first = Py_MAX(first, 0);
3043
0
    _Py_BEGIN_SUPPRESS_IPH
3044
#ifdef HAVE_CLOSE_RANGE
3045
    if (close_range(first, last, 0) == 0) {
3046
        /* close_range() ignores errors when it closes file descriptors.
3047
         * Possible reasons of an error return are lack of kernel support
3048
         * or denial of the underlying syscall by a seccomp sandbox on Linux.
3049
         * Fallback to other methods in case of any error. */
3050
    }
3051
    else
3052
#endif /* HAVE_CLOSE_RANGE */
3053
#ifdef USE_CLOSEFROM
3054
    if (last >= sysconf(_SC_OPEN_MAX)) {
3055
        /* Any errors encountered while closing file descriptors are ignored */
3056
        (void)closefrom(first);
3057
    }
3058
    else
3059
#endif /* USE_CLOSEFROM */
3060
#ifdef USE_FDWALK
3061
    {
3062
        int lohi[2];
3063
        lohi[0] = first;
3064
        lohi[1] = last + 1;
3065
        fdwalk(_fdwalk_close_func, lohi);
3066
    }
3067
#else
3068
0
    {
3069
0
        for (int i = first; i <= last; i++) {
3070
            /* Ignore errors */
3071
0
            (void)close(i);
3072
0
        }
3073
0
    }
3074
0
#endif /* USE_FDWALK */
3075
0
    _Py_END_SUPPRESS_IPH
3076
0
}
3077
3078
3079
#ifndef MS_WINDOWS
3080
// Ticks per second used by clock() and times() functions.
// See os.times() and time.process_time() implementations.
//
// Store the value in *ticks_per_second and return 0. Return -1, leaving
// *ticks_per_second untouched, if sysconf(_SC_CLK_TCK) reports a value
// smaller than 1.
int
_Py_GetTicksPerSecond(long *ticks_per_second)
{
    long ticks;

#if defined(HAVE_SYSCONF) && defined(_SC_CLK_TCK)
    ticks = sysconf(_SC_CLK_TCK);
    if (ticks <= 0) {
        return -1;
    }
#elif defined(HZ)
    assert(HZ >= 1);
    ticks = HZ;
#else
    // Magic fallback value; may be bogus
    ticks = 60;
#endif

    *ticks_per_second = ticks;
    return 0;
}
3100
#endif
3101
3102
3103
/* Check if a file descriptor is valid or not.
   Return 0 if the file descriptor is invalid (negative descriptors are
   always invalid), return non-zero otherwise. */
int
_Py_IsValidFD(int fd)
{
/* Choice of the check:

   dup() is faster than fstat(): fstat() can require input/output operations,
   whereas dup() doesn't. There is a low risk of EMFILE/ENFILE at Python
   startup. Problem: dup() doesn't check if the file descriptor is valid on
   some platforms.

   fcntl(fd, F_GETFD) is even faster, because it only checks the process
   table. It is preferred over dup() when available, since it cannot fail
   with the "too many open files" error (EMFILE).

   bpo-30225: On macOS Tiger, when stdout is redirected to a pipe and the
   other side of the pipe is closed, dup(1) succeeds, whereas fstat(1, &st)
   fails with EBADF. FreeBSD has a similar issue (bpo-32849).

   Only use dup() on Linux where dup() is enough to detect an invalid FD
   (bpo-32849).
*/
    if (fd < 0) {
        return 0;
    }

#if defined(F_GETFD) && ( \
        defined(__linux__) || \
        defined(__APPLE__) || \
        (defined(__wasm__) && !defined(__wasi__)))
    int fd_flags = fcntl(fd, F_GETFD);
    return fd_flags >= 0;
#elif defined(__linux__)
    int duplicate = dup(fd);
    if (duplicate < 0) {
        return 0;
    }
    close(duplicate);
    return 1;
#elif defined(MS_WINDOWS)
    HANDLE hfile;
    _Py_BEGIN_SUPPRESS_IPH
    hfile = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (hfile == INVALID_HANDLE_VALUE) {
        return 0;
    }
    return GetFileType(hfile) != FILE_TYPE_UNKNOWN;
#else
    struct stat st;
    return (fstat(fd, &st) == 0);
#endif
}