Coverage Report

Created: 2025-11-24 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/fileutils.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_fileutils.h"     // fileutils definitions
3
#include "pycore_runtime.h"       // _PyRuntime
4
#include "pycore_pystate.h"       // _Py_AssertHoldsTstate()
5
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
6
#include "osdefs.h"               // SEP
7
8
#include <stdlib.h>               // mbstowcs()
9
#ifdef HAVE_UNISTD_H
10
#  include <unistd.h>             // getcwd()
11
#endif
12
13
#ifdef MS_WINDOWS
14
#  include <malloc.h>
15
#  include <windows.h>
16
#  include <winioctl.h>             // FILE_DEVICE_* constants
17
#  include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION
18
#  if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
19
#    define PATHCCH_ALLOW_LONG_PATHS 0x01
20
#  else
21
#    include <pathcch.h>            // PathCchCombineEx
22
#  endif
23
extern int winerror_to_errno(int);
24
#endif
25
26
#ifdef HAVE_LANGINFO_H
27
#  include <langinfo.h>           // nl_langinfo(CODESET)
28
#endif
29
30
#ifdef HAVE_SYS_IOCTL_H
31
#include <sys/ioctl.h>
32
#endif
33
34
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
35
#  include <iconv.h>              // iconv_open()
36
#endif
37
38
#ifdef HAVE_FCNTL_H
39
#  include <fcntl.h>              // fcntl(F_GETFD)
40
#endif
41
42
#ifdef O_CLOEXEC
/* Tri-state cache answering "does open() honour the O_CLOEXEC flag?":

     -1: not determined yet
      0: open() ignores O_CLOEXEC (ex: Linux kernel older than 2.6.23)
      1: open() honours O_CLOEXEC, close-on-exec is set

   Consumers: _Py_open(), _Py_open_noraise(), io.FileIO and os.open(). */
int _Py_open_cloexec_works = -1;
#endif
53
54
// Sentinel return values of mbstowcs() and mbrtowc().
// DECODE_ERROR is (size_t)-1: the conversion failed.
static const size_t DECODE_ERROR = (size_t)-1;
#ifdef HAVE_MBRTOWC
// INCOMPLETE_CHARACTER is (size_t)-2: compared against the result of
// mbrtowc() only.
static const size_t INCOMPLETE_CHARACTER = ((size_t)-2);
#endif
59
60
61
static int
62
get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
63
253k
{
64
253k
    switch (errors)
65
253k
    {
66
0
    case _Py_ERROR_STRICT:
67
0
        *surrogateescape = 0;
68
0
        return 0;
69
253k
    case _Py_ERROR_SURROGATEESCAPE:
70
253k
        *surrogateescape = 1;
71
253k
        return 0;
72
0
    default:
73
0
        return -1;
74
253k
    }
75
253k
}
76
77
78
PyObject *
79
_Py_device_encoding(int fd)
80
0
{
81
0
    int valid;
82
0
    Py_BEGIN_ALLOW_THREADS
83
0
    _Py_BEGIN_SUPPRESS_IPH
84
0
    valid = isatty(fd);
85
0
    _Py_END_SUPPRESS_IPH
86
0
    Py_END_ALLOW_THREADS
87
0
    if (!valid)
88
0
        Py_RETURN_NONE;
89
90
#ifdef MS_WINDOWS
91
#ifdef HAVE_WINDOWS_CONSOLE_IO
92
    UINT cp;
93
    if (fd == 0)
94
        cp = GetConsoleCP();
95
    else if (fd == 1 || fd == 2)
96
        cp = GetConsoleOutputCP();
97
    else
98
        cp = 0;
99
    /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
100
       has no console */
101
    if (cp == 0) {
102
        Py_RETURN_NONE;
103
    }
104
105
    return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
106
#else
107
    Py_RETURN_NONE;
108
#endif /* HAVE_WINDOWS_CONSOLE_IO */
109
#else
110
0
    if (_PyRuntime.preconfig.utf8_mode) {
111
0
        _Py_DECLARE_STR(utf_8, "utf-8");
112
0
        return &_Py_STR(utf_8);
113
0
    }
114
0
    return _Py_GetLocaleEncodingObject();
115
0
#endif
116
0
}
117
118
119
/* Return 1 if 'ch' is accepted as a decoded wide character, 0 otherwise.

   Lone surrogates are rejected.  When wchar_t is wider than 16 bits, values
   above _Py_MAX_UNICODE are rejected as well. */
static int
is_valid_wide_char(wchar_t ch)
{
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
    /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
       for non-Unicode locales, which makes values higher than _Py_MAX_UNICODE
       possibly valid: accept everything. */
    return 1;
#endif
    // Lone surrogate characters are not valid
    int valid = !Py_UNICODE_IS_SURROGATE(ch);
#if SIZEOF_WCHAR_T > 2
    // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
    // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
    // it creates characters outside the [U+0000; U+10ffff] range:
    // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
    if (ch > _Py_MAX_UNICODE) {
        valid = 0;
    }
#endif
    return valid;
}
143
144
145
static size_t
146
_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
147
506k
{
148
506k
    size_t count = mbstowcs(dest, src, n);
149
506k
    if (dest != NULL && count != DECODE_ERROR) {
150
6.21M
        for (size_t i=0; i < count; i++) {
151
5.95M
            wchar_t ch = dest[i];
152
5.95M
            if (!is_valid_wide_char(ch)) {
153
0
                return DECODE_ERROR;
154
0
            }
155
5.95M
        }
156
253k
    }
157
506k
    return count;
158
506k
}
159
160
161
#ifdef HAVE_MBRTOWC
162
static size_t
163
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
164
0
{
165
0
    assert(pwc != NULL);
166
0
    size_t count = mbrtowc(pwc, str, len, pmbs);
167
0
    if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
168
0
        if (!is_valid_wide_char(*pwc)) {
169
0
            return DECODE_ERROR;
170
0
        }
171
0
    }
172
0
    return count;
173
0
}
174
#endif
175
176
177
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
178
179
#define USE_FORCE_ASCII
180
181
extern int _Py_normalize_encoding(const char *, char *, size_t, int);
182
183
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
184
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
185
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
186
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
187
   locale.getpreferredencoding() codec. For example, if command line arguments
188
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
189
   UnicodeEncodeError instead of retrieving the original byte string.
190
191
   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
192
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
193
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
194
   workaround is also enabled on error, for example if getting the locale
195
   failed.
196
197
   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
198
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
199
   ASCII encoding in this case.
200
201
   Values of force_ascii:
202
203
       1: the workaround is used: Py_EncodeLocale() uses
204
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
205
          decode_ascii()
206
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
207
          Py_DecodeLocale() uses mbstowcs()
208
      -1: unknown, need to call check_force_ascii() to get the value
209
*/
210
56
#define force_ascii (_PyRuntime.fileutils.force_ascii)
211
212
/* Decide whether the "force ASCII" workaround is needed for the current
   LC_CTYPE locale.

   Return 1 if the ASCII codec must be used instead of mbstowcs() and
   wcstombs(), 0 otherwise.  Any failure (locale or codeset unavailable,
   encoding name which cannot be normalized) also returns 1. */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding), 1)) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* The probe is NUL-terminated: mbstowcs() expects a multibyte
           string and must not be able to read past the probe byte. */
        char ch[2];
        wchar_t wch;
        size_t res;

        ch[0] = (char)(unsigned char)0xA7;
        ch[1] = '\0';
        res = _Py_mbstowcs(&wch, ch, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char *const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char *const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i=0x80; i<=0xff; i++) {
        /* The probe is NUL-terminated: mbstowcs() expects a multibyte
           string and must not be able to read past the probe byte. */
        char ch[2];
        wchar_t wch[1];
        size_t res;

        ch[0] = (char)(unsigned char)i;
        ch[1] = '\0';
        res = _Py_mbstowcs(wch, ch, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding
               succeeded: the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
311
312
313
int
314
_Py_GetForceASCII(void)
315
0
{
316
0
    if (force_ascii == -1) {
317
0
        force_ascii = check_force_ascii();
318
0
    }
319
0
    return force_ascii;
320
0
}
321
322
323
void
324
_Py_ResetForceASCII(void)
325
56
{
326
56
    force_ascii = -1;
327
56
}
328
329
330
static int
331
encode_ascii(const wchar_t *text, char **str,
332
             size_t *error_pos, const char **reason,
333
             int raw_malloc, _Py_error_handler errors)
334
0
{
335
0
    char *result = NULL, *out;
336
0
    size_t len, i;
337
0
    wchar_t ch;
338
339
0
    int surrogateescape;
340
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
341
0
        return -3;
342
0
    }
343
344
0
    len = wcslen(text);
345
346
    /* +1 for NULL byte */
347
0
    if (raw_malloc) {
348
0
        result = PyMem_RawMalloc(len + 1);
349
0
    }
350
0
    else {
351
0
        result = PyMem_Malloc(len + 1);
352
0
    }
353
0
    if (result == NULL) {
354
0
        return -1;
355
0
    }
356
357
0
    out = result;
358
0
    for (i=0; i<len; i++) {
359
0
        ch = text[i];
360
361
0
        if (ch <= 0x7f) {
362
            /* ASCII character */
363
0
            *out++ = (char)ch;
364
0
        }
365
0
        else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
366
            /* UTF-8b surrogate */
367
0
            *out++ = (char)(ch - 0xdc00);
368
0
        }
369
0
        else {
370
0
            if (raw_malloc) {
371
0
                PyMem_RawFree(result);
372
0
            }
373
0
            else {
374
0
                PyMem_Free(result);
375
0
            }
376
0
            if (error_pos != NULL) {
377
0
                *error_pos = i;
378
0
            }
379
0
            if (reason) {
380
0
                *reason = "encoding error";
381
0
            }
382
0
            return -2;
383
0
        }
384
0
    }
385
0
    *out = '\0';
386
0
    *str = result;
387
0
    return 0;
388
0
}
389
#else
390
/* Variant built when the force ASCII workaround is compiled out: always
   report that the workaround is not used. */
int
_Py_GetForceASCII(void)
{
    const int not_forced = 0;
    return not_forced;
}
395
396
/* Variant built when the force ASCII workaround is compiled out: there is no
   cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    return;
}
401
#endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
402
403
404
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
405
static int
406
decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
407
             const char **reason, _Py_error_handler errors)
408
0
{
409
0
    wchar_t *res;
410
0
    unsigned char *in;
411
0
    wchar_t *out;
412
0
    size_t argsize = strlen(arg) + 1;
413
414
0
    int surrogateescape;
415
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
416
0
        return -3;
417
0
    }
418
419
0
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
420
0
        return -1;
421
0
    }
422
0
    res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
423
0
    if (!res) {
424
0
        return -1;
425
0
    }
426
427
0
    out = res;
428
0
    for (in = (unsigned char*)arg; *in; in++) {
429
0
        unsigned char ch = *in;
430
0
        if (ch < 128) {
431
0
            *out++ = ch;
432
0
        }
433
0
        else {
434
0
            if (!surrogateescape) {
435
0
                PyMem_RawFree(res);
436
0
                if (wlen) {
437
0
                    *wlen = in - (unsigned char*)arg;
438
0
                }
439
0
                if (reason) {
440
0
                    *reason = "decoding error";
441
0
                }
442
0
                return -2;
443
0
            }
444
0
            *out++ = 0xdc00 + ch;
445
0
        }
446
0
    }
447
0
    *out = 0;
448
449
0
    if (wlen != NULL) {
450
0
        *wlen = out - res;
451
0
    }
452
0
    *wstr = res;
453
0
    return 0;
454
0
}
455
#endif   /* !HAVE_MBRTOWC */
456
457
static int
458
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
459
                      const char **reason, _Py_error_handler errors)
460
253k
{
461
253k
    wchar_t *res;
462
253k
    size_t argsize;
463
253k
    size_t count;
464
253k
#ifdef HAVE_MBRTOWC
465
253k
    unsigned char *in;
466
253k
    wchar_t *out;
467
253k
    mbstate_t mbs;
468
253k
#endif
469
470
253k
    int surrogateescape;
471
253k
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
472
0
        return -3;
473
0
    }
474
475
#ifdef HAVE_BROKEN_MBSTOWCS
476
    /* Some platforms have a broken implementation of
477
     * mbstowcs which does not count the characters that
478
     * would result from conversion.  Use an upper bound.
479
     */
480
    argsize = strlen(arg);
481
#else
482
253k
    argsize = _Py_mbstowcs(NULL, arg, 0);
483
253k
#endif
484
253k
    if (argsize != DECODE_ERROR) {
485
253k
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
486
0
            return -1;
487
0
        }
488
253k
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
489
253k
        if (!res) {
490
0
            return -1;
491
0
        }
492
493
253k
        count = _Py_mbstowcs(res, arg, argsize + 1);
494
253k
        if (count != DECODE_ERROR) {
495
253k
            *wstr = res;
496
253k
            if (wlen != NULL) {
497
253k
                *wlen = count;
498
253k
            }
499
253k
            return 0;
500
253k
        }
501
0
        PyMem_RawFree(res);
502
0
    }
503
504
    /* Conversion failed. Fall back to escaping with surrogateescape. */
505
0
#ifdef HAVE_MBRTOWC
506
    /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
507
508
    /* Overallocate; as multi-byte characters are in the argument, the
509
       actual output could use less memory. */
510
0
    argsize = strlen(arg) + 1;
511
0
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
512
0
        return -1;
513
0
    }
514
0
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
515
0
    if (!res) {
516
0
        return -1;
517
0
    }
518
519
0
    in = (unsigned char*)arg;
520
0
    out = res;
521
0
    memset(&mbs, 0, sizeof mbs);
522
0
    while (argsize) {
523
0
        size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
524
0
        if (converted == 0) {
525
            /* Reached end of string; null char stored. */
526
0
            break;
527
0
        }
528
529
0
        if (converted == DECODE_ERROR || converted == INCOMPLETE_CHARACTER) {
530
0
            if (!surrogateescape) {
531
0
                goto decode_error;
532
0
            }
533
534
            /* Decoding error. Escape as UTF-8b, and start over in the initial
535
               shift state. */
536
0
            *out++ = 0xdc00 + *in++;
537
0
            argsize--;
538
0
            memset(&mbs, 0, sizeof mbs);
539
0
            continue;
540
0
        }
541
542
        // _Py_mbrtowc() reject lone surrogate characters
543
0
        assert(!Py_UNICODE_IS_SURROGATE(*out));
544
545
        /* successfully converted some bytes */
546
0
        in += converted;
547
0
        argsize -= converted;
548
0
        out++;
549
0
    }
550
0
    if (wlen != NULL) {
551
0
        *wlen = out - res;
552
0
    }
553
0
    *wstr = res;
554
0
    return 0;
555
556
0
decode_error:
557
0
    PyMem_RawFree(res);
558
0
    if (wlen) {
559
0
        *wlen = in - (unsigned char*)arg;
560
0
    }
561
0
    if (reason) {
562
0
        *reason = "decoding error";
563
0
    }
564
0
    return -2;
565
#else   /* HAVE_MBRTOWC */
566
    /* Cannot use C locale for escaping; manually escape as if charset
567
       is ASCII (i.e. escape all bytes > 128. This will still roundtrip
568
       correctly in the locale's charset, which must be an ASCII superset. */
569
    return decode_ascii(arg, wstr, wlen, reason, errors);
570
#endif   /* HAVE_MBRTOWC */
571
0
}
572
573
574
/* Decode a byte string from the locale encoding.
575
576
   Use the strict error handler if 'errors' is _Py_ERROR_STRICT.  Use the
577
   surrogateescape error handler if 'errors' is _Py_ERROR_SURROGATEESCAPE: undecodable
578
   bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
579
   can be decoded as a surrogate character, escape the bytes using the
580
   surrogateescape error handler instead of decoding them.
581
582
   On success, return 0 and write the newly allocated wide character string into
583
   *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
584
   the number of wide characters excluding the null character into *wlen.
585
586
   On memory allocation failure, return -1.
587
588
   On decoding error, return -2. If wlen is not NULL, write the start of
589
   invalid byte sequence in the input string into *wlen. If reason is not NULL,
590
   write the decoding error message into *reason.
591
592
   Return -3 if the error handler 'errors' is not supported.
593
594
   Use the _Py_EncodeLocaleEx() function to encode the character string back to
595
   a byte string. */
596
int
597
_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
598
                   const char **reason,
599
                   int current_locale, _Py_error_handler errors)
600
262k
{
601
262k
    if (current_locale) {
602
#ifdef _Py_FORCE_UTF8_LOCALE
603
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
604
                                errors);
605
#else
606
253k
        return decode_current_locale(arg, wstr, wlen, reason, errors);
607
253k
#endif
608
253k
    }
609
610
#ifdef _Py_FORCE_UTF8_FS_ENCODING
611
    return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
612
                            errors);
613
#else
614
9.18k
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
615
#ifdef MS_WINDOWS
616
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
617
#endif
618
9.18k
    if (use_utf8) {
619
9.18k
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
620
9.18k
                                errors);
621
9.18k
    }
622
623
0
#ifdef USE_FORCE_ASCII
624
0
    if (force_ascii == -1) {
625
0
        force_ascii = check_force_ascii();
626
0
    }
627
628
0
    if (force_ascii) {
629
        /* force ASCII encoding to workaround mbstowcs() issue */
630
0
        return decode_ascii(arg, wstr, wlen, reason, errors);
631
0
    }
632
0
#endif
633
634
0
    return decode_current_locale(arg, wstr, wlen, reason, errors);
635
0
#endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
636
0
}
637
638
639
/* Decode a byte string from the locale encoding with the
640
   surrogateescape error handler: undecodable bytes are decoded as characters
641
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
642
   character, escape the bytes using the surrogateescape error handler instead
643
   of decoding them.
644
645
   Return a pointer to a newly allocated wide character string, use
646
   PyMem_RawFree() to free the memory. If size is not NULL, write the number of
647
   wide characters excluding the null character into *size
648
649
   Return NULL on decoding error or memory allocation error. If *size* is not
650
   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
651
   decoding error.
652
653
   Decoding errors should never happen, unless there is a bug in the C
654
   library.
655
656
   Use the Py_EncodeLocale() function to encode the character string back to a
657
   byte string. */
658
wchar_t*
659
Py_DecodeLocale(const char* arg, size_t *wlen)
660
196
{
661
196
    wchar_t *wstr;
662
196
    int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
663
196
                                 NULL, 0,
664
196
                                 _Py_ERROR_SURROGATEESCAPE);
665
196
    if (res != 0) {
666
0
        assert(res != -3);
667
0
        if (wlen != NULL) {
668
0
            *wlen = (size_t)res;
669
0
        }
670
0
        return NULL;
671
0
    }
672
196
    return wstr;
673
196
}
674
675
676
static int
677
encode_current_locale(const wchar_t *text, char **str,
678
                      size_t *error_pos, const char **reason,
679
                      int raw_malloc, _Py_error_handler errors)
680
0
{
681
0
    const size_t len = wcslen(text);
682
0
    char *result = NULL, *bytes = NULL;
683
0
    size_t i, size, converted;
684
0
    wchar_t c, buf[2];
685
686
0
    int surrogateescape;
687
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
688
0
        return -3;
689
0
    }
690
691
    /* The function works in two steps:
692
       1. compute the length of the output buffer in bytes (size)
693
       2. outputs the bytes */
694
0
    size = 0;
695
0
    buf[1] = 0;
696
0
    while (1) {
697
0
        for (i=0; i < len; i++) {
698
0
            c = text[i];
699
0
            if (c >= 0xdc80 && c <= 0xdcff) {
700
0
                if (!surrogateescape) {
701
0
                    goto encode_error;
702
0
                }
703
                /* UTF-8b surrogate */
704
0
                if (bytes != NULL) {
705
0
                    *bytes++ = c - 0xdc00;
706
0
                    size--;
707
0
                }
708
0
                else {
709
0
                    size++;
710
0
                }
711
0
                continue;
712
0
            }
713
0
            else {
714
0
                buf[0] = c;
715
0
                if (bytes != NULL) {
716
0
                    converted = wcstombs(bytes, buf, size);
717
0
                }
718
0
                else {
719
0
                    converted = wcstombs(NULL, buf, 0);
720
0
                }
721
0
                if (converted == DECODE_ERROR) {
722
0
                    goto encode_error;
723
0
                }
724
0
                if (bytes != NULL) {
725
0
                    bytes += converted;
726
0
                    size -= converted;
727
0
                }
728
0
                else {
729
0
                    size += converted;
730
0
                }
731
0
            }
732
0
        }
733
0
        if (result != NULL) {
734
0
            *bytes = '\0';
735
0
            break;
736
0
        }
737
738
0
        size += 1; /* nul byte at the end */
739
0
        if (raw_malloc) {
740
0
            result = PyMem_RawMalloc(size);
741
0
        }
742
0
        else {
743
0
            result = PyMem_Malloc(size);
744
0
        }
745
0
        if (result == NULL) {
746
0
            return -1;
747
0
        }
748
0
        bytes = result;
749
0
    }
750
0
    *str = result;
751
0
    return 0;
752
753
0
encode_error:
754
0
    if (raw_malloc) {
755
0
        PyMem_RawFree(result);
756
0
    }
757
0
    else {
758
0
        PyMem_Free(result);
759
0
    }
760
0
    if (error_pos != NULL) {
761
0
        *error_pos = i;
762
0
    }
763
0
    if (reason) {
764
0
        *reason = "encoding error";
765
0
    }
766
0
    return -2;
767
0
}
768
769
770
/* Encode a string to the locale encoding.
771
772
   Parameters:
773
774
   * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
775
     of PyMem_Malloc().
776
   * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
777
     Python filesystem encoding.
778
   * errors: error handler like "strict" or "surrogateescape".
779
780
   Return value:
781
782
    0: success, *str is set to a newly allocated encoded string.
783
   -1: memory allocation failure
784
   -2: encoding error, set *error_pos and *reason (if set).
785
   -3: the error handler 'errors' is not supported.
786
 */
787
static int
788
encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
789
                 const char **reason,
790
                 int raw_malloc, int current_locale, _Py_error_handler errors)
791
976
{
792
976
    if (current_locale) {
793
#ifdef _Py_FORCE_UTF8_LOCALE
794
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
795
                                raw_malloc, errors);
796
#else
797
0
        return encode_current_locale(text, str, error_pos, reason,
798
0
                                     raw_malloc, errors);
799
0
#endif
800
0
    }
801
802
#ifdef _Py_FORCE_UTF8_FS_ENCODING
803
    return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
804
                            raw_malloc, errors);
805
#else
806
976
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
807
#ifdef MS_WINDOWS
808
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
809
#endif
810
976
    if (use_utf8) {
811
976
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
812
976
                                raw_malloc, errors);
813
976
    }
814
815
0
#ifdef USE_FORCE_ASCII
816
0
    if (force_ascii == -1) {
817
0
        force_ascii = check_force_ascii();
818
0
    }
819
820
0
    if (force_ascii) {
821
0
        return encode_ascii(text, str, error_pos, reason,
822
0
                            raw_malloc, errors);
823
0
    }
824
0
#endif
825
826
0
    return encode_current_locale(text, str, error_pos, reason,
827
0
                                 raw_malloc, errors);
828
0
#endif   /* _Py_FORCE_UTF8_FS_ENCODING */
829
0
}
830
831
static char*
832
encode_locale(const wchar_t *text, size_t *error_pos,
833
              int raw_malloc, int current_locale)
834
280
{
835
280
    char *str;
836
280
    int res = encode_locale_ex(text, &str, error_pos, NULL,
837
280
                               raw_malloc, current_locale,
838
280
                               _Py_ERROR_SURROGATEESCAPE);
839
280
    if (res != -2 && error_pos) {
840
0
        *error_pos = (size_t)-1;
841
0
    }
842
280
    if (res != 0) {
843
0
        return NULL;
844
0
    }
845
280
    return str;
846
280
}
847
848
/* Encode a wide character string to the locale encoding with the
849
   surrogateescape error handler: surrogate characters in the range
850
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
851
852
   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
853
   the memory. Return NULL on encoding or memory allocation error.
854
855
   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
856
   to the index of the invalid character on encoding error.
857
858
   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
859
   character string. */
860
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    /* PyMem_Malloc() allocation, not the current LC_CTYPE locale */
    const int raw_malloc = 0;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
865
866
867
/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
868
   instead of PyMem_Free(). */
869
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    /* PyMem_RawMalloc() allocation, not the current LC_CTYPE locale */
    const int raw_malloc = 1;
    const int current_locale = 0;
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
874
875
876
int
877
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
878
                   size_t *error_pos, const char **reason,
879
                   int current_locale, _Py_error_handler errors)
880
696
{
881
696
    return encode_locale_ex(text, str, error_pos, reason, 1,
882
696
                            current_locale, errors);
883
696
}
884
885
886
// Get the current locale encoding name:
887
//
888
// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
889
// - The UTF-8 Mode is not checked by this function
890
// - On Windows, return the ANSI code page (ex: "cp1250")
891
// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
892
// - Otherwise, return nl_langinfo(CODESET).
893
//
894
// Return NULL on memory allocation failure.
895
//
896
// See also config_get_locale_encoding()
897
wchar_t*
898
_Py_GetLocaleEncoding(void)
899
0
{
900
#ifdef _Py_FORCE_UTF8_LOCALE
901
    // On Android langinfo.h and CODESET are missing,
902
    // and UTF-8 is always used in mbstowcs() and wcstombs().
903
    return _PyMem_RawWcsdup(L"utf-8");
904
#else
905
906
#ifdef MS_WINDOWS
907
    wchar_t encoding[23];
908
    unsigned int ansi_codepage = GetACP();
909
    swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
910
    encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
911
    return _PyMem_RawWcsdup(encoding);
912
#else
913
0
    const char *encoding = nl_langinfo(CODESET);
914
0
    if (!encoding || encoding[0] == '\0') {
915
        // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
916
        // macOS if the LC_CTYPE locale is not supported.
917
0
        return _PyMem_RawWcsdup(L"utf-8");
918
0
    }
919
920
0
    wchar_t *wstr;
921
0
    int res = decode_current_locale(encoding, &wstr, NULL,
922
0
                                    NULL, _Py_ERROR_SURROGATEESCAPE);
923
0
    if (res < 0) {
924
0
        return NULL;
925
0
    }
926
0
    return wstr;
927
0
#endif  // !MS_WINDOWS
928
929
0
#endif  // !_Py_FORCE_UTF8_LOCALE
930
0
}
931
932
933
/* Same as _Py_GetLocaleEncoding(), but return the name as an object built
   with PyUnicode_FromWideChar().

   If _Py_GetLocaleEncoding() returns NULL, call PyErr_NoMemory() and return
   NULL. */
PyObject *
_Py_GetLocaleEncodingObject(void)
{
    wchar_t *name = _Py_GetLocaleEncoding();
    if (!name) {
        PyErr_NoMemory();
        return NULL;
    }

    /* the wide string is only needed to build the object */
    PyObject *result = PyUnicode_FromWideChar(name, -1);
    PyMem_RawFree(name);
    return result;
}
946
947
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
948
949
/* Check whether current locale uses Unicode as internal wchar_t form. */
950
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *name = nl_langinfo(CODESET);
    if (name == NULL) {
        return 0;
    }
    if (strcmp(name, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(name, "646") == 0) {
        return 0;
    }
    return 1;
}
962
963
static wchar_t *
964
_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
965
                     const char *tocode, const char *fromcode)
966
{
967
    static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
968
969
    /* Ensure we won't overflow the size. */
970
    if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
971
        PyErr_NoMemory();
972
        return NULL;
973
    }
974
975
    /* the string doesn't have to be NULL terminated */
976
    wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
977
    if (target == NULL) {
978
        PyErr_NoMemory();
979
        return NULL;
980
    }
981
982
    iconv_t cd = iconv_open(tocode, fromcode);
983
    if (cd == (iconv_t)-1) {
984
        PyErr_Format(PyExc_ValueError, "iconv_open() failed");
985
        PyMem_Free(target);
986
        return NULL;
987
    }
988
989
    char *inbuf = (char *) source;
990
    char *outbuf = (char *) target;
991
    size_t inbytesleft = sizeof(wchar_t) * size;
992
    size_t outbytesleft = inbytesleft;
993
994
    size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
995
    if (ret == DECODE_ERROR) {
996
        PyErr_Format(PyExc_ValueError, "iconv() failed");
997
        PyMem_Free(target);
998
        iconv_close(cd);
999
        return NULL;
1000
    }
1001
1002
    iconv_close(cd);
1003
    return target;
1004
}
1005
1006
/* Convert a wide character string to the UCS-4 encoded string. This
1007
   is necessary on systems where internal form of wchar_t are not Unicode
1008
   code points (e.g. Oracle Solaris).
1009
1010
   Return a pointer to a newly allocated string, use PyMem_Free() to free
1011
   the memory. Return NULL and raise exception on conversion or memory
1012
   allocation error. */
1013
wchar_t *
1014
_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1015
{
1016
    return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1017
}
1018
1019
/* Convert a UCS-4 encoded string to native wide character string. This
1020
   is necessary on systems where internal form of wchar_t are not Unicode
1021
   code points (e.g. Oracle Solaris).
1022
1023
   The conversion is done in place. This can be done because both wchar_t
1024
   and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1025
   to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1026
   which is currently the only system using these functions; it doesn't have
1027
   to be for other systems).
1028
1029
   Return 0 on success. Return -1 and raise exception on conversion
1030
   or memory allocation error. */
1031
int
1032
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1033
{
1034
    wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1035
    if (!result) {
1036
        return -1;
1037
    }
1038
    memcpy(unicode, result, size * sizeof(wchar_t));
1039
    PyMem_Free(result);
1040
    return 0;
1041
}
1042
#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1043
1044
#ifdef MS_WINDOWS
1045
/* Offset in seconds between 1.1.1601 and 1.1.1970.  The FILETIME/time_t
   conversion helpers in this file subtract or add it after scaling by
   10000000 (FILETIME is in units of 100 nsec). */
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1046
1047
static void
1048
FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1049
{
1050
    /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1051
    /* Cannot simply cast and dereference in_ptr,
1052
       since it might not be aligned properly */
1053
    __int64 in;
1054
    memcpy(&in, in_ptr, sizeof(in));
1055
    *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1056
    *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1057
}
1058
1059
static void
1060
LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out)
1061
{
1062
    *nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1063
    *time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t);
1064
}
1065
1066
void
1067
_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1068
{
1069
    /* XXX endianness */
1070
    __int64 out;
1071
    out = time_in + secs_between_epochs;
1072
    out = out * 10000000 + nsec_in / 100;
1073
    memcpy(out_ptr, &out, sizeof(out));
1074
}
1075
1076
/* Below, we *know* that ugo+r is 0444 */
1077
#if _S_IREAD != 0400
1078
#error Unsupported C library
1079
#endif
1080
static int
1081
attributes_to_mode(DWORD attr)
1082
{
1083
    int m = 0;
1084
    if (attr & FILE_ATTRIBUTE_DIRECTORY)
1085
        m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1086
    else
1087
        m |= _S_IFREG;
1088
    if (attr & FILE_ATTRIBUTE_READONLY)
1089
        m |= 0444;
1090
    else
1091
        m |= 0666;
1092
    return m;
1093
}
1094
1095
1096
typedef union {
1097
    FILE_ID_128 id;
1098
    struct {
1099
        uint64_t st_ino;
1100
        uint64_t st_ino_high;
1101
    };
1102
} id_128_to_ino;
1103
1104
1105
void
1106
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1107
                           FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info,
1108
                           struct _Py_stat_struct *result)
1109
{
1110
    memset(result, 0, sizeof(*result));
1111
    result->st_mode = attributes_to_mode(info->dwFileAttributes);
1112
    result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1113
    result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber;
1114
    result->st_rdev = 0;
1115
    /* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */
1116
    if (basic_info) {
1117
        LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1118
        LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1119
        LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1120
        LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1121
    } else {
1122
        FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1123
        FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1124
        FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1125
    }
1126
    result->st_nlink = info->nNumberOfLinks;
1127
1128
    if (id_info) {
1129
        id_128_to_ino file_id;
1130
        file_id.id = id_info->FileId;
1131
        result->st_ino = file_id.st_ino;
1132
        result->st_ino_high = file_id.st_ino_high;
1133
    }
1134
    if (!result->st_ino && !result->st_ino_high) {
1135
        /* should only occur for DirEntry_from_find_data, in which case the
1136
           index is likely to be zero anyway. */
1137
        result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1138
    }
1139
1140
    /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1141
       open other name surrogate reparse points without traversing them. To
1142
       detect/handle these, check st_file_attributes and st_reparse_tag. */
1143
    result->st_reparse_tag = reparse_tag;
1144
    if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1145
        reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1146
        /* set the bits that make this a symlink */
1147
        result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1148
    }
1149
    result->st_file_attributes = info->dwFileAttributes;
1150
}
1151
1152
void
1153
_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info,
1154
                            struct _Py_stat_struct *result)
1155
{
1156
    memset(result, 0, sizeof(*result));
1157
    result->st_mode = attributes_to_mode(info->FileAttributes);
1158
    result->st_size = info->EndOfFile.QuadPart;
1159
    LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1160
    LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1161
    LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1162
    LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1163
    result->st_nlink = info->NumberOfLinks;
1164
    result->st_dev = info->VolumeSerialNumber.QuadPart;
1165
    /* File systems with less than 128-bits zero pad into this field */
1166
    id_128_to_ino file_id;
1167
    file_id.id = info->FileId128;
1168
    result->st_ino = file_id.st_ino;
1169
    result->st_ino_high = file_id.st_ino_high;
1170
    /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1171
       open other name surrogate reparse points without traversing them. To
1172
       detect/handle these, check st_file_attributes and st_reparse_tag. */
1173
    result->st_reparse_tag = info->ReparseTag;
1174
    if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1175
        info->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
1176
        /* set the bits that make this a symlink */
1177
        result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1178
    }
1179
    result->st_file_attributes = info->FileAttributes;
1180
    switch (info->DeviceType) {
1181
    case FILE_DEVICE_DISK:
1182
    case FILE_DEVICE_VIRTUAL_DISK:
1183
    case FILE_DEVICE_DFS:
1184
    case FILE_DEVICE_CD_ROM:
1185
    case FILE_DEVICE_CONTROLLER:
1186
    case FILE_DEVICE_DATALINK:
1187
        break;
1188
    case FILE_DEVICE_DISK_FILE_SYSTEM:
1189
    case FILE_DEVICE_CD_ROM_FILE_SYSTEM:
1190
    case FILE_DEVICE_NETWORK_FILE_SYSTEM:
1191
        result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */
1192
        break;
1193
    case FILE_DEVICE_CONSOLE:
1194
    case FILE_DEVICE_NULL:
1195
    case FILE_DEVICE_KEYBOARD:
1196
    case FILE_DEVICE_MODEM:
1197
    case FILE_DEVICE_MOUSE:
1198
    case FILE_DEVICE_PARALLEL_PORT:
1199
    case FILE_DEVICE_PRINTER:
1200
    case FILE_DEVICE_SCREEN:
1201
    case FILE_DEVICE_SERIAL_PORT:
1202
    case FILE_DEVICE_SOUND:
1203
        result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR;
1204
        break;
1205
    case FILE_DEVICE_NAMED_PIPE:
1206
        result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO;
1207
        break;
1208
    default:
1209
        if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
1210
            result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR;
1211
        }
1212
        break;
1213
    }
1214
}
1215
1216
#endif
1217
1218
/* Return information about the file open on fd, without raising.

   On POSIX, this is a plain fstat() call.

   On Windows, use GetFileType() and GetFileInformationByHandle() which support
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
   #23152.  Handles that are not disk files only get st_mode filled in
   (_S_IFCHR or _S_IFIFO, or left at 0 for other types).

   Fill status and return 0 on success.  Return nonzero on error: on POSIX
   errno is set by fstat(); on Windows the last Windows error is set, and
   errno is set as well on the paths that translate it with
   winerror_to_errno(). */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE h = _Py_get_osfhandle_noraise(fd);
    if (h == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int type = GetFileType(h);
    if (type == FILE_TYPE_UNKNOWN) {
        DWORD error = GetLastError();
        if (error != 0) {
            errno = winerror_to_errno(error);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Anything that is not a disk file only reports its type. */
    if (type == FILE_TYPE_CHAR) {
        status->st_mode = _S_IFCHR;
        return 0;
    }
    if (type == FILE_TYPE_PIPE) {
        status->st_mode = _S_IFIFO;
        return 0;
    }
    if (type != FILE_TYPE_DISK) {
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    FILE_BASIC_INFO basic;
    if (!GetFileInformationByHandle(h, &info) ||
        !GetFileInformationByHandleEx(h, FileBasicInfo, &basic, sizeof(basic))) {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    /* FileIdInfo is optional: when it cannot be obtained, pass NULL along. */
    FILE_ID_INFO id;
    FILE_ID_INFO *id_ptr = &id;
    if (!GetFileInformationByHandleEx(h, FileIdInfo, &id, sizeof(id))) {
        id_ptr = NULL;
    }

    _Py_attribute_data_to_stat(&info, 0, &basic, id_ptr, status);
    return 0;
#else
    return fstat(fd, status);
#endif
}
1288
1289
/* Return information about a file.
1290
1291
   On POSIX, use fstat().
1292
1293
   On Windows, use GetFileType() and GetFileInformationByHandle() which support
1294
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1295
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
1296
   #23152.
1297
1298
   Raise an exception and return -1 on error. On Windows, set the last Windows
1299
   error on error. On POSIX, set errno on error. Fill status and return 0 on
1300
   success.
1301
1302
   Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1303
   to call fstat(). The caller must hold the GIL. */
1304
int
1305
_Py_fstat(int fd, struct _Py_stat_struct *status)
1306
0
{
1307
0
    int res;
1308
1309
0
    _Py_AssertHoldsTstate();
1310
1311
0
    Py_BEGIN_ALLOW_THREADS
1312
0
    res = _Py_fstat_noraise(fd, status);
1313
0
    Py_END_ALLOW_THREADS
1314
1315
0
    if (res != 0) {
1316
#ifdef MS_WINDOWS
1317
        PyErr_SetFromWindowsErr(0);
1318
#else
1319
0
        PyErr_SetFromErrno(PyExc_OSError);
1320
0
#endif
1321
0
        return -1;
1322
0
    }
1323
0
    return 0;
1324
0
}
1325
1326
/* Like _Py_stat() but with a raw wchar_t* filename.

   On Windows, call _wstat() and copy only st_mode into buf on success.
   Elsewhere, encode path with _Py_EncodeLocaleRaw() and call stat(); if the
   encoding step fails, set errno to EINVAL and return -1.

   Return the result of _wstat() / stat() otherwise. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
#ifdef MS_WINDOWS
    struct _stat wstatbuf;
    int err = _wstat(path, &wstatbuf);
    if (err == 0) {
        buf->st_mode = wstatbuf.st_mode;
    }
    return err;
#else
    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (!encoded) {
        errno = EINVAL;
        return -1;
    }

    int err = stat(encoded, buf);
    PyMem_RawFree(encoded);
    return err;
#endif
}
1349
1350
1351
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1352
   call stat() otherwise. Only fill st_mode attribute on Windows.
1353
1354
   Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1355
   raised. */
1356
1357
int
1358
_Py_stat(PyObject *path, struct stat *statbuf)
1359
0
{
1360
#ifdef MS_WINDOWS
1361
    int err;
1362
1363
    wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1364
    if (wpath == NULL)
1365
        return -2;
1366
1367
    err = _Py_wstat(wpath, statbuf);
1368
    PyMem_Free(wpath);
1369
    return err;
1370
#else
1371
0
    int ret;
1372
0
    PyObject *bytes;
1373
0
    char *cpath;
1374
1375
0
    bytes = PyUnicode_EncodeFSDefault(path);
1376
0
    if (bytes == NULL)
1377
0
        return -2;
1378
1379
    /* check for embedded null bytes */
1380
0
    if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1381
0
        Py_DECREF(bytes);
1382
0
        return -2;
1383
0
    }
1384
1385
0
    ret = stat(cpath, statbuf);
1386
0
    Py_DECREF(bytes);
1387
0
    return ret;
1388
0
#endif
1389
0
}
1390
1391
#ifdef MS_WINDOWS
1392
// For some Windows API partitions, SetHandleInformation() is declared
1393
// but none of the handle flags are defined.
1394
#ifndef HANDLE_FLAG_INHERIT
1395
#define HANDLE_FLAG_INHERIT 0x00000001
1396
#endif
1397
#endif
1398
1399
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1400
static int
1401
get_inheritable(int fd, int raise)
1402
28
{
1403
#ifdef MS_WINDOWS
1404
    HANDLE handle;
1405
    DWORD flags;
1406
1407
    handle = _Py_get_osfhandle_noraise(fd);
1408
    if (handle == INVALID_HANDLE_VALUE) {
1409
        if (raise)
1410
            PyErr_SetFromErrno(PyExc_OSError);
1411
        return -1;
1412
    }
1413
1414
    if (!GetHandleInformation(handle, &flags)) {
1415
        if (raise)
1416
            PyErr_SetFromWindowsErr(0);
1417
        return -1;
1418
    }
1419
1420
    return (flags & HANDLE_FLAG_INHERIT);
1421
#else
1422
28
    int flags;
1423
1424
28
    flags = fcntl(fd, F_GETFD, 0);
1425
28
    if (flags == -1) {
1426
0
        if (raise)
1427
0
            PyErr_SetFromErrno(PyExc_OSError);
1428
0
        return -1;
1429
0
    }
1430
28
    return !(flags & FD_CLOEXEC);
1431
28
#endif
1432
28
}
1433
1434
/* Get the inheritable flag of the specified file descriptor.

   Return 1 if the file descriptor can be inherited and 0 if it cannot.
   On error, raise an exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;
    return get_inheritable(fd, raise_on_error);
}
1442
1443
1444
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1445
static int
1446
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1447
146k
{
1448
#ifdef MS_WINDOWS
1449
    HANDLE handle;
1450
    DWORD flags;
1451
#else
1452
146k
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1453
146k
    static int ioctl_works = -1;
1454
146k
    int request;
1455
146k
    int err;
1456
146k
#endif
1457
146k
    int flags, new_flags;
1458
146k
    int res;
1459
146k
#endif
1460
1461
    /* atomic_flag_works can only be used to make the file descriptor
1462
       non-inheritable */
1463
146k
    assert(!(atomic_flag_works != NULL && inheritable));
1464
1465
146k
    if (atomic_flag_works != NULL && !inheritable) {
1466
146k
        if (_Py_atomic_load_int_relaxed(atomic_flag_works) == -1) {
1467
28
            int isInheritable = get_inheritable(fd, raise);
1468
28
            if (isInheritable == -1)
1469
0
                return -1;
1470
28
            _Py_atomic_store_int_relaxed(atomic_flag_works, !isInheritable);
1471
28
        }
1472
1473
146k
        if (_Py_atomic_load_int_relaxed(atomic_flag_works))
1474
146k
            return 0;
1475
146k
    }
1476
1477
#ifdef MS_WINDOWS
1478
    handle = _Py_get_osfhandle_noraise(fd);
1479
    if (handle == INVALID_HANDLE_VALUE) {
1480
        if (raise)
1481
            PyErr_SetFromErrno(PyExc_OSError);
1482
        return -1;
1483
    }
1484
1485
    if (inheritable)
1486
        flags = HANDLE_FLAG_INHERIT;
1487
    else
1488
        flags = 0;
1489
1490
    if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1491
        if (raise)
1492
            PyErr_SetFromWindowsErr(0);
1493
        return -1;
1494
    }
1495
    return 0;
1496
1497
#else
1498
1499
0
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1500
0
    if (raise != 0 && _Py_atomic_load_int_relaxed(&ioctl_works) != 0) {
1501
        /* fast-path: ioctl() only requires one syscall */
1502
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
1503
         * thus avoid using ioctl() so we skip the fast-path. */
1504
0
        if (inheritable)
1505
0
            request = FIONCLEX;
1506
0
        else
1507
0
            request = FIOCLEX;
1508
0
        err = ioctl(fd, request, NULL);
1509
0
        if (!err) {
1510
0
            if (_Py_atomic_load_int_relaxed(&ioctl_works) == -1) {
1511
0
                _Py_atomic_store_int_relaxed(&ioctl_works, 1);
1512
0
            }
1513
0
            return 0;
1514
0
        }
1515
1516
0
#ifdef O_PATH
1517
0
        if (errno == EBADF) {
1518
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1519
            // on O_PATH file descriptors. Fall through to the fcntl()
1520
            // implementation.
1521
0
        }
1522
0
        else
1523
0
#endif
1524
0
        if (errno != ENOTTY && errno != EACCES) {
1525
0
            if (raise)
1526
0
                PyErr_SetFromErrno(PyExc_OSError);
1527
0
            return -1;
1528
0
        }
1529
0
        else {
1530
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1531
               device". The ioctl is declared but not supported by the kernel.
1532
               Remember that ioctl() doesn't work. It is the case on
1533
               Illumos-based OS for example.
1534
1535
               Issue #27057: When SELinux policy disallows ioctl it will fail
1536
               with EACCES. While FIOCLEX is safe operation it may be
1537
               unavailable because ioctl was denied altogether.
1538
               This can be the case on Android. */
1539
0
            _Py_atomic_store_int_relaxed(&ioctl_works, 0);
1540
0
        }
1541
        /* fallback to fcntl() if ioctl() does not work */
1542
0
    }
1543
0
#endif
1544
1545
    /* slow-path: fcntl() requires two syscalls */
1546
0
    flags = fcntl(fd, F_GETFD);
1547
0
    if (flags < 0) {
1548
0
        if (raise)
1549
0
            PyErr_SetFromErrno(PyExc_OSError);
1550
0
        return -1;
1551
0
    }
1552
1553
0
    if (inheritable) {
1554
0
        new_flags = flags & ~FD_CLOEXEC;
1555
0
    }
1556
0
    else {
1557
0
        new_flags = flags | FD_CLOEXEC;
1558
0
    }
1559
1560
0
    if (new_flags == flags) {
1561
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
1562
0
        return 0;
1563
0
    }
1564
1565
0
    res = fcntl(fd, F_SETFD, new_flags);
1566
0
    if (res < 0) {
1567
0
        if (raise)
1568
0
            PyErr_SetFromErrno(PyExc_OSError);
1569
0
        return -1;
1570
0
    }
1571
0
    return 0;
1572
0
#endif
1573
0
}
1574
1575
/* Make the file descriptor non-inheritable, without raising.

   Calls set_inheritable() with raise=0 and no atomic flag.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1582
1583
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   If atomic_flag_works is not NULL:

    * if *atomic_flag_works==-1, check whether the file descriptor is already
      non-inheritable: if it is, set *atomic_flag_works to 1 and do nothing
      else; otherwise set *atomic_flag_works to 0 and make the file
      descriptor non-inheritable
    * if *atomic_flag_works==1: do nothing
    * if *atomic_flag_works==0: make the file descriptor non-inheritable

   Set atomic_flag_works to NULL if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1604
1605
/* Same as _Py_set_inheritable(), except that on error errno is set and no
   exception is raised (set_inheritable() is called with raise=0).

   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1613
1614
static int
1615
_Py_open_impl(const char *pathname, int flags, int gil_held)
1616
0
{
1617
0
    int fd;
1618
0
    int async_err = 0;
1619
0
#ifndef MS_WINDOWS
1620
0
    int *atomic_flag_works;
1621
0
#endif
1622
1623
#ifdef MS_WINDOWS
1624
    flags |= O_NOINHERIT;
1625
#elif defined(O_CLOEXEC)
1626
    atomic_flag_works = &_Py_open_cloexec_works;
1627
0
    flags |= O_CLOEXEC;
1628
#else
1629
    atomic_flag_works = NULL;
1630
#endif
1631
1632
0
    if (gil_held) {
1633
0
        PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1634
0
        if (pathname_obj == NULL) {
1635
0
            return -1;
1636
0
        }
1637
0
        if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1638
0
            Py_DECREF(pathname_obj);
1639
0
            return -1;
1640
0
        }
1641
1642
0
        do {
1643
0
            Py_BEGIN_ALLOW_THREADS
1644
0
            fd = open(pathname, flags);
1645
0
            Py_END_ALLOW_THREADS
1646
0
        } while (fd < 0
1647
0
                 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1648
0
        if (async_err) {
1649
0
            Py_DECREF(pathname_obj);
1650
0
            return -1;
1651
0
        }
1652
0
        if (fd < 0) {
1653
0
            PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1654
0
            Py_DECREF(pathname_obj);
1655
0
            return -1;
1656
0
        }
1657
0
        Py_DECREF(pathname_obj);
1658
0
    }
1659
0
    else {
1660
0
        fd = open(pathname, flags);
1661
0
        if (fd < 0)
1662
0
            return -1;
1663
0
    }
1664
1665
0
#ifndef MS_WINDOWS
1666
0
    if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1667
0
        close(fd);
1668
0
        return -1;
1669
0
    }
1670
0
#endif
1671
1672
0
    return fd;
1673
0
}
1674
1675
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), retry the syscall,
   except if the Python signal handler raises an exception.

   Release the GIL to call open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    _Py_AssertHoldsTstate();

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1692
1693
/* Open a file with the specified flags (wrapper to open() function),
   without raising and without requiring the GIL.

   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1704
1705
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
1706
   encoding and use fopen() otherwise.
1707
1708
   The file descriptor is created non-inheritable.
1709
1710
   If interrupted by a signal, fail with EINTR. */
1711
FILE *
1712
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1713
140
{
1714
140
    FILE *f;
1715
140
    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1716
0
        return NULL;
1717
0
    }
1718
140
#ifndef MS_WINDOWS
1719
140
    char *cpath;
1720
140
    char cmode[10];
1721
140
    size_t r;
1722
140
    r = wcstombs(cmode, mode, 10);
1723
140
    if (r == DECODE_ERROR || r >= 10) {
1724
0
        errno = EINVAL;
1725
0
        return NULL;
1726
0
    }
1727
140
    cpath = _Py_EncodeLocaleRaw(path, NULL);
1728
140
    if (cpath == NULL) {
1729
0
        return NULL;
1730
0
    }
1731
140
    f = fopen(cpath, cmode);
1732
140
    PyMem_RawFree(cpath);
1733
#else
1734
    f = _wfopen(path, mode);
1735
#endif
1736
140
    if (f == NULL)
1737
140
        return NULL;
1738
0
    if (make_non_inheritable(fileno(f)) < 0) {
1739
0
        fclose(f);
1740
0
        return NULL;
1741
0
    }
1742
0
    return f;
1743
0
}
1744
1745
1746
/* Open a file.
1747
1748
   On Windows, if 'path' is a Unicode string, call _wfopen(). Otherwise, encode
1749
   the path to the filesystem encoding and call fopen().
1750
1751
   Return the new file object on success. Raise an exception and return NULL
1752
   on error.
1753
1754
   The file descriptor is created non-inheritable.
1755
1756
   When interrupted by a signal (open() fails with EINTR), retry the syscall,
1757
   except if the Python signal handler raises an exception.
1758
1759
   Release the GIL to call _wfopen() or fopen(). The caller must hold
1760
   the GIL. */
1761
FILE*
1762
Py_fopen(PyObject *path, const char *mode)
1763
10.2k
{
1764
10.2k
    _Py_AssertHoldsTstate();
1765
1766
10.2k
    if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1767
0
        return NULL;
1768
0
    }
1769
1770
10.2k
    FILE *f;
1771
10.2k
    int async_err = 0;
1772
10.2k
    int saved_errno;
1773
#ifdef MS_WINDOWS
1774
    PyObject *unicode;
1775
    if (!PyUnicode_FSDecoder(path, &unicode)) {
1776
        return NULL;
1777
    }
1778
1779
    wchar_t *wpath = PyUnicode_AsWideCharString(unicode, NULL);
1780
    Py_DECREF(unicode);
1781
    if (wpath == NULL) {
1782
        return NULL;
1783
    }
1784
1785
    wchar_t wmode[10];
1786
    int usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1787
                                    wmode, Py_ARRAY_LENGTH(wmode));
1788
    if (usize == 0) {
1789
        PyErr_SetFromWindowsErr(0);
1790
        PyMem_Free(wpath);
1791
        return NULL;
1792
    }
1793
1794
    do {
1795
        Py_BEGIN_ALLOW_THREADS
1796
        _Py_BEGIN_SUPPRESS_IPH
1797
        f = _wfopen(wpath, wmode);
1798
        _Py_END_SUPPRESS_IPH
1799
        Py_END_ALLOW_THREADS
1800
    } while (f == NULL
1801
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1802
    saved_errno = errno;
1803
    PyMem_Free(wpath);
1804
#else
1805
10.2k
    PyObject *bytes;
1806
10.2k
    if (!PyUnicode_FSConverter(path, &bytes)) {
1807
0
        return NULL;
1808
0
    }
1809
10.2k
    const char *path_bytes = PyBytes_AS_STRING(bytes);
1810
1811
10.2k
    do {
1812
10.2k
        Py_BEGIN_ALLOW_THREADS
1813
10.2k
        f = fopen(path_bytes, mode);
1814
10.2k
        Py_END_ALLOW_THREADS
1815
10.2k
    } while (f == NULL
1816
10.2k
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1817
10.2k
    saved_errno = errno;
1818
10.2k
    Py_DECREF(bytes);
1819
10.2k
#endif
1820
1821
10.2k
    if (async_err) {
1822
0
        return NULL;
1823
0
    }
1824
1825
10.2k
    if (f == NULL) {
1826
10.2k
        errno = saved_errno;
1827
10.2k
        PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1828
10.2k
        return NULL;
1829
10.2k
    }
1830
1831
0
    if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1832
0
        fclose(f);
1833
0
        return NULL;
1834
0
    }
1835
0
    return f;
1836
0
}
1837
1838
1839
// Close a FILE* by calling fclose() and return its result.
//
// On Windows, files opened by Py_fopen() in the Python DLL must be closed by
// the Python DLL to use the same C runtime version. Otherwise, calling
// fclose() directly can cause undefined behavior.
int
Py_fclose(FILE *file)
{
    int status = fclose(file);
    return status;
}
1849
1850
1851
/* Read count bytes from fd into buf.
1852
1853
   On success, return the number of read bytes, it can be lower than count.
1854
   If the current file offset is at or past the end of file, no bytes are read,
1855
   and read() returns zero.
1856
1857
   On error, raise an exception, set errno and return -1.
1858
1859
   When interrupted by a signal (read() fails with EINTR), retry the syscall.
1860
   If the Python signal handler raises an exception, the function returns -1
1861
   (the syscall is not retried).
1862
1863
   Release the GIL to call read(). The caller must hold the GIL. */
1864
Py_ssize_t
1865
_Py_read(int fd, void *buf, size_t count)
1866
13.5k
{
1867
13.5k
    Py_ssize_t n;
1868
13.5k
    int err;
1869
13.5k
    int async_err = 0;
1870
1871
13.5k
    _Py_AssertHoldsTstate();
1872
1873
    /* _Py_read() must not be called with an exception set, otherwise the
1874
     * caller may think that read() was interrupted by a signal and the signal
1875
     * handler raised an exception. */
1876
13.5k
    assert(!PyErr_Occurred());
1877
1878
13.5k
    if (count > _PY_READ_MAX) {
1879
0
        count = _PY_READ_MAX;
1880
0
    }
1881
1882
13.5k
    _Py_BEGIN_SUPPRESS_IPH
1883
13.5k
    do {
1884
13.5k
        Py_BEGIN_ALLOW_THREADS
1885
13.5k
        errno = 0;
1886
#ifdef MS_WINDOWS
1887
        _doserrno = 0;
1888
        n = read(fd, buf, (int)count);
1889
        // read() on a non-blocking empty pipe fails with EINVAL, which is
1890
        // mapped from the Windows error code ERROR_NO_DATA.
1891
        if (n < 0 && errno == EINVAL) {
1892
            if (_doserrno == ERROR_NO_DATA) {
1893
                errno = EAGAIN;
1894
            }
1895
        }
1896
#else
1897
13.5k
        n = read(fd, buf, count);
1898
13.5k
#endif
1899
        /* save/restore errno because PyErr_CheckSignals()
1900
         * and PyErr_SetFromErrno() can modify it */
1901
13.5k
        err = errno;
1902
13.5k
        Py_END_ALLOW_THREADS
1903
13.5k
    } while (n < 0 && err == EINTR &&
1904
0
            !(async_err = PyErr_CheckSignals()));
1905
13.5k
    _Py_END_SUPPRESS_IPH
1906
1907
13.5k
    if (async_err) {
1908
        /* read() was interrupted by a signal (failed with EINTR)
1909
         * and the Python signal handler raised an exception */
1910
0
        errno = err;
1911
0
        assert(errno == EINTR && PyErr_Occurred());
1912
0
        return -1;
1913
0
    }
1914
13.5k
    if (n < 0) {
1915
0
        PyErr_SetFromErrno(PyExc_OSError);
1916
0
        errno = err;
1917
0
        return -1;
1918
0
    }
1919
1920
13.5k
    return n;
1921
13.5k
}
1922
1923
static Py_ssize_t
1924
_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1925
162k
{
1926
162k
    Py_ssize_t n;
1927
162k
    int err;
1928
162k
    int async_err = 0;
1929
1930
162k
    _Py_BEGIN_SUPPRESS_IPH
1931
#ifdef MS_WINDOWS
1932
    if (count > 32767) {
1933
        /* Issue #11395: the Windows console returns an error (12: not
1934
           enough space error) on writing into stdout if stdout mode is
1935
           binary and the length is greater than 66,000 bytes (or less,
1936
           depending on heap usage). */
1937
        if (gil_held) {
1938
            Py_BEGIN_ALLOW_THREADS
1939
            if (isatty(fd)) {
1940
                count = 32767;
1941
            }
1942
            Py_END_ALLOW_THREADS
1943
        } else {
1944
            if (isatty(fd)) {
1945
                count = 32767;
1946
            }
1947
        }
1948
    }
1949
1950
#endif
1951
162k
    if (count > _PY_WRITE_MAX) {
1952
0
        count = _PY_WRITE_MAX;
1953
0
    }
1954
1955
162k
    if (gil_held) {
1956
162k
        do {
1957
162k
            Py_BEGIN_ALLOW_THREADS
1958
162k
            errno = 0;
1959
#ifdef MS_WINDOWS
1960
            // write() on a non-blocking pipe fails with ENOSPC on Windows if
1961
            // the pipe lacks available space for the entire buffer.
1962
            int c = (int)count;
1963
            do {
1964
                _doserrno = 0;
1965
                n = write(fd, buf, c);
1966
                if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1967
                    break;
1968
                }
1969
                errno = EAGAIN;
1970
                c /= 2;
1971
            } while (c > 0);
1972
#else
1973
162k
            n = write(fd, buf, count);
1974
162k
#endif
1975
            /* save/restore errno because PyErr_CheckSignals()
1976
             * and PyErr_SetFromErrno() can modify it */
1977
162k
            err = errno;
1978
162k
            Py_END_ALLOW_THREADS
1979
162k
        } while (n < 0 && err == EINTR &&
1980
0
                !(async_err = PyErr_CheckSignals()));
1981
162k
    }
1982
0
    else {
1983
0
        do {
1984
0
            errno = 0;
1985
#ifdef MS_WINDOWS
1986
            // write() on a non-blocking pipe fails with ENOSPC on Windows if
1987
            // the pipe lacks available space for the entire buffer.
1988
            int c = (int)count;
1989
            do {
1990
                _doserrno = 0;
1991
                n = write(fd, buf, c);
1992
                if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1993
                    break;
1994
                }
1995
                errno = EAGAIN;
1996
                c /= 2;
1997
            } while (c > 0);
1998
#else
1999
0
            n = write(fd, buf, count);
2000
0
#endif
2001
0
            err = errno;
2002
0
        } while (n < 0 && err == EINTR);
2003
0
    }
2004
162k
    _Py_END_SUPPRESS_IPH
2005
2006
162k
    if (async_err) {
2007
        /* write() was interrupted by a signal (failed with EINTR)
2008
           and the Python signal handler raised an exception (if gil_held is
2009
           nonzero). */
2010
0
        errno = err;
2011
0
        assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
2012
0
        return -1;
2013
0
    }
2014
162k
    if (n < 0) {
2015
0
        if (gil_held)
2016
0
            PyErr_SetFromErrno(PyExc_OSError);
2017
0
        errno = err;
2018
0
        return -1;
2019
0
    }
2020
2021
162k
    return n;
2022
162k
}
2023
2024
/* Write count bytes of buf into fd.
2025
2026
   On success, return the number of written bytes, it can be lower than count
2027
   including 0. On error, raise an exception, set errno and return -1.
2028
2029
   When interrupted by a signal (write() fails with EINTR), retry the syscall.
2030
   If the Python signal handler raises an exception, the function returns -1
2031
   (the syscall is not retried).
2032
2033
   Release the GIL to call write(). The caller must hold the GIL. */
2034
Py_ssize_t
2035
_Py_write(int fd, const void *buf, size_t count)
2036
162k
{
2037
162k
    _Py_AssertHoldsTstate();
2038
2039
    /* _Py_write() must not be called with an exception set, otherwise the
2040
     * caller may think that write() was interrupted by a signal and the signal
2041
     * handler raised an exception. */
2042
162k
    assert(!PyErr_Occurred());
2043
2044
162k
    return _Py_write_impl(fd, buf, count, 1);
2045
162k
}
2046
2047
/* Write count bytes of buf into fd.
2048
 *
2049
 * On success, return the number of written bytes, it can be lower than count
2050
 * including 0. On error, set errno and return -1.
2051
 *
2052
 * When interrupted by a signal (write() fails with EINTR), retry the syscall
2053
 * without calling the Python signal handler. */
2054
Py_ssize_t
2055
_Py_write_noraise(int fd, const void *buf, size_t count)
2056
0
{
2057
0
    return _Py_write_impl(fd, buf, count, 0);
2058
0
}
2059
2060
#ifdef HAVE_READLINK
2061
2062
/* Read value of symbolic link. Encode the path to the locale encoding, decode
2063
   the result from the locale encoding.
2064
2065
   Return -1 on encoding error, on readlink() error, if the internal buffer is
2066
   too short, on decoding error, or if 'buf' is too short. */
2067
int
2068
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
2069
56
{
2070
56
    char *cpath;
2071
56
    char cbuf[MAXPATHLEN];
2072
56
    size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
2073
56
    wchar_t *wbuf;
2074
56
    Py_ssize_t res;
2075
56
    size_t r1;
2076
2077
56
    cpath = _Py_EncodeLocaleRaw(path, NULL);
2078
56
    if (cpath == NULL) {
2079
0
        errno = EINVAL;
2080
0
        return -1;
2081
0
    }
2082
56
    res = readlink(cpath, cbuf, cbuf_len);
2083
56
    PyMem_RawFree(cpath);
2084
56
    if (res == -1) {
2085
28
        return -1;
2086
28
    }
2087
28
    if ((size_t)res == cbuf_len) {
2088
0
        errno = EINVAL;
2089
0
        return -1;
2090
0
    }
2091
28
    cbuf[res] = '\0'; /* buf will be null terminated */
2092
28
    wbuf = Py_DecodeLocale(cbuf, &r1);
2093
28
    if (wbuf == NULL) {
2094
0
        errno = EINVAL;
2095
0
        return -1;
2096
0
    }
2097
    /* wbuf must have space to store the trailing NUL character */
2098
28
    if (buflen <= r1) {
2099
0
        PyMem_RawFree(wbuf);
2100
0
        errno = EINVAL;
2101
0
        return -1;
2102
0
    }
2103
28
    wcsncpy(buf, wbuf, buflen);
2104
28
    PyMem_RawFree(wbuf);
2105
28
    return (int)r1;
2106
28
}
2107
#endif
2108
2109
#ifdef HAVE_REALPATH
2110
2111
/* Store the canonicalized absolute pathname of 'path' into 'resolved_path'.

   The path is encoded to the locale encoding before calling realpath(), and
   the result is decoded from the locale encoding.

   Return 'resolved_path' on success. Return NULL on encoding error,
   realpath() error, decoding error, or if 'resolved_path' (whose size in
   wide characters is 'resolved_path_len') is too short. errno is set to
   EINVAL in every failure case except a realpath() failure. */
wchar_t*
_Py_wrealpath(const wchar_t *path,
              wchar_t *resolved_path, size_t resolved_path_len)
{
    char *encoded = _Py_EncodeLocaleRaw(path, NULL);
    if (encoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* With a NULL second argument, realpath() allocates the result, which
       is released with free() below. */
    char *canonical = realpath(encoded, NULL);
    PyMem_RawFree(encoded);
    if (canonical == NULL) {
        return NULL;
    }

    size_t decoded_len;
    wchar_t *decoded = Py_DecodeLocale(canonical, &decoded_len);
    free(canonical);
    if (decoded == NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* 'resolved_path' must have space to store the trailing NUL character */
    if (resolved_path_len <= decoded_len) {
        PyMem_RawFree(decoded);
        errno = EINVAL;
        return NULL;
    }
    wcsncpy(resolved_path, decoded, resolved_path_len);
    PyMem_RawFree(decoded);
    return resolved_path;
}
2151
#endif
2152
2153
2154
int
2155
_Py_isabs(const wchar_t *path)
2156
532
{
2157
#ifdef MS_WINDOWS
2158
    const wchar_t *tail;
2159
    HRESULT hr = PathCchSkipRoot(path, &tail);
2160
    if (FAILED(hr) || path == tail) {
2161
        return 0;
2162
    }
2163
    if (tail == &path[1] && (path[0] == SEP || path[0] == ALTSEP)) {
2164
        // Exclude paths with leading SEP
2165
        return 0;
2166
    }
2167
    if (tail == &path[2] && path[1] == L':') {
2168
        // Exclude drive-relative paths (e.g. C:filename.ext)
2169
        return 0;
2170
    }
2171
    return 1;
2172
#else
2173
532
    return (path[0] == SEP);
2174
532
#endif
2175
532
}
2176
2177
2178
/* Get an absolute path.
2179
   On error (ex: fail to get the current directory), return -1.
2180
   On memory allocation failure, set *abspath_p to NULL and return 0.
2181
   On success, return a newly allocated to *abspath_p to and return 0.
2182
   The string must be freed by PyMem_RawFree(). */
2183
int
2184
_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2185
0
{
2186
0
    if (path[0] == '\0' || !wcscmp(path, L".")) {
2187
0
        wchar_t cwd[MAXPATHLEN + 1];
2188
0
        cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2189
0
        if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2190
            /* unable to get the current directory */
2191
0
            return -1;
2192
0
        }
2193
0
        *abspath_p = _PyMem_RawWcsdup(cwd);
2194
0
        return 0;
2195
0
    }
2196
2197
0
    if (_Py_isabs(path)) {
2198
0
        *abspath_p = _PyMem_RawWcsdup(path);
2199
0
        return 0;
2200
0
    }
2201
2202
#ifdef MS_WINDOWS
2203
    return _PyOS_getfullpathname(path, abspath_p);
2204
#else
2205
0
    wchar_t cwd[MAXPATHLEN + 1];
2206
0
    cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2207
0
    if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2208
        /* unable to get the current directory */
2209
0
        return -1;
2210
0
    }
2211
2212
0
    size_t cwd_len = wcslen(cwd);
2213
0
    size_t path_len = wcslen(path);
2214
0
    size_t len = cwd_len + 1 + path_len + 1;
2215
0
    if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2216
0
        *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2217
0
    }
2218
0
    else {
2219
0
        *abspath_p = NULL;
2220
0
    }
2221
0
    if (*abspath_p == NULL) {
2222
0
        return 0;
2223
0
    }
2224
2225
0
    wchar_t *abspath = *abspath_p;
2226
0
    memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2227
0
    abspath += cwd_len;
2228
2229
0
    *abspath = (wchar_t)SEP;
2230
0
    abspath++;
2231
2232
0
    memcpy(abspath, path, path_len * sizeof(wchar_t));
2233
0
    abspath += path_len;
2234
2235
0
    *abspath = 0;
2236
0
    return 0;
2237
0
#endif
2238
0
}
2239
2240
// The Windows Games API family implements the PathCch* APIs in the Xbox OS,
2241
// but does not expose them yet. Load them dynamically until
2242
// 1) they are officially exposed
2243
// 2) we stop supporting older versions of the GDK which do not expose them
2244
#if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
2245
HRESULT
2246
PathCchSkipRoot(const wchar_t *path, const wchar_t **rootEnd)
2247
{
2248
    static int initialized = 0;
2249
    typedef HRESULT(__stdcall *PPathCchSkipRoot) (PCWSTR pszPath,
2250
                                                  PCWSTR *ppszRootEnd);
2251
    static PPathCchSkipRoot _PathCchSkipRoot;
2252
2253
    if (initialized == 0) {
2254
        HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2255
                                         LOAD_LIBRARY_SEARCH_SYSTEM32);
2256
        if (pathapi) {
2257
            _PathCchSkipRoot = (PPathCchSkipRoot)GetProcAddress(
2258
                pathapi, "PathCchSkipRoot");
2259
        }
2260
        else {
2261
            _PathCchSkipRoot = NULL;
2262
        }
2263
        initialized = 1;
2264
    }
2265
2266
    if (!_PathCchSkipRoot) {
2267
        return E_NOINTERFACE;
2268
    }
2269
2270
    return _PathCchSkipRoot(path, rootEnd);
2271
}
2272
2273
static HRESULT
2274
PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
2275
                 const wchar_t *relfile, unsigned long flags)
2276
{
2277
    static int initialized = 0;
2278
    typedef HRESULT(__stdcall *PPathCchCombineEx) (PWSTR pszPathOut,
2279
                                                   size_t cchPathOut,
2280
                                                   PCWSTR pszPathIn,
2281
                                                   PCWSTR pszMore,
2282
                                                   unsigned long dwFlags);
2283
    static PPathCchCombineEx _PathCchCombineEx;
2284
2285
    if (initialized == 0) {
2286
        HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2287
                                         LOAD_LIBRARY_SEARCH_SYSTEM32);
2288
        if (pathapi) {
2289
            _PathCchCombineEx = (PPathCchCombineEx)GetProcAddress(
2290
                pathapi, "PathCchCombineEx");
2291
        }
2292
        else {
2293
            _PathCchCombineEx = NULL;
2294
        }
2295
        initialized = 1;
2296
    }
2297
2298
    if (!_PathCchCombineEx) {
2299
        return E_NOINTERFACE;
2300
    }
2301
2302
    return _PathCchCombineEx(buffer, bufsize, dirname, relfile, flags);
2303
}
2304
2305
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
2306
2307
void
2308
_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
2309
             Py_ssize_t *rootsize)
2310
149k
{
2311
149k
    assert(drvsize);
2312
149k
    assert(rootsize);
2313
149k
#ifndef MS_WINDOWS
2314
178k
#define IS_SEP(x) (*(x) == SEP)
2315
149k
    *drvsize = 0;
2316
149k
    if (!IS_SEP(&path[0])) {
2317
        // Relative path, e.g.: 'foo'
2318
134k
        *rootsize = 0;
2319
134k
    }
2320
14.7k
    else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) {
2321
        // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
2322
14.7k
        *rootsize = 1;
2323
14.7k
    }
2324
0
    else {
2325
        // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
2326
        // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
2327
0
        *rootsize = 2;
2328
0
    }
2329
149k
#undef IS_SEP
2330
#else
2331
    const wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2332
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2333
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2334
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2335
    if (IS_SEP(&path[0])) {
2336
        if (IS_SEP(&path[1])) {
2337
            // Device drives, e.g. \\.\device or \\?\device
2338
            // UNC drives, e.g. \\server\share or \\?\UNC\server\share
2339
            Py_ssize_t idx;
2340
            if (path[2] == L'?' && IS_SEP(&path[3]) &&
2341
                (path[4] == L'U' || path[4] == L'u') &&
2342
                (path[5] == L'N' || path[5] == L'n') &&
2343
                (path[6] == L'C' || path[6] == L'c') &&
2344
                IS_SEP(&path[7]))
2345
            {
2346
                idx = 8;
2347
            }
2348
            else {
2349
                idx = 2;
2350
            }
2351
            while (!SEP_OR_END(&path[idx])) {
2352
                idx++;
2353
            }
2354
            if (IS_END(&path[idx])) {
2355
                *drvsize = idx;
2356
                *rootsize = 0;
2357
            }
2358
            else {
2359
                idx++;
2360
                while (!SEP_OR_END(&path[idx])) {
2361
                    idx++;
2362
                }
2363
                *drvsize = idx;
2364
                if (IS_END(&path[idx])) {
2365
                    *rootsize = 0;
2366
                }
2367
                else {
2368
                    *rootsize = 1;
2369
                }
2370
            }
2371
        }
2372
        else {
2373
            // Relative path with root, e.g. \Windows
2374
            *drvsize = 0;
2375
            *rootsize = 1;
2376
        }
2377
    }
2378
    else if (!IS_END(&path[0]) && path[1] == L':') {
2379
        *drvsize = 2;
2380
        if (IS_SEP(&path[2])) {
2381
            // Absolute drive-letter path, e.g. X:\Windows
2382
            *rootsize = 1;
2383
        }
2384
        else {
2385
            // Relative path with drive, e.g. X:Windows
2386
            *rootsize = 0;
2387
        }
2388
    }
2389
    else {
2390
        // Relative path, e.g. Windows
2391
        *drvsize = 0;
2392
        *rootsize = 0;
2393
    }
2394
#undef SEP_OR_END
2395
#undef IS_SEP
2396
#undef IS_END
2397
#endif
2398
149k
}
2399
2400
// Join 'dirname' and 'relfile' into 'buffer', whose total size is 'bufsize'
// wide characters. 'dirname' may be the same pointer as 'buffer'.
//
// On Windows, the work is delegated to PathCchCombineEx().
//
// Elsewhere, 'relfile' must be a relative path. If 'dirname' is empty, the
// result is 'relfile' alone, without a leading separator. Otherwise a single
// SEP is inserted between the two parts unless 'dirname' already ends with
// SEP.
//
// Return 0 on success. Return -1 if the combined path, including the
// separator and the trailing NUL, does not fit in 'bufsize' or if the buffer
// is larger than MAXPATHLEN + 1; on Windows, return -1 if PathCchCombineEx()
// fails.
static int
join_relfile(wchar_t *buffer, size_t bufsize,
             const wchar_t *dirname, const wchar_t *relfile)
{
#ifdef MS_WINDOWS
    if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
        PATHCCH_ALLOW_LONG_PATHS))) {
        return -1;
    }
#else
    assert(!_Py_isabs(relfile));
    size_t dirlen = wcslen(dirname);
    size_t rellen = wcslen(relfile);
    size_t maxlen = bufsize - 1;
    // dirlen + rellen < maxlen leaves room for one separator and the NUL.
    if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
        return -1;
    }
    if (dirlen == 0) {
        // We do not add a leading separator.
        wcscpy(buffer, relfile);
    }
    else {
        if (dirname != buffer) {
            wcscpy(buffer, dirname);
        }
        size_t relstart = dirlen;
        // dirlen >= 1 here. The separator is needed for every dirname that
        // does not already end with one, including one-character names such
        // as "." (which must give "./x", not ".x").
        if (dirname[dirlen - 1] != SEP) {
            buffer[dirlen] = SEP;
            relstart += 1;
        }
        wcscpy(&buffer[relstart], relfile);
    }
#endif
    return 0;
}
2436
2437
/* Join the two paths together, like os.path.join().  Return NULL
2438
   if memory could not be allocated.  The caller is responsible
2439
   for calling PyMem_RawFree() on the result. */
2440
wchar_t *
2441
_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2442
0
{
2443
0
    assert(dirname != NULL && relfile != NULL);
2444
0
#ifndef MS_WINDOWS
2445
0
    assert(!_Py_isabs(relfile));
2446
0
#endif
2447
0
    size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2448
0
    size_t bufsize = maxlen + 1;
2449
0
    wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2450
0
    if (filename == NULL) {
2451
0
        return NULL;
2452
0
    }
2453
0
    assert(wcslen(dirname) < MAXPATHLEN);
2454
0
    assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2455
0
    if (join_relfile(filename, bufsize, dirname, relfile) < 0) {
2456
0
        PyMem_RawFree(filename);
2457
0
        return NULL;
2458
0
    }
2459
0
    return filename;
2460
0
}
2461
2462
/* Join the two paths together in place, like os.path.join().
     dirname: the target buffer, which already holds the directory name
              including its trailing NUL
     relfile: the path to append; this must be a relative path
     bufsize: total allocated size of the buffer
   Return -1 if anything is wrong with the path lengths. */
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);
    /* The buffer is both the directory source and the destination. */
    int status = join_relfile(dirname, bufsize, dirname, relfile);
    return status;
}
2475
2476
2477
/* Return the index in 'filename' of the first character that follows the
   last SEP, i.e. the offset at which the base name starts.

   Return 0 if 'filename' contains no SEP. A path ending with SEP yields the
   index of its trailing NUL character. */
size_t
_Py_find_basename(const wchar_t *filename)
{
    /* 'i' is the candidate start of the base name, so the character to test
       is the one just before it. Scanning down to i == 1 covers a separator
       at index 0, e.g. "/python". */
    for (size_t i = wcslen(filename); i > 0; --i) {
        if (filename[i - 1] == SEP) {
            return i;
        }
    }
    return 0;
}
2487
2488
/* In-place path normalisation. Returns the start of the normalized
2489
   path, which will be within the original buffer. Guaranteed to not
2490
   make the path longer, and will not fail. 'size' is the length of
2491
   the path, if known. If -1, the first null character will be assumed
2492
   to be the end of the path. 'normsize' will be set to contain the
2493
   length of the resulting normalized path. */
2494
wchar_t *
2495
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
2496
7.92k
{
2497
7.92k
    assert(path != NULL);
2498
7.92k
    if ((size < 0 && !path[0]) || size == 0) {
2499
0
        *normsize = 0;
2500
0
        return path;
2501
0
    }
2502
7.92k
    wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2503
7.92k
    wchar_t *p1 = path;     // sequentially scanned address in the path
2504
7.92k
    wchar_t *p2 = path;     // destination of a scanned character to be ljusted
2505
7.92k
    wchar_t *minP2 = path;  // the beginning of the destination range
2506
7.92k
    wchar_t lastC = L'\0';  // the last ljusted character, p2[-1] in most cases
2507
2508
1.49M
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2509
#ifdef ALTSEP
2510
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2511
#else
2512
10.7k
#define IS_SEP(x) (*(x) == SEP)
2513
7.92k
#endif
2514
7.92k
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2515
2516
7.92k
    Py_ssize_t drvsize, rootsize;
2517
7.92k
    _Py_skiproot(path, size, &drvsize, &rootsize);
2518
7.92k
    if (drvsize || rootsize) {
2519
        // Skip past root and update minP2
2520
7.92k
        p1 = &path[drvsize + rootsize];
2521
7.92k
#ifndef ALTSEP
2522
7.92k
        p2 = p1;
2523
#else
2524
        for (; p2 < p1; ++p2) {
2525
            if (*p2 == ALTSEP) {
2526
                *p2 = SEP;
2527
            }
2528
        }
2529
#endif
2530
7.92k
        minP2 = p2 - 1;
2531
7.92k
        lastC = *minP2;
2532
#ifdef MS_WINDOWS
2533
        if (lastC != SEP) {
2534
            minP2++;
2535
        }
2536
#endif
2537
7.92k
    }
2538
7.92k
    if (p1[0] == L'.' && SEP_OR_END(&p1[1])) {
2539
        // Skip leading '.\'
2540
0
        lastC = *++p1;
2541
#ifdef ALTSEP
2542
        if (lastC == ALTSEP) {
2543
            lastC = SEP;
2544
        }
2545
#endif
2546
0
        while (IS_SEP(p1)) {
2547
0
            p1++;
2548
0
        }
2549
0
    }
2550
2551
    /* if pEnd is specified, check that. Else, check for null terminator */
2552
1.49M
    for (; !IS_END(p1); ++p1) {
2553
1.48M
        wchar_t c = *p1;
2554
#ifdef ALTSEP
2555
        if (c == ALTSEP) {
2556
            c = SEP;
2557
        }
2558
#endif
2559
1.48M
        if (lastC == SEP) {
2560
256k
            if (c == L'.') {
2561
2.67k
                int sep_at_1 = SEP_OR_END(&p1[1]);
2562
2.67k
                int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
2563
2.67k
                if (sep_at_2 && p1[1] == L'.') {
2564
0
                    wchar_t *p3 = p2;
2565
0
                    while (p3 != minP2 && *--p3 == SEP) { }
2566
0
                    while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
2567
0
                    if (p2 == minP2
2568
0
                        || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
2569
0
                    {
2570
                        // Previous segment is also ../, so append instead.
2571
                        // Relative path does not absorb ../ at minP2 as well.
2572
0
                        *p2++ = L'.';
2573
0
                        *p2++ = L'.';
2574
0
                        lastC = L'.';
2575
0
                    } else if (p3[0] == SEP) {
2576
                        // Absolute path, so absorb segment
2577
0
                        p2 = p3 + 1;
2578
0
                    } else {
2579
0
                        p2 = p3;
2580
0
                    }
2581
0
                    p1 += 1;
2582
2.67k
                } else if (sep_at_1) {
2583
2.67k
                } else {
2584
2.67k
                    *p2++ = lastC = c;
2585
2.67k
                }
2586
253k
            } else if (c == SEP) {
2587
253k
            } else {
2588
253k
                *p2++ = lastC = c;
2589
253k
            }
2590
1.22M
        } else {
2591
1.22M
            *p2++ = lastC = c;
2592
1.22M
        }
2593
1.48M
    }
2594
7.92k
    *p2 = L'\0';
2595
7.92k
    if (p2 != minP2) {
2596
7.92k
        while (--p2 != minP2 && *p2 == SEP) {
2597
8
            *p2 = L'\0';
2598
8
        }
2599
7.92k
    } else {
2600
0
        --p2;
2601
0
    }
2602
7.92k
    *normsize = p2 - path + 1;
2603
7.92k
#undef SEP_OR_END
2604
7.92k
#undef IS_SEP
2605
7.92k
#undef IS_END
2606
7.92k
    return path;
2607
7.92k
}
2608
2609
/* In-place path normalisation. Returns the start of the normalized
2610
   path, which will be within the original buffer. Guaranteed to not
2611
   make the path longer, and will not fail. 'size' is the length of
2612
   the path, if known. If -1, the first null character will be assumed
2613
   to be the end of the path. */
2614
wchar_t *
2615
_Py_normpath(wchar_t *path, Py_ssize_t size)
2616
252
{
2617
252
    Py_ssize_t norm_length;
2618
252
    return _Py_normpath_and_size(path, size, &norm_length);
2619
252
}
2620
2621
2622
/* Get the current directory. buflen is the buffer size in wide characters
2623
   including the null character. Decode the path from the locale encoding.
2624
2625
   Return NULL on getcwd() error, on decoding error, or if 'buf' is
2626
   too short. */
2627
wchar_t*
2628
_Py_wgetcwd(wchar_t *buf, size_t buflen)
2629
0
{
2630
#ifdef MS_WINDOWS
2631
    int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2632
    return _wgetcwd(buf, ibuflen);
2633
#else
2634
0
    char fname[MAXPATHLEN];
2635
0
    wchar_t *wname;
2636
0
    size_t len;
2637
2638
0
    if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2639
0
        return NULL;
2640
0
    wname = Py_DecodeLocale(fname, &len);
2641
0
    if (wname == NULL)
2642
0
        return NULL;
2643
    /* wname must have space to store the trailing NUL character */
2644
0
    if (buflen <= len) {
2645
0
        PyMem_RawFree(wname);
2646
0
        return NULL;
2647
0
    }
2648
0
    wcsncpy(buf, wname, buflen);
2649
0
    PyMem_RawFree(wname);
2650
0
    return buf;
2651
0
#endif
2652
0
}
2653
2654
/* Duplicate a file descriptor. The new file descriptor is created as
2655
   non-inheritable. Return a new file descriptor on success, raise an OSError
2656
   exception and return -1 on error.
2657
2658
   The GIL is released to call dup(). The caller must hold the GIL. */
2659
int
2660
_Py_dup(int fd)
2661
138k
{
2662
#ifdef MS_WINDOWS
2663
    HANDLE handle;
2664
#endif
2665
2666
138k
    _Py_AssertHoldsTstate();
2667
2668
#ifdef MS_WINDOWS
2669
    handle = _Py_get_osfhandle(fd);
2670
    if (handle == INVALID_HANDLE_VALUE)
2671
        return -1;
2672
2673
    Py_BEGIN_ALLOW_THREADS
2674
    _Py_BEGIN_SUPPRESS_IPH
2675
    fd = dup(fd);
2676
    _Py_END_SUPPRESS_IPH
2677
    Py_END_ALLOW_THREADS
2678
    if (fd < 0) {
2679
        PyErr_SetFromErrno(PyExc_OSError);
2680
        return -1;
2681
    }
2682
2683
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2684
        _Py_BEGIN_SUPPRESS_IPH
2685
        close(fd);
2686
        _Py_END_SUPPRESS_IPH
2687
        return -1;
2688
    }
2689
#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2690
138k
    Py_BEGIN_ALLOW_THREADS
2691
138k
    _Py_BEGIN_SUPPRESS_IPH
2692
138k
    fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2693
138k
    _Py_END_SUPPRESS_IPH
2694
138k
    Py_END_ALLOW_THREADS
2695
138k
    if (fd < 0) {
2696
0
        PyErr_SetFromErrno(PyExc_OSError);
2697
0
        return -1;
2698
0
    }
2699
2700
#elif HAVE_DUP
2701
    Py_BEGIN_ALLOW_THREADS
2702
    _Py_BEGIN_SUPPRESS_IPH
2703
    fd = dup(fd);
2704
    _Py_END_SUPPRESS_IPH
2705
    Py_END_ALLOW_THREADS
2706
    if (fd < 0) {
2707
        PyErr_SetFromErrno(PyExc_OSError);
2708
        return -1;
2709
    }
2710
2711
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2712
        _Py_BEGIN_SUPPRESS_IPH
2713
        close(fd);
2714
        _Py_END_SUPPRESS_IPH
2715
        return -1;
2716
    }
2717
#else
2718
    errno = ENOTSUP;
2719
    PyErr_SetFromErrno(PyExc_OSError);
2720
    return -1;
2721
#endif
2722
138k
    return fd;
2723
138k
}
2724
2725
#ifndef MS_WINDOWS
2726
/* Get the blocking mode of the file descriptor.
2727
   Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2728
   raise an exception and return -1 on error. */
2729
int
2730
_Py_get_blocking(int fd)
2731
0
{
2732
0
    int flags;
2733
0
    _Py_BEGIN_SUPPRESS_IPH
2734
0
    flags = fcntl(fd, F_GETFL, 0);
2735
0
    _Py_END_SUPPRESS_IPH
2736
0
    if (flags < 0) {
2737
0
        PyErr_SetFromErrno(PyExc_OSError);
2738
0
        return -1;
2739
0
    }
2740
2741
0
    return !(flags & O_NONBLOCK);
2742
0
}
2743
2744
/* Set the blocking mode of the specified file descriptor.
2745
2746
   Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2747
   otherwise.
2748
2749
   Return 0 on success, raise an exception and return -1 on error. */
2750
int
2751
_Py_set_blocking(int fd, int blocking)
2752
0
{
2753
/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2754
   Use fcntl() instead. */
2755
0
#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2756
0
    int arg = !blocking;
2757
0
    if (ioctl(fd, FIONBIO, &arg) < 0)
2758
0
        goto error;
2759
#else
2760
    int flags, res;
2761
2762
    _Py_BEGIN_SUPPRESS_IPH
2763
    flags = fcntl(fd, F_GETFL, 0);
2764
    if (flags >= 0) {
2765
        if (blocking)
2766
            flags = flags & (~O_NONBLOCK);
2767
        else
2768
            flags = flags | O_NONBLOCK;
2769
2770
        res = fcntl(fd, F_SETFL, flags);
2771
    } else {
2772
        res = -1;
2773
    }
2774
    _Py_END_SUPPRESS_IPH
2775
2776
    if (res < 0)
2777
        goto error;
2778
#endif
2779
0
    return 0;
2780
2781
0
error:
2782
0
    PyErr_SetFromErrno(PyExc_OSError);
2783
0
    return -1;
2784
0
}
2785
#else   /* MS_WINDOWS */
2786
2787
// The Windows Games API family doesn't expose GetNamedPipeHandleStateW so attempt
2788
// to load it directly from the Kernel32.dll
2789
#if !defined(MS_WINDOWS_APP) && !defined(MS_WINDOWS_SYSTEM)
2790
BOOL
2791
GetNamedPipeHandleStateW(HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount,
2792
                         LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize)
2793
{
2794
    static int initialized = 0;
2795
    typedef BOOL(__stdcall* PGetNamedPipeHandleStateW) (
2796
        HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount,
2797
        LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize);
2798
    static PGetNamedPipeHandleStateW _GetNamedPipeHandleStateW;
2799
2800
    if (initialized == 0) {
2801
        HMODULE api = LoadLibraryExW(L"Kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
2802
        if (api) {
2803
            _GetNamedPipeHandleStateW = (PGetNamedPipeHandleStateW)GetProcAddress(
2804
                api, "GetNamedPipeHandleStateW");
2805
        }
2806
        else {
2807
            _GetNamedPipeHandleStateW = NULL;
2808
        }
2809
        initialized = 1;
2810
    }
2811
2812
    if (!_GetNamedPipeHandleStateW) {
2813
        SetLastError(E_NOINTERFACE);
2814
        return FALSE;
2815
    }
2816
2817
    return _GetNamedPipeHandleStateW(
2818
        hNamedPipe, lpState, lpCurInstances, lpMaxCollectionCount, lpCollectDataTimeout, lpUserName, nMaxUserNameSize
2819
    );
2820
}
2821
#endif /* !MS_WINDOWS_APP && !MS_WINDOWS_SYSTEM */
2822
2823
int
2824
_Py_get_blocking(int fd)
2825
{
2826
    HANDLE handle;
2827
    DWORD mode;
2828
    BOOL success;
2829
2830
    handle = _Py_get_osfhandle(fd);
2831
    if (handle == INVALID_HANDLE_VALUE) {
2832
        return -1;
2833
    }
2834
2835
    Py_BEGIN_ALLOW_THREADS
2836
    success = GetNamedPipeHandleStateW(handle, &mode,
2837
                                       NULL, NULL, NULL, NULL, 0);
2838
    Py_END_ALLOW_THREADS
2839
2840
    if (!success) {
2841
        PyErr_SetFromWindowsErr(0);
2842
        return -1;
2843
    }
2844
2845
    return !(mode & PIPE_NOWAIT);
2846
}
2847
2848
int
2849
_Py_set_blocking(int fd, int blocking)
2850
{
2851
    HANDLE handle;
2852
    DWORD mode;
2853
    BOOL success;
2854
2855
    handle = _Py_get_osfhandle(fd);
2856
    if (handle == INVALID_HANDLE_VALUE) {
2857
        return -1;
2858
    }
2859
2860
    Py_BEGIN_ALLOW_THREADS
2861
    success = GetNamedPipeHandleStateW(handle, &mode,
2862
                                       NULL, NULL, NULL, NULL, 0);
2863
    if (success) {
2864
        if (blocking) {
2865
            mode &= ~PIPE_NOWAIT;
2866
        }
2867
        else {
2868
            mode |= PIPE_NOWAIT;
2869
        }
2870
        success = SetNamedPipeHandleState(handle, &mode, NULL, NULL);
2871
    }
2872
    Py_END_ALLOW_THREADS
2873
2874
    if (!success) {
2875
        PyErr_SetFromWindowsErr(0);
2876
        return -1;
2877
    }
2878
    return 0;
2879
}
2880
2881
void*
2882
_Py_get_osfhandle_noraise(int fd)
2883
{
2884
    void *handle;
2885
    _Py_BEGIN_SUPPRESS_IPH
2886
    handle = (void*)_get_osfhandle(fd);
2887
    _Py_END_SUPPRESS_IPH
2888
    return handle;
2889
}
2890
2891
void*
2892
_Py_get_osfhandle(int fd)
2893
{
2894
    void *handle = _Py_get_osfhandle_noraise(fd);
2895
    if (handle == INVALID_HANDLE_VALUE)
2896
        PyErr_SetFromErrno(PyExc_OSError);
2897
2898
    return handle;
2899
}
2900
2901
int
2902
_Py_open_osfhandle_noraise(void *handle, int flags)
2903
{
2904
    int fd;
2905
    _Py_BEGIN_SUPPRESS_IPH
2906
    fd = _open_osfhandle((intptr_t)handle, flags);
2907
    _Py_END_SUPPRESS_IPH
2908
    return fd;
2909
}
2910
2911
int
2912
_Py_open_osfhandle(void *handle, int flags)
2913
{
2914
    int fd = _Py_open_osfhandle_noraise(handle, flags);
2915
    if (fd == -1)
2916
        PyErr_SetFromErrno(PyExc_OSError);
2917
2918
    return fd;
2919
}
2920
#endif  /* MS_WINDOWS */
2921
2922
int
2923
_Py_GetLocaleconvNumeric(struct lconv *lc,
2924
                         PyObject **decimal_point, PyObject **thousands_sep)
2925
0
{
2926
0
    assert(decimal_point != NULL);
2927
0
    assert(thousands_sep != NULL);
2928
2929
0
#ifndef MS_WINDOWS
2930
0
    int change_locale = 0;
2931
0
    if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2932
0
        change_locale = 1;
2933
0
    }
2934
0
    if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2935
0
        change_locale = 1;
2936
0
    }
2937
2938
    /* Keep a copy of the LC_CTYPE locale */
2939
0
    char *oldloc = NULL, *loc = NULL;
2940
0
    if (change_locale) {
2941
0
        oldloc = setlocale(LC_CTYPE, NULL);
2942
0
        if (!oldloc) {
2943
0
            PyErr_SetString(PyExc_RuntimeWarning,
2944
0
                            "failed to get LC_CTYPE locale");
2945
0
            return -1;
2946
0
        }
2947
2948
0
        oldloc = _PyMem_Strdup(oldloc);
2949
0
        if (!oldloc) {
2950
0
            PyErr_NoMemory();
2951
0
            return -1;
2952
0
        }
2953
2954
0
        loc = setlocale(LC_NUMERIC, NULL);
2955
0
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
2956
0
            loc = NULL;
2957
0
        }
2958
2959
0
        if (loc != NULL) {
2960
            /* Only set the locale temporarily the LC_CTYPE locale
2961
               if LC_NUMERIC locale is different than LC_CTYPE locale and
2962
               decimal_point and/or thousands_sep are non-ASCII or longer than
2963
               1 byte */
2964
0
            setlocale(LC_CTYPE, loc);
2965
0
        }
2966
0
    }
2967
2968
0
#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2969
#else /* MS_WINDOWS */
2970
/* Use _W_* fields of Windows strcut lconv */
2971
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2972
#endif /* MS_WINDOWS */
2973
2974
0
    int res = -1;
2975
2976
0
    *decimal_point = GET_LOCALE_STRING(decimal_point);
2977
0
    if (*decimal_point == NULL) {
2978
0
        goto done;
2979
0
    }
2980
2981
0
    *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2982
0
    if (*thousands_sep == NULL) {
2983
0
        goto done;
2984
0
    }
2985
2986
0
    res = 0;
2987
2988
0
done:
2989
0
#ifndef MS_WINDOWS
2990
0
    if (loc != NULL) {
2991
0
        setlocale(LC_CTYPE, oldloc);
2992
0
    }
2993
0
    PyMem_Free(oldloc);
2994
0
#endif
2995
0
    return res;
2996
2997
0
#undef GET_LOCALE_STRING
2998
0
}
2999
3000
/* Our selection logic for which function to use is as follows:
3001
 * 1. If close_range(2) is available, always prefer that; it's better for
3002
 *    contiguous ranges like this than fdwalk(3) which entails iterating over
3003
 *    the entire fd space and simply doing nothing for those outside the range.
3004
 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
3005
 *    closing up to sysconf(_SC_OPEN_MAX).
3006
 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
3007
 *    as that will be more performant if the range happens to have any chunk of
3008
 *    non-opened fd in the middle.
3009
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
3010
 */
3011
#ifdef HAVE_CLOSEFROM
3012
#  define USE_CLOSEFROM
3013
#endif /* HAVE_CLOSEFROM */
3014
3015
#ifdef HAVE_FDWALK
3016
#  define USE_FDWALK
3017
#endif /* HAVE_FDWALK */
3018
3019
#ifdef USE_FDWALK
3020
/* fdwalk() callback that closes `fd` when it lies in the half-open range
   described by `lohi`, an int[2] holding {low (inclusive), high (exclusive)}.

   Returns 1 once `fd` reaches the high bound, and 0 otherwise. Errors from
   close() are ignored. */
static int
_fdwalk_close_func(void *lohi, int fd)
{
    const int *bounds = (const int *)lohi;
    const int low = bounds[0];
    const int high = bounds[1];

    if (fd >= high) {
        return 1;
    }
    if (fd >= low) {
        /* Ignore errors */
        (void)close(fd);
    }
    return 0;
}
3035
#endif /* USE_FDWALK */
3036
3037
/* Closes all file descriptors in [first, last], ignoring errors. */
3038
void
3039
_Py_closerange(int first, int last)
3040
0
{
3041
0
    first = Py_MAX(first, 0);
3042
0
    _Py_BEGIN_SUPPRESS_IPH
3043
#ifdef HAVE_CLOSE_RANGE
3044
    if (close_range(first, last, 0) == 0) {
3045
        /* close_range() ignores errors when it closes file descriptors.
3046
         * Possible reasons of an error return are lack of kernel support
3047
         * or denial of the underlying syscall by a seccomp sandbox on Linux.
3048
         * Fallback to other methods in case of any error. */
3049
    }
3050
    else
3051
#endif /* HAVE_CLOSE_RANGE */
3052
#ifdef USE_CLOSEFROM
3053
    if (last >= sysconf(_SC_OPEN_MAX)) {
3054
        /* Any errors encountered while closing file descriptors are ignored */
3055
        (void)closefrom(first);
3056
    }
3057
    else
3058
#endif /* USE_CLOSEFROM */
3059
#ifdef USE_FDWALK
3060
    {
3061
        int lohi[2];
3062
        lohi[0] = first;
3063
        lohi[1] = last + 1;
3064
        fdwalk(_fdwalk_close_func, lohi);
3065
    }
3066
#else
3067
0
    {
3068
0
        for (int i = first; i <= last; i++) {
3069
            /* Ignore errors */
3070
0
            (void)close(i);
3071
0
        }
3072
0
    }
3073
0
#endif /* USE_FDWALK */
3074
0
    _Py_END_SUPPRESS_IPH
3075
0
}
3076
3077
3078
#ifndef MS_WINDOWS
3079
// Store in *ticks_per_second the number of ticks per second used by the
// clock() and times() functions.
// See the os.times() and time.process_time() implementations.
//
// Returns 0 on success. Returns -1, leaving *ticks_per_second untouched and
// without raising an exception, if sysconf(_SC_CLK_TCK) reports a value
// below 1.
int
_Py_GetTicksPerSecond(long *ticks_per_second)
{
#if defined(HAVE_SYSCONF) && defined(_SC_CLK_TCK)
    const long clk_tck = sysconf(_SC_CLK_TCK);
    if (clk_tck >= 1) {
        *ticks_per_second = clk_tck;
        return 0;
    }
    return -1;
#elif defined(HZ)
    assert(HZ >= 1);
    *ticks_per_second = HZ;
    return 0;
#else
    // No source for the real value: 60 is a magic fallback and may be bogus
    *ticks_per_second = 60;
    return 0;
#endif
}
3099
#endif
3100
3101
3102
/* Tell whether `fd` refers to a valid (open) file descriptor.
   Returns 0 if the file descriptor is invalid, non-zero otherwise. */
int
_Py_IsValidFD(int fd)
{
/* Choice of the probing call:

   - fcntl(fd, F_GETFD) is the fastest option because it only checks the
     process table, and it cannot fail with the "too many open files"
     error (EMFILE). It is preferred when available.

   - dup() is faster than fstat(): fstat() can require input/output
     operations, whereas dup() doesn't. There is a low risk of
     EMFILE/ENFILE at Python startup. However, dup() doesn't check if the
     file descriptor is valid on some platforms. bpo-30225: on macOS Tiger,
     when stdout is redirected to a pipe and the other side of the pipe is
     closed, dup(1) succeeds whereas fstat(1, &st) fails with EBADF.
     FreeBSD has a similar issue (bpo-32849). dup() is therefore only used
     on Linux, where it is enough to detect an invalid FD (bpo-32849).
*/
    if (fd < 0) {
        return 0;
    }
#if defined(F_GETFD) && ( \
        defined(__linux__) || \
        defined(__APPLE__) || \
        (defined(__wasm__) && !defined(__wasi__)))
    int fd_flags = fcntl(fd, F_GETFD);
    return fd_flags >= 0;
#elif defined(__linux__)
    int duplicate = dup(fd);
    if (duplicate < 0) {
        return 0;
    }
    close(duplicate);
    return 1;
#elif defined(MS_WINDOWS)
    HANDLE os_handle;
    _Py_BEGIN_SUPPRESS_IPH
    os_handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (os_handle == INVALID_HANDLE_VALUE) {
        return 0;
    }
    return GetFileType(os_handle) != FILE_TYPE_UNKNOWN;
#else
    struct stat st;
    return (fstat(fd, &st) == 0);
#endif
}