Coverage Report

Created: 2025-10-10 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cpython/Python/fileutils.c
Line
Count
Source
1
#include "Python.h"
2
#include "pycore_fileutils.h"     // fileutils definitions
3
#include "pycore_runtime.h"       // _PyRuntime
4
#include "pycore_pystate.h"       // _Py_AssertHoldsTstate()
5
#include "pycore_unicodeobject.h" // _Py_MAX_UNICODE
6
#include "osdefs.h"               // SEP
7
8
#include <stdlib.h>               // mbstowcs()
9
#ifdef HAVE_UNISTD_H
10
#  include <unistd.h>             // getcwd()
11
#endif
12
13
#ifdef MS_WINDOWS
14
#  include <malloc.h>
15
#  include <windows.h>
16
#  include <winioctl.h>             // FILE_DEVICE_* constants
17
#  include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION
18
#  if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
19
#    define PATHCCH_ALLOW_LONG_PATHS 0x01
20
#  else
21
#    include <pathcch.h>            // PathCchCombineEx
22
#  endif
23
extern int winerror_to_errno(int);
24
#endif
25
26
#ifdef HAVE_LANGINFO_H
27
#  include <langinfo.h>           // nl_langinfo(CODESET)
28
#endif
29
30
#ifdef HAVE_SYS_IOCTL_H
31
#include <sys/ioctl.h>
32
#endif
33
34
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
35
#  include <iconv.h>              // iconv_open()
36
#endif
37
38
#ifdef HAVE_FCNTL_H
39
#  include <fcntl.h>              // fcntl(F_GETFD)
40
#endif
41
42
#ifdef O_CLOEXEC
43
/* Does open() support the O_CLOEXEC flag? Possible values:
44
45
   -1: unknown
46
    0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23
47
    1: open() supports O_CLOEXEC flag, close-on-exec is set
48
49
   The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO
50
   and os.open(). */
51
int _Py_open_cloexec_works = -1;
52
#endif
53
54
// mbstowcs() and mbrtowc() errors
55
static const size_t DECODE_ERROR = ((size_t)-1);
56
#ifdef HAVE_MBRTOWC
57
static const size_t INCOMPLETE_CHARACTER = (size_t)-2;
58
#endif
59
60
61
static int
62
get_surrogateescape(_Py_error_handler errors, int *surrogateescape)
63
11.8k
{
64
11.8k
    switch (errors)
65
11.8k
    {
66
0
    case _Py_ERROR_STRICT:
67
0
        *surrogateescape = 0;
68
0
        return 0;
69
11.8k
    case _Py_ERROR_SURROGATEESCAPE:
70
11.8k
        *surrogateescape = 1;
71
11.8k
        return 0;
72
0
    default:
73
0
        return -1;
74
11.8k
    }
75
11.8k
}
76
77
78
PyObject *
79
_Py_device_encoding(int fd)
80
0
{
81
0
    int valid;
82
0
    Py_BEGIN_ALLOW_THREADS
83
0
    _Py_BEGIN_SUPPRESS_IPH
84
0
    valid = isatty(fd);
85
0
    _Py_END_SUPPRESS_IPH
86
0
    Py_END_ALLOW_THREADS
87
0
    if (!valid)
88
0
        Py_RETURN_NONE;
89
90
#ifdef MS_WINDOWS
91
#ifdef HAVE_WINDOWS_CONSOLE_IO
92
    UINT cp;
93
    if (fd == 0)
94
        cp = GetConsoleCP();
95
    else if (fd == 1 || fd == 2)
96
        cp = GetConsoleOutputCP();
97
    else
98
        cp = 0;
99
    /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application
100
       has no console */
101
    if (cp == 0) {
102
        Py_RETURN_NONE;
103
    }
104
105
    return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
106
#else
107
    Py_RETURN_NONE;
108
#endif /* HAVE_WINDOWS_CONSOLE_IO */
109
#else
110
0
    if (_PyRuntime.preconfig.utf8_mode) {
111
0
        _Py_DECLARE_STR(utf_8, "utf-8");
112
0
        return &_Py_STR(utf_8);
113
0
    }
114
0
    return _Py_GetLocaleEncodingObject();
115
0
#endif
116
0
}
117
118
119
/* Return 1 if 'ch' is acceptable as a decoded character, 0 otherwise.

   Lone surrogates are rejected, and so are values above _Py_MAX_UNICODE
   when wchar_t is wider than 16 bits. */
static int
is_valid_wide_char(wchar_t ch)
{
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
    /* Oracle Solaris doesn't use Unicode code points as wchar_t encoding
       for non-Unicode locales, which makes values higher than _Py_MAX_UNICODE
       possibly valid. */
    (void)ch;
    return 1;
#else
    // Reject lone surrogate characters
    int valid = !Py_UNICODE_IS_SURROGATE(ch);
#if SIZEOF_WCHAR_T > 2
    // bpo-35883: Reject characters outside [U+0000; U+10ffff] range.
    // The glibc mbstowcs() UTF-8 decoder does not respect the RFC 3629,
    // it creates characters outside the [U+0000; U+10ffff] range:
    // https://sourceware.org/bugzilla/show_bug.cgi?id=2373
    valid = valid && (ch <= _Py_MAX_UNICODE);
#endif
    return valid;
#endif
}
143
144
145
static size_t
146
_Py_mbstowcs(wchar_t *dest, const char *src, size_t n)
147
23.6k
{
148
23.6k
    size_t count = mbstowcs(dest, src, n);
149
23.6k
    if (dest != NULL && count != DECODE_ERROR) {
150
307k
        for (size_t i=0; i < count; i++) {
151
295k
            wchar_t ch = dest[i];
152
295k
            if (!is_valid_wide_char(ch)) {
153
0
                return DECODE_ERROR;
154
0
            }
155
295k
        }
156
11.8k
    }
157
23.6k
    return count;
158
23.6k
}
159
160
161
#ifdef HAVE_MBRTOWC
162
static size_t
163
_Py_mbrtowc(wchar_t *pwc, const char *str, size_t len, mbstate_t *pmbs)
164
0
{
165
0
    assert(pwc != NULL);
166
0
    size_t count = mbrtowc(pwc, str, len, pmbs);
167
0
    if (count != 0 && count != DECODE_ERROR && count != INCOMPLETE_CHARACTER) {
168
0
        if (!is_valid_wide_char(*pwc)) {
169
0
            return DECODE_ERROR;
170
0
        }
171
0
    }
172
0
    return count;
173
0
}
174
#endif
175
176
177
#if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS)
178
179
#define USE_FORCE_ASCII
180
181
extern int _Py_normalize_encoding(const char *, char *, size_t);
182
183
/* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale
184
   and POSIX locale. nl_langinfo(CODESET) announces an alias of the
185
   ASCII encoding, whereas mbstowcs() and wcstombs() functions use the
186
   ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use
187
   locale.getpreferredencoding() codec. For example, if command line arguments
188
   are decoded by mbstowcs() and encoded back by os.fsencode(), we get a
189
   UnicodeEncodeError instead of retrieving the original byte string.
190
191
   The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C",
192
   nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least
193
   one byte in range 0x80-0xff can be decoded from the locale encoding. The
194
   workaround is also enabled on error, for example if getting the locale
195
   failed.
196
197
   On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET)
198
   announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the
199
   ASCII encoding in this case.
200
201
   Values of force_ascii:
202
203
       1: the workaround is used: Py_EncodeLocale() uses
204
          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
205
          decode_ascii()
206
       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
207
          Py_DecodeLocale() uses mbstowcs()
208
      -1: unknown, need to call check_force_ascii() to get the value
209
*/
210
32
#define force_ascii (_PyRuntime.fileutils.force_ascii)
211
212
/* Decide whether the "force ASCII" workaround must be used.

   Return 1 if Py_DecodeLocale()/Py_EncodeLocale() must use the ASCII codec
   instead of mbstowcs()/wcstombs(), 0 otherwise:

   - 0 if the LC_CTYPE locale is neither "C" nor "POSIX";
   - 1 if nl_langinfo(CODESET) is not available;
   - on HP-UX: 1 if the codeset is "roman8" but mbstowcs() decodes byte 0xA7
     as U+00A7 (Latin1 behaviour), 0 otherwise;
   - elsewhere: 0 if the codeset is not an alias of ASCII; 1 if it is an
     ASCII alias but mbstowcs() is able to decode at least one byte in the
     range 0x80-0xff; 0 if none of these bytes can be decoded;
   - 1 on error (the locale or the codeset cannot be retrieved or
     normalized). */
static int
check_force_ascii(void)
{
    char *loc = setlocale(LC_CTYPE, NULL);
    if (loc == NULL) {
        goto error;
    }
    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
        /* the LC_CTYPE locale is different than C and POSIX */
        return 0;
    }

#if defined(HAVE_LANGINFO_H) && defined(CODESET)
    const char *codeset = nl_langinfo(CODESET);
    if (!codeset || codeset[0] == '\0') {
        /* CODESET is not set or empty */
        goto error;
    }

    char encoding[20];   /* longest name: "iso_646.irv_1991\0" */
    if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) {
        goto error;
    }

#ifdef __hpux
    if (strcmp(encoding, "roman8") == 0) {
        /* mbstowcs() expects a null-terminated multibyte string: terminate
           the probe so the decoder can never read past the buffer. */
        const unsigned char probe[2] = {0xA7, 0};
        wchar_t wch;
        size_t res = _Py_mbstowcs(&wch, (const char *)probe, 1);
        if (res != DECODE_ERROR && wch == L'\xA7') {
            /* On HP-UX with C locale or the POSIX locale,
               nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses
               Latin1 encoding in practice. Force ASCII in this case.

               Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */
            return 1;
        }
    }
#else
    static const char * const ascii_aliases[] = {
        "ascii",
        /* Aliases from Lib/encodings/aliases.py */
        "646",
        "ansi_x3.4_1968",
        "ansi_x3.4_1986",
        "ansi_x3_4_1968",
        "cp367",
        "csascii",
        "ibm367",
        "iso646_us",
        "iso_646.irv_1991",
        "iso_ir_6",
        "us",
        "us_ascii",
        NULL
    };

    int is_ascii = 0;
    for (const char * const *alias = ascii_aliases; *alias != NULL; alias++) {
        if (strcmp(encoding, *alias) == 0) {
            is_ascii = 1;
            break;
        }
    }
    if (!is_ascii) {
        /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */
        return 0;
    }

    for (unsigned int i = 0x80; i <= 0xff; i++) {
        /* mbstowcs() expects a null-terminated multibyte string. If the
           locale treats the byte as the start of a multibyte sequence, an
           unterminated one-byte buffer would be read out of bounds; with the
           terminator the sequence is reported as a decoding error. */
        char probe[2];
        wchar_t wch[1];

        probe[0] = (char)(unsigned char)i;
        probe[1] = '\0';
        size_t res = _Py_mbstowcs(wch, probe, 1);
        if (res != DECODE_ERROR) {
            /* decoding a non-ASCII character from the locale encoding succeed:
               the locale encoding is not ASCII, force ASCII */
            return 1;
        }
    }
    /* None of the bytes in the range 0x80-0xff can be decoded from the locale
       encoding: the locale encoding is really ASCII */
#endif   /* !defined(__hpux) */
    return 0;
#else
    /* nl_langinfo(CODESET) is not available: always force ASCII */
    return 1;
#endif   /* defined(HAVE_LANGINFO_H) && defined(CODESET) */

error:
    /* if an error occurred, force the ASCII encoding */
    return 1;
}
311
312
313
int
314
_Py_GetForceASCII(void)
315
0
{
316
0
    if (force_ascii == -1) {
317
0
        force_ascii = check_force_ascii();
318
0
    }
319
0
    return force_ascii;
320
0
}
321
322
323
void
324
_Py_ResetForceASCII(void)
325
32
{
326
32
    force_ascii = -1;
327
32
}
328
329
330
static int
331
encode_ascii(const wchar_t *text, char **str,
332
             size_t *error_pos, const char **reason,
333
             int raw_malloc, _Py_error_handler errors)
334
0
{
335
0
    char *result = NULL, *out;
336
0
    size_t len, i;
337
0
    wchar_t ch;
338
339
0
    int surrogateescape;
340
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
341
0
        return -3;
342
0
    }
343
344
0
    len = wcslen(text);
345
346
    /* +1 for NULL byte */
347
0
    if (raw_malloc) {
348
0
        result = PyMem_RawMalloc(len + 1);
349
0
    }
350
0
    else {
351
0
        result = PyMem_Malloc(len + 1);
352
0
    }
353
0
    if (result == NULL) {
354
0
        return -1;
355
0
    }
356
357
0
    out = result;
358
0
    for (i=0; i<len; i++) {
359
0
        ch = text[i];
360
361
0
        if (ch <= 0x7f) {
362
            /* ASCII character */
363
0
            *out++ = (char)ch;
364
0
        }
365
0
        else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) {
366
            /* UTF-8b surrogate */
367
0
            *out++ = (char)(ch - 0xdc00);
368
0
        }
369
0
        else {
370
0
            if (raw_malloc) {
371
0
                PyMem_RawFree(result);
372
0
            }
373
0
            else {
374
0
                PyMem_Free(result);
375
0
            }
376
0
            if (error_pos != NULL) {
377
0
                *error_pos = i;
378
0
            }
379
0
            if (reason) {
380
0
                *reason = "encoding error";
381
0
            }
382
0
            return -2;
383
0
        }
384
0
    }
385
0
    *out = '\0';
386
0
    *str = result;
387
0
    return 0;
388
0
}
389
#else
390
/* The force-ASCII workaround is not compiled in this configuration:
   always report it as disabled. */
int
_Py_GetForceASCII(void)
{
    const int disabled = 0;
    return disabled;
}
395
396
/* The force-ASCII workaround is not compiled in this configuration:
   there is no cached state to reset. */
void
_Py_ResetForceASCII(void)
{
    return;   /* nothing to do */
}
401
#endif   /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */
402
403
404
#if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII)
405
static int
406
decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen,
407
             const char **reason, _Py_error_handler errors)
408
0
{
409
0
    wchar_t *res;
410
0
    unsigned char *in;
411
0
    wchar_t *out;
412
0
    size_t argsize = strlen(arg) + 1;
413
414
0
    int surrogateescape;
415
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
416
0
        return -3;
417
0
    }
418
419
0
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
420
0
        return -1;
421
0
    }
422
0
    res = PyMem_RawMalloc(argsize * sizeof(wchar_t));
423
0
    if (!res) {
424
0
        return -1;
425
0
    }
426
427
0
    out = res;
428
0
    for (in = (unsigned char*)arg; *in; in++) {
429
0
        unsigned char ch = *in;
430
0
        if (ch < 128) {
431
0
            *out++ = ch;
432
0
        }
433
0
        else {
434
0
            if (!surrogateescape) {
435
0
                PyMem_RawFree(res);
436
0
                if (wlen) {
437
0
                    *wlen = in - (unsigned char*)arg;
438
0
                }
439
0
                if (reason) {
440
0
                    *reason = "decoding error";
441
0
                }
442
0
                return -2;
443
0
            }
444
0
            *out++ = 0xdc00 + ch;
445
0
        }
446
0
    }
447
0
    *out = 0;
448
449
0
    if (wlen != NULL) {
450
0
        *wlen = out - res;
451
0
    }
452
0
    *wstr = res;
453
0
    return 0;
454
0
}
455
#endif   /* !HAVE_MBRTOWC */
456
457
static int
458
decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen,
459
                      const char **reason, _Py_error_handler errors)
460
11.8k
{
461
11.8k
    wchar_t *res;
462
11.8k
    size_t argsize;
463
11.8k
    size_t count;
464
11.8k
#ifdef HAVE_MBRTOWC
465
11.8k
    unsigned char *in;
466
11.8k
    wchar_t *out;
467
11.8k
    mbstate_t mbs;
468
11.8k
#endif
469
470
11.8k
    int surrogateescape;
471
11.8k
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
472
0
        return -3;
473
0
    }
474
475
#ifdef HAVE_BROKEN_MBSTOWCS
476
    /* Some platforms have a broken implementation of
477
     * mbstowcs which does not count the characters that
478
     * would result from conversion.  Use an upper bound.
479
     */
480
    argsize = strlen(arg);
481
#else
482
11.8k
    argsize = _Py_mbstowcs(NULL, arg, 0);
483
11.8k
#endif
484
11.8k
    if (argsize != DECODE_ERROR) {
485
11.8k
        if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) {
486
0
            return -1;
487
0
        }
488
11.8k
        res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t));
489
11.8k
        if (!res) {
490
0
            return -1;
491
0
        }
492
493
11.8k
        count = _Py_mbstowcs(res, arg, argsize + 1);
494
11.8k
        if (count != DECODE_ERROR) {
495
11.8k
            *wstr = res;
496
11.8k
            if (wlen != NULL) {
497
11.8k
                *wlen = count;
498
11.8k
            }
499
11.8k
            return 0;
500
11.8k
        }
501
0
        PyMem_RawFree(res);
502
0
    }
503
504
    /* Conversion failed. Fall back to escaping with surrogateescape. */
505
0
#ifdef HAVE_MBRTOWC
506
    /* Try conversion with mbrtwoc (C99), and escape non-decodable bytes. */
507
508
    /* Overallocate; as multi-byte characters are in the argument, the
509
       actual output could use less memory. */
510
0
    argsize = strlen(arg) + 1;
511
0
    if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) {
512
0
        return -1;
513
0
    }
514
0
    res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t));
515
0
    if (!res) {
516
0
        return -1;
517
0
    }
518
519
0
    in = (unsigned char*)arg;
520
0
    out = res;
521
0
    memset(&mbs, 0, sizeof mbs);
522
0
    while (argsize) {
523
0
        size_t converted = _Py_mbrtowc(out, (char*)in, argsize, &mbs);
524
0
        if (converted == 0) {
525
            /* Reached end of string; null char stored. */
526
0
            break;
527
0
        }
528
529
0
        if (converted == DECODE_ERROR || converted == INCOMPLETE_CHARACTER) {
530
0
            if (!surrogateescape) {
531
0
                goto decode_error;
532
0
            }
533
534
            /* Decoding error. Escape as UTF-8b, and start over in the initial
535
               shift state. */
536
0
            *out++ = 0xdc00 + *in++;
537
0
            argsize--;
538
0
            memset(&mbs, 0, sizeof mbs);
539
0
            continue;
540
0
        }
541
542
        // _Py_mbrtowc() reject lone surrogate characters
543
0
        assert(!Py_UNICODE_IS_SURROGATE(*out));
544
545
        /* successfully converted some bytes */
546
0
        in += converted;
547
0
        argsize -= converted;
548
0
        out++;
549
0
    }
550
0
    if (wlen != NULL) {
551
0
        *wlen = out - res;
552
0
    }
553
0
    *wstr = res;
554
0
    return 0;
555
556
0
decode_error:
557
0
    PyMem_RawFree(res);
558
0
    if (wlen) {
559
0
        *wlen = in - (unsigned char*)arg;
560
0
    }
561
0
    if (reason) {
562
0
        *reason = "decoding error";
563
0
    }
564
0
    return -2;
565
#else   /* HAVE_MBRTOWC */
566
    /* Cannot use C locale for escaping; manually escape as if charset
567
       is ASCII (i.e. escape all bytes > 128. This will still roundtrip
568
       correctly in the locale's charset, which must be an ASCII superset. */
569
    return decode_ascii(arg, wstr, wlen, reason, errors);
570
#endif   /* HAVE_MBRTOWC */
571
0
}
572
573
574
/* Decode a byte string from the locale encoding.
575
576
   Use the strict error handler if 'surrogateescape' is zero.  Use the
577
   surrogateescape error handler if 'surrogateescape' is non-zero: undecodable
578
   bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence
579
   can be decoded as a surrogate character, escape the bytes using the
580
   surrogateescape error handler instead of decoding them.
581
582
   On success, return 0 and write the newly allocated wide character string into
583
   *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write
584
   the number of wide characters excluding the null character into *wlen.
585
586
   On memory allocation failure, return -1.
587
588
   On decoding error, return -2. If wlen is not NULL, write the start of
589
   invalid byte sequence in the input string into *wlen. If reason is not NULL,
590
   write the decoding error message into *reason.
591
592
   Return -3 if the error handler 'errors' is not supported.
593
594
   Use the Py_EncodeLocaleEx() function to encode the character string back to
595
   a byte string. */
596
int
597
_Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen,
598
                   const char **reason,
599
                   int current_locale, _Py_error_handler errors)
600
17.0k
{
601
17.0k
    if (current_locale) {
602
#ifdef _Py_FORCE_UTF8_LOCALE
603
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
604
                                errors);
605
#else
606
11.8k
        return decode_current_locale(arg, wstr, wlen, reason, errors);
607
11.8k
#endif
608
11.8k
    }
609
610
#ifdef _Py_FORCE_UTF8_FS_ENCODING
611
    return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
612
                            errors);
613
#else
614
5.24k
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
615
#ifdef MS_WINDOWS
616
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
617
#endif
618
5.24k
    if (use_utf8) {
619
5.24k
        return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason,
620
5.24k
                                errors);
621
5.24k
    }
622
623
0
#ifdef USE_FORCE_ASCII
624
0
    if (force_ascii == -1) {
625
0
        force_ascii = check_force_ascii();
626
0
    }
627
628
0
    if (force_ascii) {
629
        /* force ASCII encoding to workaround mbstowcs() issue */
630
0
        return decode_ascii(arg, wstr, wlen, reason, errors);
631
0
    }
632
0
#endif
633
634
0
    return decode_current_locale(arg, wstr, wlen, reason, errors);
635
0
#endif   /* !_Py_FORCE_UTF8_FS_ENCODING */
636
0
}
637
638
639
/* Decode a byte string from the locale encoding with the
640
   surrogateescape error handler: undecodable bytes are decoded as characters
641
   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
642
   character, escape the bytes using the surrogateescape error handler instead
643
   of decoding them.
644
645
   Return a pointer to a newly allocated wide character string, use
646
   PyMem_RawFree() to free the memory. If size is not NULL, write the number of
647
   wide characters excluding the null character into *size
648
649
   Return NULL on decoding error or memory allocation error. If *size* is not
650
   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
651
   decoding error.
652
653
   Decoding errors should never happen, unless there is a bug in the C
654
   library.
655
656
   Use the Py_EncodeLocale() function to encode the character string back to a
657
   byte string. */
658
wchar_t*
659
Py_DecodeLocale(const char* arg, size_t *wlen)
660
112
{
661
112
    wchar_t *wstr;
662
112
    int res = _Py_DecodeLocaleEx(arg, &wstr, wlen,
663
112
                                 NULL, 0,
664
112
                                 _Py_ERROR_SURROGATEESCAPE);
665
112
    if (res != 0) {
666
0
        assert(res != -3);
667
0
        if (wlen != NULL) {
668
0
            *wlen = (size_t)res;
669
0
        }
670
0
        return NULL;
671
0
    }
672
112
    return wstr;
673
112
}
674
675
676
static int
677
encode_current_locale(const wchar_t *text, char **str,
678
                      size_t *error_pos, const char **reason,
679
                      int raw_malloc, _Py_error_handler errors)
680
0
{
681
0
    const size_t len = wcslen(text);
682
0
    char *result = NULL, *bytes = NULL;
683
0
    size_t i, size, converted;
684
0
    wchar_t c, buf[2];
685
686
0
    int surrogateescape;
687
0
    if (get_surrogateescape(errors, &surrogateescape) < 0) {
688
0
        return -3;
689
0
    }
690
691
    /* The function works in two steps:
692
       1. compute the length of the output buffer in bytes (size)
693
       2. outputs the bytes */
694
0
    size = 0;
695
0
    buf[1] = 0;
696
0
    while (1) {
697
0
        for (i=0; i < len; i++) {
698
0
            c = text[i];
699
0
            if (c >= 0xdc80 && c <= 0xdcff) {
700
0
                if (!surrogateescape) {
701
0
                    goto encode_error;
702
0
                }
703
                /* UTF-8b surrogate */
704
0
                if (bytes != NULL) {
705
0
                    *bytes++ = c - 0xdc00;
706
0
                    size--;
707
0
                }
708
0
                else {
709
0
                    size++;
710
0
                }
711
0
                continue;
712
0
            }
713
0
            else {
714
0
                buf[0] = c;
715
0
                if (bytes != NULL) {
716
0
                    converted = wcstombs(bytes, buf, size);
717
0
                }
718
0
                else {
719
0
                    converted = wcstombs(NULL, buf, 0);
720
0
                }
721
0
                if (converted == DECODE_ERROR) {
722
0
                    goto encode_error;
723
0
                }
724
0
                if (bytes != NULL) {
725
0
                    bytes += converted;
726
0
                    size -= converted;
727
0
                }
728
0
                else {
729
0
                    size += converted;
730
0
                }
731
0
            }
732
0
        }
733
0
        if (result != NULL) {
734
0
            *bytes = '\0';
735
0
            break;
736
0
        }
737
738
0
        size += 1; /* nul byte at the end */
739
0
        if (raw_malloc) {
740
0
            result = PyMem_RawMalloc(size);
741
0
        }
742
0
        else {
743
0
            result = PyMem_Malloc(size);
744
0
        }
745
0
        if (result == NULL) {
746
0
            return -1;
747
0
        }
748
0
        bytes = result;
749
0
    }
750
0
    *str = result;
751
0
    return 0;
752
753
0
encode_error:
754
0
    if (raw_malloc) {
755
0
        PyMem_RawFree(result);
756
0
    }
757
0
    else {
758
0
        PyMem_Free(result);
759
0
    }
760
0
    if (error_pos != NULL) {
761
0
        *error_pos = i;
762
0
    }
763
0
    if (reason) {
764
0
        *reason = "encoding error";
765
0
    }
766
0
    return -2;
767
0
}
768
769
770
/* Encode a string to the locale encoding.
771
772
   Parameters:
773
774
   * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead
775
     of PyMem_Malloc().
776
   * current_locale: if non-zero, use the current LC_CTYPE, otherwise use
777
     Python filesystem encoding.
778
   * errors: error handler like "strict" or "surrogateescape".
779
780
   Return value:
781
782
    0: success, *str is set to a newly allocated decoded string.
783
   -1: memory allocation failure
784
   -2: encoding error, set *error_pos and *reason (if set).
785
   -3: the error handler 'errors' is not supported.
786
 */
787
static int
788
encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos,
789
                 const char **reason,
790
                 int raw_malloc, int current_locale, _Py_error_handler errors)
791
586
{
792
586
    if (current_locale) {
793
#ifdef _Py_FORCE_UTF8_LOCALE
794
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
795
                                raw_malloc, errors);
796
#else
797
0
        return encode_current_locale(text, str, error_pos, reason,
798
0
                                     raw_malloc, errors);
799
0
#endif
800
0
    }
801
802
#ifdef _Py_FORCE_UTF8_FS_ENCODING
803
    return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
804
                            raw_malloc, errors);
805
#else
806
586
    int use_utf8 = (_PyRuntime.preconfig.utf8_mode >= 1);
807
#ifdef MS_WINDOWS
808
    use_utf8 |= (_PyRuntime.preconfig.legacy_windows_fs_encoding == 0);
809
#endif
810
586
    if (use_utf8) {
811
586
        return _Py_EncodeUTF8Ex(text, str, error_pos, reason,
812
586
                                raw_malloc, errors);
813
586
    }
814
815
0
#ifdef USE_FORCE_ASCII
816
0
    if (force_ascii == -1) {
817
0
        force_ascii = check_force_ascii();
818
0
    }
819
820
0
    if (force_ascii) {
821
0
        return encode_ascii(text, str, error_pos, reason,
822
0
                            raw_malloc, errors);
823
0
    }
824
0
#endif
825
826
0
    return encode_current_locale(text, str, error_pos, reason,
827
0
                                 raw_malloc, errors);
828
0
#endif   /* _Py_FORCE_UTF8_FS_ENCODING */
829
0
}
830
831
static char*
832
encode_locale(const wchar_t *text, size_t *error_pos,
833
              int raw_malloc, int current_locale)
834
160
{
835
160
    char *str;
836
160
    int res = encode_locale_ex(text, &str, error_pos, NULL,
837
160
                               raw_malloc, current_locale,
838
160
                               _Py_ERROR_SURROGATEESCAPE);
839
160
    if (res != -2 && error_pos) {
840
0
        *error_pos = (size_t)-1;
841
0
    }
842
160
    if (res != 0) {
843
0
        return NULL;
844
0
    }
845
160
    return str;
846
160
}
847
848
/* Encode a wide character string to the locale encoding with the
   surrogateescape error handler: surrogate characters in the range
   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.

   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
   the memory. Return NULL on encoding or memory allocation error.

   If error_pos is not NULL, *error_pos is set to (size_t)-1 on success, or set
   to the index of the invalid character on encoding error.

   Use the Py_DecodeLocale() function to decode the bytes string back to a wide
   character string. */
char*
Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 0;       /* allocate with PyMem_Malloc() */
    const int current_locale = 0;   /* use the filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
865
866
867
/* Same as Py_EncodeLocale(), except that the result is allocated by
   PyMem_RawMalloc() and so must be released with PyMem_RawFree() instead of
   PyMem_Free(). */
char*
_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
{
    const int raw_malloc = 1;       /* allocate with PyMem_RawMalloc() */
    const int current_locale = 0;   /* use the filesystem encoding */
    return encode_locale(text, error_pos, raw_malloc, current_locale);
}
874
875
876
int
877
_Py_EncodeLocaleEx(const wchar_t *text, char **str,
878
                   size_t *error_pos, const char **reason,
879
                   int current_locale, _Py_error_handler errors)
880
426
{
881
426
    return encode_locale_ex(text, str, error_pos, reason, 1,
882
426
                            current_locale, errors);
883
426
}
884
885
886
// Get the current locale encoding name:
887
//
888
// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
889
// - Return "utf-8" if the UTF-8 Mode is enabled
890
// - On Windows, return the ANSI code page (ex: "cp1250")
891
// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
892
// - Otherwise, return nl_langinfo(CODESET).
893
//
894
// Return NULL on memory allocation failure.
895
//
896
// See also config_get_locale_encoding()
897
wchar_t*
898
_Py_GetLocaleEncoding(void)
899
0
{
900
#ifdef _Py_FORCE_UTF8_LOCALE
901
    // On Android langinfo.h and CODESET are missing,
902
    // and UTF-8 is always used in mbstowcs() and wcstombs().
903
    return _PyMem_RawWcsdup(L"utf-8");
904
#else
905
906
#ifdef MS_WINDOWS
907
    wchar_t encoding[23];
908
    unsigned int ansi_codepage = GetACP();
909
    swprintf(encoding, Py_ARRAY_LENGTH(encoding), L"cp%u", ansi_codepage);
910
    encoding[Py_ARRAY_LENGTH(encoding) - 1] = 0;
911
    return _PyMem_RawWcsdup(encoding);
912
#else
913
0
    const char *encoding = nl_langinfo(CODESET);
914
0
    if (!encoding || encoding[0] == '\0') {
915
        // Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
916
        // macOS if the LC_CTYPE locale is not supported.
917
0
        return _PyMem_RawWcsdup(L"utf-8");
918
0
    }
919
920
0
    wchar_t *wstr;
921
0
    int res = decode_current_locale(encoding, &wstr, NULL,
922
0
                                    NULL, _Py_ERROR_SURROGATEESCAPE);
923
0
    if (res < 0) {
924
0
        return NULL;
925
0
    }
926
0
    return wstr;
927
0
#endif  // !MS_WINDOWS
928
929
0
#endif  // !_Py_FORCE_UTF8_LOCALE
930
0
}
931
932
933
/* Same as _Py_GetLocaleEncoding(), but return the name as a Python str
   object.

   Return NULL with an exception set on failure: MemoryError if
   _Py_GetLocaleEncoding() returns NULL, otherwise whatever
   PyUnicode_FromWideChar() reports. */
PyObject *
_Py_GetLocaleEncodingObject(void)
{
    wchar_t *name = _Py_GetLocaleEncoding();
    if (name == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* -1: the length is computed from the null-terminated string */
    PyObject *result = PyUnicode_FromWideChar(name, -1);
    PyMem_RawFree(name);
    return result;
}
946
947
#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION
948
949
/* Check whether the current locale uses a non-Unicode internal wchar_t form.

   Return 0 if nl_langinfo(CODESET) is NULL, "UTF-8" or "646"; return 1 for
   any other codeset. */
int
_Py_LocaleUsesNonUnicodeWchar(void)
{
    /* Oracle Solaris uses non-Unicode internal wchar_t form for
       non-Unicode locales and hence needs conversion to UTF first. */
    const char *name = nl_langinfo(CODESET);
    if (name == NULL) {
        return 0;
    }
    if (strcmp(name, "UTF-8") == 0) {
        return 0;
    }
    /* 646 refers to ISO/IEC 646 standard that corresponds to ASCII encoding */
    if (strcmp(name, "646") == 0) {
        return 0;
    }
    return 1;
}
962
963
static wchar_t *
964
_Py_ConvertWCharForm(const wchar_t *source, Py_ssize_t size,
965
                     const char *tocode, const char *fromcode)
966
{
967
    static_assert(sizeof(wchar_t) == 4, "wchar_t must be 32-bit");
968
969
    /* Ensure we won't overflow the size. */
970
    if (size > (PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(wchar_t))) {
971
        PyErr_NoMemory();
972
        return NULL;
973
    }
974
975
    /* the string doesn't have to be NULL terminated */
976
    wchar_t* target = PyMem_Malloc(size * sizeof(wchar_t));
977
    if (target == NULL) {
978
        PyErr_NoMemory();
979
        return NULL;
980
    }
981
982
    iconv_t cd = iconv_open(tocode, fromcode);
983
    if (cd == (iconv_t)-1) {
984
        PyErr_Format(PyExc_ValueError, "iconv_open() failed");
985
        PyMem_Free(target);
986
        return NULL;
987
    }
988
989
    char *inbuf = (char *) source;
990
    char *outbuf = (char *) target;
991
    size_t inbytesleft = sizeof(wchar_t) * size;
992
    size_t outbytesleft = inbytesleft;
993
994
    size_t ret = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
995
    if (ret == DECODE_ERROR) {
996
        PyErr_Format(PyExc_ValueError, "iconv() failed");
997
        PyMem_Free(target);
998
        iconv_close(cd);
999
        return NULL;
1000
    }
1001
1002
    iconv_close(cd);
1003
    return target;
1004
}
1005
1006
/* Convert a wide character string to the UCS-4 encoded string. This
1007
   is necessary on systems where internal form of wchar_t are not Unicode
1008
   code points (e.g. Oracle Solaris).
1009
1010
   Return a pointer to a newly allocated string, use PyMem_Free() to free
1011
   the memory. Return NULL and raise exception on conversion or memory
1012
   allocation error. */
1013
wchar_t *
1014
_Py_DecodeNonUnicodeWchar(const wchar_t *native, Py_ssize_t size)
1015
{
1016
    return _Py_ConvertWCharForm(native, size, "UCS-4-INTERNAL", "wchar_t");
1017
}
1018
1019
/* Convert a UCS-4 encoded string to native wide character string. This
1020
   is necessary on systems where internal form of wchar_t are not Unicode
1021
   code points (e.g. Oracle Solaris).
1022
1023
   The conversion is done in place. This can be done because both wchar_t
1024
   and UCS-4 use 4-byte encoding, and one wchar_t symbol always correspond
1025
   to a single UCS-4 symbol and vice versa. (This is true for Oracle Solaris,
1026
   which is currently the only system using these functions; it doesn't have
1027
   to be for other systems).
1028
1029
   Return 0 on success. Return -1 and raise exception on conversion
1030
   or memory allocation error. */
1031
int
1032
_Py_EncodeNonUnicodeWchar_InPlace(wchar_t *unicode, Py_ssize_t size)
1033
{
1034
    wchar_t* result = _Py_ConvertWCharForm(unicode, size, "wchar_t", "UCS-4-INTERNAL");
1035
    if (!result) {
1036
        return -1;
1037
    }
1038
    memcpy(unicode, result, size * sizeof(wchar_t));
1039
    PyMem_Free(result);
1040
    return 0;
1041
}
1042
#endif /* HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION */
1043
1044
#ifdef MS_WINDOWS
1045
/* Offset applied by the FILETIME <-> time_t conversion helpers in this
   file: it is subtracted when producing a time_t and added when producing
   a FILETIME. */
static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */
1046
1047
static void
1048
FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out)
1049
{
1050
    /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */
1051
    /* Cannot simply cast and dereference in_ptr,
1052
       since it might not be aligned properly */
1053
    __int64 in;
1054
    memcpy(&in, in_ptr, sizeof(in));
1055
    *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1056
    *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t);
1057
}
1058
1059
static void
1060
LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out)
1061
{
1062
    *nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */
1063
    *time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t);
1064
}
1065
1066
void
1067
_Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr)
1068
{
1069
    /* XXX endianness */
1070
    __int64 out;
1071
    out = time_in + secs_between_epochs;
1072
    out = out * 10000000 + nsec_in / 100;
1073
    memcpy(out_ptr, &out, sizeof(out));
1074
}
1075
1076
/* Below, we *know* that ugo+r is 0444 */
1077
#if _S_IREAD != 0400
1078
#error Unsupported C library
1079
#endif
1080
static int
1081
attributes_to_mode(DWORD attr)
1082
{
1083
    int m = 0;
1084
    if (attr & FILE_ATTRIBUTE_DIRECTORY)
1085
        m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */
1086
    else
1087
        m |= _S_IFREG;
1088
    if (attr & FILE_ATTRIBUTE_READONLY)
1089
        m |= 0444;
1090
    else
1091
        m |= 0666;
1092
    return m;
1093
}
1094
1095
1096
/* Overlay used to read a FILE_ID_128 as two 64-bit integers: a value is
   stored through 'id' and then read back through 'st_ino' (the leading
   8 bytes of the union) and 'st_ino_high' (the following 8 bytes). */
typedef union {
    FILE_ID_128 id;
    struct {
        uint64_t st_ino;
        uint64_t st_ino_high;
    };
} id_128_to_ino;
1103
1104
1105
void
1106
_Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag,
1107
                           FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info,
1108
                           struct _Py_stat_struct *result)
1109
{
1110
    memset(result, 0, sizeof(*result));
1111
    result->st_mode = attributes_to_mode(info->dwFileAttributes);
1112
    result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow;
1113
    result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber;
1114
    result->st_rdev = 0;
1115
    /* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */
1116
    if (basic_info) {
1117
        LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1118
        LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1119
        LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1120
        LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1121
    } else {
1122
        FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1123
        FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1124
        FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec);
1125
    }
1126
    result->st_nlink = info->nNumberOfLinks;
1127
1128
    if (id_info) {
1129
        id_128_to_ino file_id;
1130
        file_id.id = id_info->FileId;
1131
        result->st_ino = file_id.st_ino;
1132
        result->st_ino_high = file_id.st_ino_high;
1133
    }
1134
    if (!result->st_ino && !result->st_ino_high) {
1135
        /* should only occur for DirEntry_from_find_data, in which case the
1136
           index is likely to be zero anyway. */
1137
        result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow;
1138
    }
1139
1140
    /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1141
       open other name surrogate reparse points without traversing them. To
1142
       detect/handle these, check st_file_attributes and st_reparse_tag. */
1143
    result->st_reparse_tag = reparse_tag;
1144
    if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1145
        reparse_tag == IO_REPARSE_TAG_SYMLINK) {
1146
        /* set the bits that make this a symlink */
1147
        result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1148
    }
1149
    result->st_file_attributes = info->dwFileAttributes;
1150
}
1151
1152
void
1153
_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info,
1154
                            struct _Py_stat_struct *result)
1155
{
1156
    memset(result, 0, sizeof(*result));
1157
    result->st_mode = attributes_to_mode(info->FileAttributes);
1158
    result->st_size = info->EndOfFile.QuadPart;
1159
    LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec);
1160
    LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec);
1161
    LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec);
1162
    LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec);
1163
    result->st_nlink = info->NumberOfLinks;
1164
    result->st_dev = info->VolumeSerialNumber.QuadPart;
1165
    /* File systems with less than 128-bits zero pad into this field */
1166
    id_128_to_ino file_id;
1167
    file_id.id = info->FileId128;
1168
    result->st_ino = file_id.st_ino;
1169
    result->st_ino_high = file_id.st_ino_high;
1170
    /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will
1171
       open other name surrogate reparse points without traversing them. To
1172
       detect/handle these, check st_file_attributes and st_reparse_tag. */
1173
    result->st_reparse_tag = info->ReparseTag;
1174
    if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
1175
        info->ReparseTag == IO_REPARSE_TAG_SYMLINK) {
1176
        /* set the bits that make this a symlink */
1177
        result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK;
1178
    }
1179
    result->st_file_attributes = info->FileAttributes;
1180
    switch (info->DeviceType) {
1181
    case FILE_DEVICE_DISK:
1182
    case FILE_DEVICE_VIRTUAL_DISK:
1183
    case FILE_DEVICE_DFS:
1184
    case FILE_DEVICE_CD_ROM:
1185
    case FILE_DEVICE_CONTROLLER:
1186
    case FILE_DEVICE_DATALINK:
1187
        break;
1188
    case FILE_DEVICE_DISK_FILE_SYSTEM:
1189
    case FILE_DEVICE_CD_ROM_FILE_SYSTEM:
1190
    case FILE_DEVICE_NETWORK_FILE_SYSTEM:
1191
        result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */
1192
        break;
1193
    case FILE_DEVICE_CONSOLE:
1194
    case FILE_DEVICE_NULL:
1195
    case FILE_DEVICE_KEYBOARD:
1196
    case FILE_DEVICE_MODEM:
1197
    case FILE_DEVICE_MOUSE:
1198
    case FILE_DEVICE_PARALLEL_PORT:
1199
    case FILE_DEVICE_PRINTER:
1200
    case FILE_DEVICE_SCREEN:
1201
    case FILE_DEVICE_SERIAL_PORT:
1202
    case FILE_DEVICE_SOUND:
1203
        result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR;
1204
        break;
1205
    case FILE_DEVICE_NAMED_PIPE:
1206
        result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO;
1207
        break;
1208
    default:
1209
        if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
1210
            result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR;
1211
        }
1212
        break;
1213
    }
1214
}
1215
1216
#endif
1217
1218
/* Get the status of an open file descriptor without raising.

   On POSIX, this is a plain fstat() call.

   On Windows, GetFileType() and GetFileInformationByHandle() are used
   instead because they support files larger than 2 GiB.  fstat() may fail
   with EOVERFLOW on such files because the file size type is a signed
   32-bit integer: see issue #23152.

   Fill 'status' and return 0 on success. Return nonzero on error: on
   Windows the last Windows error is set, on POSIX errno is set. */
int
_Py_fstat_noraise(int fd, struct _Py_stat_struct *status)
{
#ifdef MS_WINDOWS
    HANDLE handle = _Py_get_osfhandle_noraise(fd);
    if (handle == INVALID_HANDLE_VALUE) {
        /* errno is already set by _get_osfhandle, but we also set
           the Win32 error for callers who expect that */
        SetLastError(ERROR_INVALID_HANDLE);
        return -1;
    }
    memset(status, 0, sizeof(*status));

    int file_type = GetFileType(handle);
    if (file_type == FILE_TYPE_UNKNOWN) {
        DWORD winerr = GetLastError();
        if (winerr != 0) {
            errno = winerror_to_errno(winerr);
            return -1;
        }
        /* else: valid but unknown file */
    }

    /* Only disk files carry further information; for every other type
       the (zeroed) status gets at most a file type. */
    switch (file_type) {
    case FILE_TYPE_DISK:
        break;
    case FILE_TYPE_CHAR:
        status->st_mode = _S_IFCHR;
        return 0;
    case FILE_TYPE_PIPE:
        status->st_mode = _S_IFIFO;
        return 0;
    default:
        return 0;
    }

    BY_HANDLE_FILE_INFORMATION info;
    FILE_BASIC_INFO basic;
    if (!GetFileInformationByHandle(handle, &info)
        || !GetFileInformationByHandleEx(handle, FileBasicInfo,
                                         &basic, sizeof(basic)))
    {
        /* The Win32 error is already set, but we also set errno for
           callers who expect it */
        errno = winerror_to_errno(GetLastError());
        return -1;
    }

    /* FileIdInfo is optional: pass NULL along when it cannot be read. */
    FILE_ID_INFO id;
    FILE_ID_INFO *id_ptr = &id;
    if (!GetFileInformationByHandleEx(handle, FileIdInfo, &id, sizeof(id))) {
        id_ptr = NULL;
    }

    _Py_attribute_data_to_stat(&info, 0, &basic, id_ptr, status);
    return 0;
#else
    return fstat(fd, status);
#endif
}
1288
1289
/* Return information about a file.
1290
1291
   On POSIX, use fstat().
1292
1293
   On Windows, use GetFileType() and GetFileInformationByHandle() which support
1294
   files larger than 2 GiB.  fstat() may fail with EOVERFLOW on files larger
1295
   than 2 GiB because the file size type is a signed 32-bit integer: see issue
1296
   #23152.
1297
1298
   Raise an exception and return -1 on error. On Windows, set the last Windows
1299
   error on error. On POSIX, set errno on error. Fill status and return 0 on
1300
   success.
1301
1302
   Release the GIL to call GetFileType() and GetFileInformationByHandle(), or
1303
   to call fstat(). The caller must hold the GIL. */
1304
int
1305
_Py_fstat(int fd, struct _Py_stat_struct *status)
1306
0
{
1307
0
    int res;
1308
1309
0
    _Py_AssertHoldsTstate();
1310
1311
0
    Py_BEGIN_ALLOW_THREADS
1312
0
    res = _Py_fstat_noraise(fd, status);
1313
0
    Py_END_ALLOW_THREADS
1314
1315
0
    if (res != 0) {
1316
#ifdef MS_WINDOWS
1317
        PyErr_SetFromWindowsErr(0);
1318
#else
1319
0
        PyErr_SetFromErrno(PyExc_OSError);
1320
0
#endif
1321
0
        return -1;
1322
0
    }
1323
0
    return 0;
1324
0
}
1325
1326
/* Like _Py_stat() but with a raw filename.

   On Windows, call _wstat() and copy only st_mode into 'buf'. Otherwise,
   encode 'path' with _Py_EncodeLocaleRaw() and call stat().

   Return the result of _wstat() / stat(): 0 on success, nonzero on error.
   If the path cannot be encoded, set errno to EINVAL and return -1.

   On POSIX, errno as set by stat() is preserved across the release of the
   temporary encoded path, so callers can rely on it after a failure. */
int
_Py_wstat(const wchar_t* path, struct stat *buf)
{
    int err;
#ifdef MS_WINDOWS
    struct _stat wstatbuf;
    err = _wstat(path, &wstatbuf);
    if (!err) {
        buf->st_mode = wstatbuf.st_mode;
    }
#else
    char *fname;
    fname = _Py_EncodeLocaleRaw(path, NULL);
    if (fname == NULL) {
        errno = EINVAL;
        return -1;
    }
    err = stat(fname, buf);
    /* The deallocator is allowed to modify errno: keep the value that
       stat() reported. */
    int saved_errno = errno;
    PyMem_RawFree(fname);
    errno = saved_errno;
#endif
    return err;
}
1349
1350
1351
/* Call _wstat() on Windows, or encode the path to the filesystem encoding and
1352
   call stat() otherwise. Only fill st_mode attribute on Windows.
1353
1354
   Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was
1355
   raised. */
1356
1357
int
1358
_Py_stat(PyObject *path, struct stat *statbuf)
1359
0
{
1360
#ifdef MS_WINDOWS
1361
    int err;
1362
1363
    wchar_t *wpath = PyUnicode_AsWideCharString(path, NULL);
1364
    if (wpath == NULL)
1365
        return -2;
1366
1367
    err = _Py_wstat(wpath, statbuf);
1368
    PyMem_Free(wpath);
1369
    return err;
1370
#else
1371
0
    int ret;
1372
0
    PyObject *bytes;
1373
0
    char *cpath;
1374
1375
0
    bytes = PyUnicode_EncodeFSDefault(path);
1376
0
    if (bytes == NULL)
1377
0
        return -2;
1378
1379
    /* check for embedded null bytes */
1380
0
    if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) {
1381
0
        Py_DECREF(bytes);
1382
0
        return -2;
1383
0
    }
1384
1385
0
    ret = stat(cpath, statbuf);
1386
0
    Py_DECREF(bytes);
1387
0
    return ret;
1388
0
#endif
1389
0
}
1390
1391
#ifdef MS_WINDOWS
1392
// For some Windows API partitions, SetHandleInformation() is declared
1393
// but none of the handle flags are defined.
1394
#ifndef HANDLE_FLAG_INHERIT
1395
#define HANDLE_FLAG_INHERIT 0x00000001
1396
#endif
1397
#endif
1398
1399
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1400
static int
1401
get_inheritable(int fd, int raise)
1402
16
{
1403
#ifdef MS_WINDOWS
1404
    HANDLE handle;
1405
    DWORD flags;
1406
1407
    handle = _Py_get_osfhandle_noraise(fd);
1408
    if (handle == INVALID_HANDLE_VALUE) {
1409
        if (raise)
1410
            PyErr_SetFromErrno(PyExc_OSError);
1411
        return -1;
1412
    }
1413
1414
    if (!GetHandleInformation(handle, &flags)) {
1415
        if (raise)
1416
            PyErr_SetFromWindowsErr(0);
1417
        return -1;
1418
    }
1419
1420
    return (flags & HANDLE_FLAG_INHERIT);
1421
#else
1422
16
    int flags;
1423
1424
16
    flags = fcntl(fd, F_GETFD, 0);
1425
16
    if (flags == -1) {
1426
0
        if (raise)
1427
0
            PyErr_SetFromErrno(PyExc_OSError);
1428
0
        return -1;
1429
0
    }
1430
16
    return !(flags & FD_CLOEXEC);
1431
16
#endif
1432
16
}
1433
1434
/* Report whether the specified file descriptor is inheritable.
   Return 1 if it can be inherited and 0 if it cannot; on error, raise an
   exception and return -1. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;
    return get_inheritable(fd, raise_on_error);
}
1442
1443
1444
/* This function MUST be kept async-signal-safe on POSIX when raise=0. */
1445
static int
1446
set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works)
1447
1.29k
{
1448
#ifdef MS_WINDOWS
1449
    HANDLE handle;
1450
    DWORD flags;
1451
#else
1452
1.29k
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1453
1.29k
    static int ioctl_works = -1;
1454
1.29k
    int request;
1455
1.29k
    int err;
1456
1.29k
#endif
1457
1.29k
    int flags, new_flags;
1458
1.29k
    int res;
1459
1.29k
#endif
1460
1461
    /* atomic_flag_works can only be used to make the file descriptor
1462
       non-inheritable */
1463
1.29k
    assert(!(atomic_flag_works != NULL && inheritable));
1464
1465
1.29k
    if (atomic_flag_works != NULL && !inheritable) {
1466
1.29k
        if (_Py_atomic_load_int_relaxed(atomic_flag_works) == -1) {
1467
16
            int isInheritable = get_inheritable(fd, raise);
1468
16
            if (isInheritable == -1)
1469
0
                return -1;
1470
16
            _Py_atomic_store_int_relaxed(atomic_flag_works, !isInheritable);
1471
16
        }
1472
1473
1.29k
        if (_Py_atomic_load_int_relaxed(atomic_flag_works))
1474
1.29k
            return 0;
1475
1.29k
    }
1476
1477
#ifdef MS_WINDOWS
1478
    handle = _Py_get_osfhandle_noraise(fd);
1479
    if (handle == INVALID_HANDLE_VALUE) {
1480
        if (raise)
1481
            PyErr_SetFromErrno(PyExc_OSError);
1482
        return -1;
1483
    }
1484
1485
    if (inheritable)
1486
        flags = HANDLE_FLAG_INHERIT;
1487
    else
1488
        flags = 0;
1489
1490
    if (!SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) {
1491
        if (raise)
1492
            PyErr_SetFromWindowsErr(0);
1493
        return -1;
1494
    }
1495
    return 0;
1496
1497
#else
1498
1499
0
#if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX)
1500
0
    if (raise != 0 && _Py_atomic_load_int_relaxed(&ioctl_works) != 0) {
1501
        /* fast-path: ioctl() only requires one syscall */
1502
        /* caveat: raise=0 is an indicator that we must be async-signal-safe
1503
         * thus avoid using ioctl() so we skip the fast-path. */
1504
0
        if (inheritable)
1505
0
            request = FIONCLEX;
1506
0
        else
1507
0
            request = FIOCLEX;
1508
0
        err = ioctl(fd, request, NULL);
1509
0
        if (!err) {
1510
0
            if (_Py_atomic_load_int_relaxed(&ioctl_works) == -1) {
1511
0
                _Py_atomic_store_int_relaxed(&ioctl_works, 1);
1512
0
            }
1513
0
            return 0;
1514
0
        }
1515
1516
0
#ifdef O_PATH
1517
0
        if (errno == EBADF) {
1518
            // bpo-44849: On Linux and FreeBSD, ioctl(FIOCLEX) fails with EBADF
1519
            // on O_PATH file descriptors. Fall through to the fcntl()
1520
            // implementation.
1521
0
        }
1522
0
        else
1523
0
#endif
1524
0
        if (errno != ENOTTY && errno != EACCES) {
1525
0
            if (raise)
1526
0
                PyErr_SetFromErrno(PyExc_OSError);
1527
0
            return -1;
1528
0
        }
1529
0
        else {
1530
            /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for
1531
               device". The ioctl is declared but not supported by the kernel.
1532
               Remember that ioctl() doesn't work. It is the case on
1533
               Illumos-based OS for example.
1534
1535
               Issue #27057: When SELinux policy disallows ioctl it will fail
1536
               with EACCES. While FIOCLEX is safe operation it may be
1537
               unavailable because ioctl was denied altogether.
1538
               This can be the case on Android. */
1539
0
            _Py_atomic_store_int_relaxed(&ioctl_works, 0);
1540
0
        }
1541
        /* fallback to fcntl() if ioctl() does not work */
1542
0
    }
1543
0
#endif
1544
1545
    /* slow-path: fcntl() requires two syscalls */
1546
0
    flags = fcntl(fd, F_GETFD);
1547
0
    if (flags < 0) {
1548
0
        if (raise)
1549
0
            PyErr_SetFromErrno(PyExc_OSError);
1550
0
        return -1;
1551
0
    }
1552
1553
0
    if (inheritable) {
1554
0
        new_flags = flags & ~FD_CLOEXEC;
1555
0
    }
1556
0
    else {
1557
0
        new_flags = flags | FD_CLOEXEC;
1558
0
    }
1559
1560
0
    if (new_flags == flags) {
1561
        /* FD_CLOEXEC flag already set/cleared: nothing to do */
1562
0
        return 0;
1563
0
    }
1564
1565
0
    res = fcntl(fd, F_SETFD, new_flags);
1566
0
    if (res < 0) {
1567
0
        if (raise)
1568
0
            PyErr_SetFromErrno(PyExc_OSError);
1569
0
        return -1;
1570
0
    }
1571
0
    return 0;
1572
0
#endif
1573
0
}
1574
1575
/* Clear the inheritable flag of 'fd' without raising an exception.
   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1582
1583
/* Change the inheritable flag of the specified file descriptor.
   Return 0 on success; on error, raise an exception and return -1.

   atomic_flag_works is either NULL (no atomic flag was used to create the
   file descriptor) or points to a tri-state cache:

    * *atomic_flag_works==-1: check if the inheritable is set on the file
      descriptor: if yes, set *atomic_flag_works to 1, otherwise set to 0 and
      set the inheritable flag
    * *atomic_flag_works==1: do nothing
    * *atomic_flag_works==0: set inheritable flag to False

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1604
1605
/* Variant of _Py_set_inheritable() that never raises: on error, errno is
   set and -1 is returned.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;
    return set_inheritable(fd, inheritable, raise_on_error, atomic_flag_works);
}
1613
1614
static int
1615
_Py_open_impl(const char *pathname, int flags, int gil_held)
1616
0
{
1617
0
    int fd;
1618
0
    int async_err = 0;
1619
0
#ifndef MS_WINDOWS
1620
0
    int *atomic_flag_works;
1621
0
#endif
1622
1623
#ifdef MS_WINDOWS
1624
    flags |= O_NOINHERIT;
1625
#elif defined(O_CLOEXEC)
1626
    atomic_flag_works = &_Py_open_cloexec_works;
1627
0
    flags |= O_CLOEXEC;
1628
#else
1629
    atomic_flag_works = NULL;
1630
#endif
1631
1632
0
    if (gil_held) {
1633
0
        PyObject *pathname_obj = PyUnicode_DecodeFSDefault(pathname);
1634
0
        if (pathname_obj == NULL) {
1635
0
            return -1;
1636
0
        }
1637
0
        if (PySys_Audit("open", "OOi", pathname_obj, Py_None, flags) < 0) {
1638
0
            Py_DECREF(pathname_obj);
1639
0
            return -1;
1640
0
        }
1641
1642
0
        do {
1643
0
            Py_BEGIN_ALLOW_THREADS
1644
0
            fd = open(pathname, flags);
1645
0
            Py_END_ALLOW_THREADS
1646
0
        } while (fd < 0
1647
0
                 && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1648
0
        if (async_err) {
1649
0
            Py_DECREF(pathname_obj);
1650
0
            return -1;
1651
0
        }
1652
0
        if (fd < 0) {
1653
0
            PyErr_SetFromErrnoWithFilenameObjects(PyExc_OSError, pathname_obj, NULL);
1654
0
            Py_DECREF(pathname_obj);
1655
0
            return -1;
1656
0
        }
1657
0
        Py_DECREF(pathname_obj);
1658
0
    }
1659
0
    else {
1660
0
        fd = open(pathname, flags);
1661
0
        if (fd < 0)
1662
0
            return -1;
1663
0
    }
1664
1665
0
#ifndef MS_WINDOWS
1666
0
    if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) {
1667
0
        close(fd);
1668
0
        return -1;
1669
0
    }
1670
0
#endif
1671
1672
0
    return fd;
1673
0
}
1674
1675
/* Wrapper around open(): open 'pathname' with the specified flags.

   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   If open() is interrupted by a signal (fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released around open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    /* _Py_open() must be called with the GIL held. */
    _Py_AssertHoldsTstate();

    const int gil_held = 1;
    return _Py_open_impl(pathname, flags, gil_held);
}
1692
1693
/* Wrapper around open() that never raises: open 'pathname' with the
   specified flags.

   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   If interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;
    return _Py_open_impl(pathname, flags, gil_held);
}
1704
1705
/* Open a file. Use _wfopen() on Windows, encode the path to the locale
1706
   encoding and use fopen() otherwise.
1707
1708
   The file descriptor is created non-inheritable.
1709
1710
   If interrupted by a signal, fail with EINTR. */
1711
FILE *
1712
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
1713
80
{
1714
80
    FILE *f;
1715
80
    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
1716
0
        return NULL;
1717
0
    }
1718
80
#ifndef MS_WINDOWS
1719
80
    char *cpath;
1720
80
    char cmode[10];
1721
80
    size_t r;
1722
80
    r = wcstombs(cmode, mode, 10);
1723
80
    if (r == DECODE_ERROR || r >= 10) {
1724
0
        errno = EINVAL;
1725
0
        return NULL;
1726
0
    }
1727
80
    cpath = _Py_EncodeLocaleRaw(path, NULL);
1728
80
    if (cpath == NULL) {
1729
0
        return NULL;
1730
0
    }
1731
80
    f = fopen(cpath, cmode);
1732
80
    PyMem_RawFree(cpath);
1733
#else
1734
    f = _wfopen(path, mode);
1735
#endif
1736
80
    if (f == NULL)
1737
80
        return NULL;
1738
0
    if (make_non_inheritable(fileno(f)) < 0) {
1739
0
        fclose(f);
1740
0
        return NULL;
1741
0
    }
1742
0
    return f;
1743
0
}
1744
1745
1746
/* Open a file.
1747
1748
   On Windows, if 'path' is a Unicode string, call _wfopen(). Otherwise, encode
1749
   the path to the filesystem encoding and call fopen().
1750
1751
   Return the new file object on success. Raise an exception and return NULL
1752
   on error.
1753
1754
   The file descriptor is created non-inheritable.
1755
1756
   When interrupted by a signal (open() fails with EINTR), retry the syscall,
1757
   except if the Python signal handler raises an exception.
1758
1759
   Release the GIL to call _wfopen() or fopen(). The caller must hold
1760
   the GIL. */
1761
FILE*
1762
Py_fopen(PyObject *path, const char *mode)
1763
11.4k
{
1764
11.4k
    _Py_AssertHoldsTstate();
1765
1766
11.4k
    if (PySys_Audit("open", "Osi", path, mode, 0) < 0) {
1767
0
        return NULL;
1768
0
    }
1769
1770
11.4k
    FILE *f;
1771
11.4k
    int async_err = 0;
1772
11.4k
    int saved_errno;
1773
#ifdef MS_WINDOWS
1774
    PyObject *unicode;
1775
    if (!PyUnicode_FSDecoder(path, &unicode)) {
1776
        return NULL;
1777
    }
1778
1779
    wchar_t *wpath = PyUnicode_AsWideCharString(unicode, NULL);
1780
    Py_DECREF(unicode);
1781
    if (wpath == NULL) {
1782
        return NULL;
1783
    }
1784
1785
    wchar_t wmode[10];
1786
    int usize = MultiByteToWideChar(CP_ACP, 0, mode, -1,
1787
                                    wmode, Py_ARRAY_LENGTH(wmode));
1788
    if (usize == 0) {
1789
        PyErr_SetFromWindowsErr(0);
1790
        PyMem_Free(wpath);
1791
        return NULL;
1792
    }
1793
1794
    do {
1795
        Py_BEGIN_ALLOW_THREADS
1796
        _Py_BEGIN_SUPPRESS_IPH
1797
        f = _wfopen(wpath, wmode);
1798
        _Py_END_SUPPRESS_IPH
1799
        Py_END_ALLOW_THREADS
1800
    } while (f == NULL
1801
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1802
    saved_errno = errno;
1803
    PyMem_Free(wpath);
1804
#else
1805
11.4k
    PyObject *bytes;
1806
11.4k
    if (!PyUnicode_FSConverter(path, &bytes)) {
1807
0
        return NULL;
1808
0
    }
1809
11.4k
    const char *path_bytes = PyBytes_AS_STRING(bytes);
1810
1811
11.4k
    do {
1812
11.4k
        Py_BEGIN_ALLOW_THREADS
1813
11.4k
        f = fopen(path_bytes, mode);
1814
11.4k
        Py_END_ALLOW_THREADS
1815
11.4k
    } while (f == NULL
1816
11.4k
             && errno == EINTR && !(async_err = PyErr_CheckSignals()));
1817
11.4k
    saved_errno = errno;
1818
11.4k
    Py_DECREF(bytes);
1819
11.4k
#endif
1820
1821
11.4k
    if (async_err) {
1822
0
        return NULL;
1823
0
    }
1824
1825
11.4k
    if (f == NULL) {
1826
11.4k
        errno = saved_errno;
1827
11.4k
        PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path);
1828
11.4k
        return NULL;
1829
11.4k
    }
1830
1831
0
    if (set_inheritable(fileno(f), 0, 1, NULL) < 0) {
1832
0
        fclose(f);
1833
0
        return NULL;
1834
0
    }
1835
0
    return f;
1836
0
}
1837
1838
1839
// Call fclose().
1840
//
1841
// On Windows, files opened by Py_fopen() in the Python DLL must be closed by
1842
// the Python DLL to use the same C runtime version. Otherwise, calling
1843
// fclose() directly can cause undefined behavior.
1844
int
Py_fclose(FILE *file)
{
    /* Forward to the C runtime's fclose() and hand its status back
       unchanged to the caller. */
    int status = fclose(file);
    return status;
}
1849
1850
1851
/* Read count bytes from fd into buf.
1852
1853
   On success, return the number of read bytes, it can be lower than count.
1854
   If the current file offset is at or past the end of file, no bytes are read,
1855
   and read() returns zero.
1856
1857
   On error, raise an exception, set errno and return -1.
1858
1859
   When interrupted by a signal (read() fails with EINTR), retry the syscall.
1860
   If the Python signal handler raises an exception, the function returns -1
1861
   (the syscall is not retried).
1862
1863
   Release the GIL to call read(). The caller must hold the GIL. */
1864
Py_ssize_t
1865
_Py_read(int fd, void *buf, size_t count)
1866
2.09k
{
1867
2.09k
    Py_ssize_t n;
1868
2.09k
    int err;
1869
2.09k
    int async_err = 0;
1870
1871
2.09k
    _Py_AssertHoldsTstate();
1872
1873
    /* _Py_read() must not be called with an exception set, otherwise the
1874
     * caller may think that read() was interrupted by a signal and the signal
1875
     * handler raised an exception. */
1876
2.09k
    assert(!PyErr_Occurred());
1877
1878
2.09k
    if (count > _PY_READ_MAX) {
1879
0
        count = _PY_READ_MAX;
1880
0
    }
1881
1882
2.09k
    _Py_BEGIN_SUPPRESS_IPH
1883
2.09k
    do {
1884
2.09k
        Py_BEGIN_ALLOW_THREADS
1885
2.09k
        errno = 0;
1886
#ifdef MS_WINDOWS
1887
        _doserrno = 0;
1888
        n = read(fd, buf, (int)count);
1889
        // read() on a non-blocking empty pipe fails with EINVAL, which is
1890
        // mapped from the Windows error code ERROR_NO_DATA.
1891
        if (n < 0 && errno == EINVAL) {
1892
            if (_doserrno == ERROR_NO_DATA) {
1893
                errno = EAGAIN;
1894
            }
1895
        }
1896
#else
1897
2.09k
        n = read(fd, buf, count);
1898
2.09k
#endif
1899
        /* save/restore errno because PyErr_CheckSignals()
1900
         * and PyErr_SetFromErrno() can modify it */
1901
2.09k
        err = errno;
1902
2.09k
        Py_END_ALLOW_THREADS
1903
2.09k
    } while (n < 0 && err == EINTR &&
1904
0
            !(async_err = PyErr_CheckSignals()));
1905
2.09k
    _Py_END_SUPPRESS_IPH
1906
1907
2.09k
    if (async_err) {
1908
        /* read() was interrupted by a signal (failed with EINTR)
1909
         * and the Python signal handler raised an exception */
1910
0
        errno = err;
1911
0
        assert(errno == EINTR && PyErr_Occurred());
1912
0
        return -1;
1913
0
    }
1914
2.09k
    if (n < 0) {
1915
0
        PyErr_SetFromErrno(PyExc_OSError);
1916
0
        errno = err;
1917
0
        return -1;
1918
0
    }
1919
1920
2.09k
    return n;
1921
2.09k
}
1922
1923
static Py_ssize_t
1924
_Py_write_impl(int fd, const void *buf, size_t count, int gil_held)
1925
244
{
1926
244
    Py_ssize_t n;
1927
244
    int err;
1928
244
    int async_err = 0;
1929
1930
244
    _Py_BEGIN_SUPPRESS_IPH
1931
#ifdef MS_WINDOWS
1932
    if (count > 32767) {
1933
        /* Issue #11395: the Windows console returns an error (12: not
1934
           enough space error) on writing into stdout if stdout mode is
1935
           binary and the length is greater than 66,000 bytes (or less,
1936
           depending on heap usage). */
1937
        if (gil_held) {
1938
            Py_BEGIN_ALLOW_THREADS
1939
            if (isatty(fd)) {
1940
                count = 32767;
1941
            }
1942
            Py_END_ALLOW_THREADS
1943
        } else {
1944
            if (isatty(fd)) {
1945
                count = 32767;
1946
            }
1947
        }
1948
    }
1949
1950
#endif
1951
244
    if (count > _PY_WRITE_MAX) {
1952
0
        count = _PY_WRITE_MAX;
1953
0
    }
1954
1955
244
    if (gil_held) {
1956
244
        do {
1957
244
            Py_BEGIN_ALLOW_THREADS
1958
244
            errno = 0;
1959
#ifdef MS_WINDOWS
1960
            // write() on a non-blocking pipe fails with ENOSPC on Windows if
1961
            // the pipe lacks available space for the entire buffer.
1962
            int c = (int)count;
1963
            do {
1964
                _doserrno = 0;
1965
                n = write(fd, buf, c);
1966
                if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1967
                    break;
1968
                }
1969
                errno = EAGAIN;
1970
                c /= 2;
1971
            } while (c > 0);
1972
#else
1973
244
            n = write(fd, buf, count);
1974
244
#endif
1975
            /* save/restore errno because PyErr_CheckSignals()
1976
             * and PyErr_SetFromErrno() can modify it */
1977
244
            err = errno;
1978
244
            Py_END_ALLOW_THREADS
1979
244
        } while (n < 0 && err == EINTR &&
1980
0
                !(async_err = PyErr_CheckSignals()));
1981
244
    }
1982
0
    else {
1983
0
        do {
1984
0
            errno = 0;
1985
#ifdef MS_WINDOWS
1986
            // write() on a non-blocking pipe fails with ENOSPC on Windows if
1987
            // the pipe lacks available space for the entire buffer.
1988
            int c = (int)count;
1989
            do {
1990
                _doserrno = 0;
1991
                n = write(fd, buf, c);
1992
                if (n >= 0 || errno != ENOSPC || _doserrno != 0) {
1993
                    break;
1994
                }
1995
                errno = EAGAIN;
1996
                c /= 2;
1997
            } while (c > 0);
1998
#else
1999
0
            n = write(fd, buf, count);
2000
0
#endif
2001
0
            err = errno;
2002
0
        } while (n < 0 && err == EINTR);
2003
0
    }
2004
244
    _Py_END_SUPPRESS_IPH
2005
2006
244
    if (async_err) {
2007
        /* write() was interrupted by a signal (failed with EINTR)
2008
           and the Python signal handler raised an exception (if gil_held is
2009
           nonzero). */
2010
0
        errno = err;
2011
0
        assert(errno == EINTR && (!gil_held || PyErr_Occurred()));
2012
0
        return -1;
2013
0
    }
2014
244
    if (n < 0) {
2015
0
        if (gil_held)
2016
0
            PyErr_SetFromErrno(PyExc_OSError);
2017
0
        errno = err;
2018
0
        return -1;
2019
0
    }
2020
2021
244
    return n;
2022
244
}
2023
2024
/* Write count bytes of buf into fd.
2025
2026
   On success, return the number of written bytes, it can be lower than count
2027
   including 0. On error, raise an exception, set errno and return -1.
2028
2029
   When interrupted by a signal (write() fails with EINTR), retry the syscall.
2030
   If the Python signal handler raises an exception, the function returns -1
2031
   (the syscall is not retried).
2032
2033
   Release the GIL to call write(). The caller must hold the GIL. */
2034
Py_ssize_t
2035
_Py_write(int fd, const void *buf, size_t count)
2036
244
{
2037
244
    _Py_AssertHoldsTstate();
2038
2039
    /* _Py_write() must not be called with an exception set, otherwise the
2040
     * caller may think that write() was interrupted by a signal and the signal
2041
     * handler raised an exception. */
2042
244
    assert(!PyErr_Occurred());
2043
2044
244
    return _Py_write_impl(fd, buf, count, 1);
2045
244
}
2046
2047
/* Write count bytes of buf into fd.
2048
 *
2049
 * On success, return the number of written bytes, it can be lower than count
2050
 * including 0. On error, set errno and return -1.
2051
 *
2052
 * When interrupted by a signal (write() fails with EINTR), retry the syscall
2053
 * without calling the Python signal handler. */
2054
Py_ssize_t
2055
_Py_write_noraise(int fd, const void *buf, size_t count)
2056
0
{
2057
0
    return _Py_write_impl(fd, buf, count, 0);
2058
0
}
2059
2060
#ifdef HAVE_READLINK
2061
2062
/* Read value of symbolic link. Encode the path to the locale encoding, decode
2063
   the result from the locale encoding.
2064
2065
   Return -1 on encoding error, on readlink() error, if the internal buffer is
2066
   too short, on decoding error, or if 'buf' is too short. */
2067
int
2068
_Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen)
2069
32
{
2070
32
    char *cpath;
2071
32
    char cbuf[MAXPATHLEN];
2072
32
    size_t cbuf_len = Py_ARRAY_LENGTH(cbuf);
2073
32
    wchar_t *wbuf;
2074
32
    Py_ssize_t res;
2075
32
    size_t r1;
2076
2077
32
    cpath = _Py_EncodeLocaleRaw(path, NULL);
2078
32
    if (cpath == NULL) {
2079
0
        errno = EINVAL;
2080
0
        return -1;
2081
0
    }
2082
32
    res = readlink(cpath, cbuf, cbuf_len);
2083
32
    PyMem_RawFree(cpath);
2084
32
    if (res == -1) {
2085
16
        return -1;
2086
16
    }
2087
16
    if ((size_t)res == cbuf_len) {
2088
0
        errno = EINVAL;
2089
0
        return -1;
2090
0
    }
2091
16
    cbuf[res] = '\0'; /* buf will be null terminated */
2092
16
    wbuf = Py_DecodeLocale(cbuf, &r1);
2093
16
    if (wbuf == NULL) {
2094
0
        errno = EINVAL;
2095
0
        return -1;
2096
0
    }
2097
    /* wbuf must have space to store the trailing NUL character */
2098
16
    if (buflen <= r1) {
2099
0
        PyMem_RawFree(wbuf);
2100
0
        errno = EINVAL;
2101
0
        return -1;
2102
0
    }
2103
16
    wcsncpy(buf, wbuf, buflen);
2104
16
    PyMem_RawFree(wbuf);
2105
16
    return (int)r1;
2106
16
}
2107
#endif
2108
2109
#ifdef HAVE_REALPATH
2110
2111
/* Return the canonicalized absolute pathname. Encode path to the locale
2112
   encoding, decode the result from the locale encoding.
2113
2114
   Return NULL on encoding error, realpath() error, decoding error
2115
   or if 'resolved_path' is too short. */
2116
wchar_t*
2117
_Py_wrealpath(const wchar_t *path,
2118
              wchar_t *resolved_path, size_t resolved_path_len)
2119
0
{
2120
0
    char *cpath;
2121
0
    char cresolved_path[MAXPATHLEN];
2122
0
    wchar_t *wresolved_path;
2123
0
    char *res;
2124
0
    size_t r;
2125
0
    cpath = _Py_EncodeLocaleRaw(path, NULL);
2126
0
    if (cpath == NULL) {
2127
0
        errno = EINVAL;
2128
0
        return NULL;
2129
0
    }
2130
0
    res = realpath(cpath, cresolved_path);
2131
0
    PyMem_RawFree(cpath);
2132
0
    if (res == NULL)
2133
0
        return NULL;
2134
2135
0
    wresolved_path = Py_DecodeLocale(cresolved_path, &r);
2136
0
    if (wresolved_path == NULL) {
2137
0
        errno = EINVAL;
2138
0
        return NULL;
2139
0
    }
2140
    /* wresolved_path must have space to store the trailing NUL character */
2141
0
    if (resolved_path_len <= r) {
2142
0
        PyMem_RawFree(wresolved_path);
2143
0
        errno = EINVAL;
2144
0
        return NULL;
2145
0
    }
2146
0
    wcsncpy(resolved_path, wresolved_path, resolved_path_len);
2147
0
    PyMem_RawFree(wresolved_path);
2148
0
    return resolved_path;
2149
0
}
2150
#endif
2151
2152
2153
/* Return 1 if 'path' is absolute, 0 otherwise. */
int
_Py_isabs(const wchar_t *path)
{
#ifdef MS_WINDOWS
    const wchar_t *root_end;
    if (FAILED(PathCchSkipRoot(path, &root_end))) {
        return 0;
    }
    if (root_end == path) {
        // No root was skipped
        return 0;
    }
    if (root_end == &path[1]) {
        // Exclude paths with leading SEP
        if (path[0] == SEP || path[0] == ALTSEP) {
            return 0;
        }
    }
    if (root_end == &path[2]) {
        // Exclude drive-relative paths (e.g. C:filename.ext)
        if (path[1] == L':') {
            return 0;
        }
    }
    return 1;
#else
    // A path is absolute exactly when it begins with the separator.
    if (path[0] == SEP) {
        return 1;
    }
    return 0;
#endif
}
2175
2176
2177
/* Get an absolute path.
2178
   On error (ex: fail to get the current directory), return -1.
2179
   On memory allocation failure, set *abspath_p to NULL and return 0.
2180
   On success, store a newly allocated string in *abspath_p and return 0.
2181
   The string must be freed by PyMem_RawFree(). */
2182
int
2183
_Py_abspath(const wchar_t *path, wchar_t **abspath_p)
2184
0
{
2185
0
    if (path[0] == '\0' || !wcscmp(path, L".")) {
2186
0
        wchar_t cwd[MAXPATHLEN + 1];
2187
0
        cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2188
0
        if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2189
            /* unable to get the current directory */
2190
0
            return -1;
2191
0
        }
2192
0
        *abspath_p = _PyMem_RawWcsdup(cwd);
2193
0
        return 0;
2194
0
    }
2195
2196
0
    if (_Py_isabs(path)) {
2197
0
        *abspath_p = _PyMem_RawWcsdup(path);
2198
0
        return 0;
2199
0
    }
2200
2201
#ifdef MS_WINDOWS
2202
    return _PyOS_getfullpathname(path, abspath_p);
2203
#else
2204
0
    wchar_t cwd[MAXPATHLEN + 1];
2205
0
    cwd[Py_ARRAY_LENGTH(cwd) - 1] = 0;
2206
0
    if (!_Py_wgetcwd(cwd, Py_ARRAY_LENGTH(cwd) - 1)) {
2207
        /* unable to get the current directory */
2208
0
        return -1;
2209
0
    }
2210
2211
0
    size_t cwd_len = wcslen(cwd);
2212
0
    size_t path_len = wcslen(path);
2213
0
    size_t len = cwd_len + 1 + path_len + 1;
2214
0
    if (len <= (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
2215
0
        *abspath_p = PyMem_RawMalloc(len * sizeof(wchar_t));
2216
0
    }
2217
0
    else {
2218
0
        *abspath_p = NULL;
2219
0
    }
2220
0
    if (*abspath_p == NULL) {
2221
0
        return 0;
2222
0
    }
2223
2224
0
    wchar_t *abspath = *abspath_p;
2225
0
    memcpy(abspath, cwd, cwd_len * sizeof(wchar_t));
2226
0
    abspath += cwd_len;
2227
2228
0
    *abspath = (wchar_t)SEP;
2229
0
    abspath++;
2230
2231
0
    memcpy(abspath, path, path_len * sizeof(wchar_t));
2232
0
    abspath += path_len;
2233
2234
0
    *abspath = 0;
2235
0
    return 0;
2236
0
#endif
2237
0
}
2238
2239
// The Windows Games API family implements the PathCch* APIs in the Xbox OS,
2240
// but does not expose them yet. Load them dynamically until
2241
// 1) they are officially exposed
2242
// 2) we stop supporting older versions of the GDK which do not expose them
2243
#if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP)
2244
HRESULT
2245
PathCchSkipRoot(const wchar_t *path, const wchar_t **rootEnd)
2246
{
2247
    static int initialized = 0;
2248
    typedef HRESULT(__stdcall *PPathCchSkipRoot) (PCWSTR pszPath,
2249
                                                  PCWSTR *ppszRootEnd);
2250
    static PPathCchSkipRoot _PathCchSkipRoot;
2251
2252
    if (initialized == 0) {
2253
        HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2254
                                         LOAD_LIBRARY_SEARCH_SYSTEM32);
2255
        if (pathapi) {
2256
            _PathCchSkipRoot = (PPathCchSkipRoot)GetProcAddress(
2257
                pathapi, "PathCchSkipRoot");
2258
        }
2259
        else {
2260
            _PathCchSkipRoot = NULL;
2261
        }
2262
        initialized = 1;
2263
    }
2264
2265
    if (!_PathCchSkipRoot) {
2266
        return E_NOINTERFACE;
2267
    }
2268
2269
    return _PathCchSkipRoot(path, rootEnd);
2270
}
2271
2272
static HRESULT
2273
PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname,
2274
                 const wchar_t *relfile, unsigned long flags)
2275
{
2276
    static int initialized = 0;
2277
    typedef HRESULT(__stdcall *PPathCchCombineEx) (PWSTR pszPathOut,
2278
                                                   size_t cchPathOut,
2279
                                                   PCWSTR pszPathIn,
2280
                                                   PCWSTR pszMore,
2281
                                                   unsigned long dwFlags);
2282
    static PPathCchCombineEx _PathCchCombineEx;
2283
2284
    if (initialized == 0) {
2285
        HMODULE pathapi = LoadLibraryExW(L"api-ms-win-core-path-l1-1-0.dll", NULL,
2286
                                         LOAD_LIBRARY_SEARCH_SYSTEM32);
2287
        if (pathapi) {
2288
            _PathCchCombineEx = (PPathCchCombineEx)GetProcAddress(
2289
                pathapi, "PathCchCombineEx");
2290
        }
2291
        else {
2292
            _PathCchCombineEx = NULL;
2293
        }
2294
        initialized = 1;
2295
    }
2296
2297
    if (!_PathCchCombineEx) {
2298
        return E_NOINTERFACE;
2299
    }
2300
2301
    return _PathCchCombineEx(buffer, bufsize, dirname, relfile, flags);
2302
}
2303
2304
#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */
2305
2306
void
2307
_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize,
2308
             Py_ssize_t *rootsize)
2309
224
{
2310
224
    assert(drvsize);
2311
224
    assert(rootsize);
2312
224
#ifndef MS_WINDOWS
2313
672
#define IS_SEP(x) (*(x) == SEP)
2314
224
    *drvsize = 0;
2315
224
    if (!IS_SEP(&path[0])) {
2316
        // Relative path, e.g.: 'foo'
2317
0
        *rootsize = 0;
2318
0
    }
2319
224
    else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) {
2320
        // Absolute path, e.g.: '/foo', '///foo', '////foo', etc.
2321
224
        *rootsize = 1;
2322
224
    }
2323
0
    else {
2324
        // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see
2325
        // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13
2326
0
        *rootsize = 2;
2327
0
    }
2328
224
#undef IS_SEP
2329
#else
2330
    const wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2331
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2332
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2333
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2334
    if (IS_SEP(&path[0])) {
2335
        if (IS_SEP(&path[1])) {
2336
            // Device drives, e.g. \\.\device or \\?\device
2337
            // UNC drives, e.g. \\server\share or \\?\UNC\server\share
2338
            Py_ssize_t idx;
2339
            if (path[2] == L'?' && IS_SEP(&path[3]) &&
2340
                (path[4] == L'U' || path[4] == L'u') &&
2341
                (path[5] == L'N' || path[5] == L'n') &&
2342
                (path[6] == L'C' || path[6] == L'c') &&
2343
                IS_SEP(&path[7]))
2344
            {
2345
                idx = 8;
2346
            }
2347
            else {
2348
                idx = 2;
2349
            }
2350
            while (!SEP_OR_END(&path[idx])) {
2351
                idx++;
2352
            }
2353
            if (IS_END(&path[idx])) {
2354
                *drvsize = idx;
2355
                *rootsize = 0;
2356
            }
2357
            else {
2358
                idx++;
2359
                while (!SEP_OR_END(&path[idx])) {
2360
                    idx++;
2361
                }
2362
                *drvsize = idx;
2363
                if (IS_END(&path[idx])) {
2364
                    *rootsize = 0;
2365
                }
2366
                else {
2367
                    *rootsize = 1;
2368
                }
2369
            }
2370
        }
2371
        else {
2372
            // Relative path with root, e.g. \Windows
2373
            *drvsize = 0;
2374
            *rootsize = 1;
2375
        }
2376
    }
2377
    else if (!IS_END(&path[0]) && path[1] == L':') {
2378
        *drvsize = 2;
2379
        if (IS_SEP(&path[2])) {
2380
            // Absolute drive-letter path, e.g. X:\Windows
2381
            *rootsize = 1;
2382
        }
2383
        else {
2384
            // Relative path with drive, e.g. X:Windows
2385
            *rootsize = 0;
2386
        }
2387
    }
2388
    else {
2389
        // Relative path, e.g. Windows
2390
        *drvsize = 0;
2391
        *rootsize = 0;
2392
    }
2393
#undef SEP_OR_END
2394
#undef IS_SEP
2395
#undef IS_END
2396
#endif
2397
224
}
2398
2399
// The caller must ensure "buffer" is big enough.
2400
static int
2401
join_relfile(wchar_t *buffer, size_t bufsize,
2402
             const wchar_t *dirname, const wchar_t *relfile)
2403
144
{
2404
#ifdef MS_WINDOWS
2405
    if (FAILED(PathCchCombineEx(buffer, bufsize, dirname, relfile,
2406
        PATHCCH_ALLOW_LONG_PATHS))) {
2407
        return -1;
2408
    }
2409
#else
2410
144
    assert(!_Py_isabs(relfile));
2411
144
    size_t dirlen = wcslen(dirname);
2412
144
    size_t rellen = wcslen(relfile);
2413
144
    size_t maxlen = bufsize - 1;
2414
144
    if (maxlen > MAXPATHLEN || dirlen >= maxlen || rellen >= maxlen - dirlen) {
2415
0
        return -1;
2416
0
    }
2417
144
    if (dirlen == 0) {
2418
        // We do not add a leading separator.
2419
0
        wcscpy(buffer, relfile);
2420
0
    }
2421
144
    else {
2422
144
        if (dirname != buffer) {
2423
0
            wcscpy(buffer, dirname);
2424
0
        }
2425
144
        size_t relstart = dirlen;
2426
144
        if (dirlen > 1 && dirname[dirlen - 1] != SEP) {
2427
144
            buffer[dirlen] = SEP;
2428
144
            relstart += 1;
2429
144
        }
2430
144
        wcscpy(&buffer[relstart], relfile);
2431
144
    }
2432
144
#endif
2433
144
    return 0;
2434
144
}
2435
2436
/* Join the two paths together, like os.path.join().  Return NULL
2437
   if memory could not be allocated.  The caller is responsible
2438
   for calling PyMem_RawFree() on the result. */
2439
wchar_t *
2440
_Py_join_relfile(const wchar_t *dirname, const wchar_t *relfile)
2441
0
{
2442
0
    assert(dirname != NULL && relfile != NULL);
2443
0
#ifndef MS_WINDOWS
2444
0
    assert(!_Py_isabs(relfile));
2445
0
#endif
2446
0
    size_t maxlen = wcslen(dirname) + 1 + wcslen(relfile);
2447
0
    size_t bufsize = maxlen + 1;
2448
0
    wchar_t *filename = PyMem_RawMalloc(bufsize * sizeof(wchar_t));
2449
0
    if (filename == NULL) {
2450
0
        return NULL;
2451
0
    }
2452
0
    assert(wcslen(dirname) < MAXPATHLEN);
2453
0
    assert(wcslen(relfile) < MAXPATHLEN - wcslen(dirname));
2454
0
    if (join_relfile(filename, bufsize, dirname, relfile) < 0) {
2455
0
        PyMem_RawFree(filename);
2456
0
        return NULL;
2457
0
    }
2458
0
    return filename;
2459
0
}
2460
2461
/* Join the two paths together, like os.path.join().
2462
     dirname: the target buffer with the dirname already in place,
2463
              including trailing NUL
2464
     relfile: this must be a relative path
2465
     bufsize: total allocated size of the buffer
2466
   Return -1 if anything is wrong with the path lengths. */
2467
int
_Py_add_relfile(wchar_t *dirname, const wchar_t *relfile, size_t bufsize)
{
    assert(dirname != NULL && relfile != NULL);
    assert(bufsize > 0);
    // Join in place: the buffer holding dirname is also the destination.
    wchar_t *buffer = dirname;
    return join_relfile(buffer, bufsize, dirname, relfile);
}
2474
2475
2476
/* Return the index in 'filename' of the first character following the
   last SEP, i.e. where the basename starts. Return 0 if 'filename'
   contains no SEP.

   The scan walks backwards over character positions len-1 .. 0, so a
   separator at index 0 (e.g. "/foo") is found and yields 1. */
size_t
_Py_find_basename(const wchar_t *filename)
{
    for (size_t end = wcslen(filename); end > 0; --end) {
        // 'end' is one past the character being examined.
        if (filename[end - 1] == SEP) {
            return end;
        }
    }
    return 0;
}
2486
2487
/* In-place path normalisation. Returns the start of the normalized
2488
   path, which will be within the original buffer. Guaranteed to not
2489
   make the path longer, and will not fail. 'size' is the length of
2490
   the path, if known. If -1, the first null character will be assumed
2491
   to be the end of the path. 'normsize' will be set to contain the
2492
   length of the resulting normalized path. */
2493
wchar_t *
2494
_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize)
2495
224
{
2496
224
    assert(path != NULL);
2497
224
    if ((size < 0 && !path[0]) || size == 0) {
2498
0
        *normsize = 0;
2499
0
        return path;
2500
0
    }
2501
224
    wchar_t *pEnd = size >= 0 ? &path[size] : NULL;
2502
224
    wchar_t *p1 = path;     // sequentially scanned address in the path
2503
224
    wchar_t *p2 = path;     // destination of a scanned character to be ljusted
2504
224
    wchar_t *minP2 = path;  // the beginning of the destination range
2505
224
    wchar_t lastC = L'\0';  // the last ljusted character, p2[-1] in most cases
2506
2507
11.4k
#define IS_END(x) (pEnd ? (x) == pEnd : !*(x))
2508
#ifdef ALTSEP
2509
#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP)
2510
#else
2511
224
#define IS_SEP(x) (*(x) == SEP)
2512
224
#endif
2513
224
#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x))
2514
2515
224
    Py_ssize_t drvsize, rootsize;
2516
224
    _Py_skiproot(path, size, &drvsize, &rootsize);
2517
224
    if (drvsize || rootsize) {
2518
        // Skip past root and update minP2
2519
224
        p1 = &path[drvsize + rootsize];
2520
224
#ifndef ALTSEP
2521
224
        p2 = p1;
2522
#else
2523
        for (; p2 < p1; ++p2) {
2524
            if (*p2 == ALTSEP) {
2525
                *p2 = SEP;
2526
            }
2527
        }
2528
#endif
2529
224
        minP2 = p2 - 1;
2530
224
        lastC = *minP2;
2531
#ifdef MS_WINDOWS
2532
        if (lastC != SEP) {
2533
            minP2++;
2534
        }
2535
#endif
2536
224
    }
2537
224
    if (p1[0] == L'.' && SEP_OR_END(&p1[1])) {
2538
        // Skip leading '.\'
2539
0
        lastC = *++p1;
2540
#ifdef ALTSEP
2541
        if (lastC == ALTSEP) {
2542
            lastC = SEP;
2543
        }
2544
#endif
2545
0
        while (IS_SEP(p1)) {
2546
0
            p1++;
2547
0
        }
2548
0
    }
2549
2550
    /* if pEnd is specified, check that. Else, check for null terminator */
2551
11.4k
    for (; !IS_END(p1); ++p1) {
2552
11.2k
        wchar_t c = *p1;
2553
#ifdef ALTSEP
2554
        if (c == ALTSEP) {
2555
            c = SEP;
2556
        }
2557
#endif
2558
11.2k
        if (lastC == SEP) {
2559
1.16k
            if (c == L'.') {
2560
0
                int sep_at_1 = SEP_OR_END(&p1[1]);
2561
0
                int sep_at_2 = !sep_at_1 && SEP_OR_END(&p1[2]);
2562
0
                if (sep_at_2 && p1[1] == L'.') {
2563
0
                    wchar_t *p3 = p2;
2564
0
                    while (p3 != minP2 && *--p3 == SEP) { }
2565
0
                    while (p3 != minP2 && *(p3 - 1) != SEP) { --p3; }
2566
0
                    if (p2 == minP2
2567
0
                        || (p3[0] == L'.' && p3[1] == L'.' && IS_SEP(&p3[2])))
2568
0
                    {
2569
                        // Previous segment is also ../, so append instead.
2570
                        // Relative path does not absorb ../ at minP2 as well.
2571
0
                        *p2++ = L'.';
2572
0
                        *p2++ = L'.';
2573
0
                        lastC = L'.';
2574
0
                    } else if (p3[0] == SEP) {
2575
                        // Absolute path, so absorb segment
2576
0
                        p2 = p3 + 1;
2577
0
                    } else {
2578
0
                        p2 = p3;
2579
0
                    }
2580
0
                    p1 += 1;
2581
0
                } else if (sep_at_1) {
2582
0
                } else {
2583
0
                    *p2++ = lastC = c;
2584
0
                }
2585
1.16k
            } else if (c == SEP) {
2586
1.16k
            } else {
2587
1.16k
                *p2++ = lastC = c;
2588
1.16k
            }
2589
10.0k
        } else {
2590
10.0k
            *p2++ = lastC = c;
2591
10.0k
        }
2592
11.2k
    }
2593
224
    *p2 = L'\0';
2594
224
    if (p2 != minP2) {
2595
224
        while (--p2 != minP2 && *p2 == SEP) {
2596
0
            *p2 = L'\0';
2597
0
        }
2598
224
    } else {
2599
0
        --p2;
2600
0
    }
2601
224
    *normsize = p2 - path + 1;
2602
224
#undef SEP_OR_END
2603
224
#undef IS_SEP
2604
224
#undef IS_END
2605
224
    return path;
2606
224
}
2607
2608
/* In-place path normalisation. Returns the start of the normalized
2609
   path, which will be within the original buffer. Guaranteed to not
2610
   make the path longer, and will not fail. 'size' is the length of
2611
   the path, if known. If -1, the first null character will be assumed
2612
   to be the end of the path. */
2613
wchar_t *
2614
_Py_normpath(wchar_t *path, Py_ssize_t size)
2615
144
{
2616
144
    Py_ssize_t norm_length;
2617
144
    return _Py_normpath_and_size(path, size, &norm_length);
2618
144
}
2619
2620
2621
/* Get the current directory. buflen is the buffer size in wide characters
2622
   including the null character. Decode the path from the locale encoding.
2623
2624
   Return NULL on getcwd() error, on decoding error, or if 'buf' is
2625
   too short. */
2626
wchar_t*
2627
_Py_wgetcwd(wchar_t *buf, size_t buflen)
2628
0
{
2629
#ifdef MS_WINDOWS
2630
    int ibuflen = (int)Py_MIN(buflen, INT_MAX);
2631
    return _wgetcwd(buf, ibuflen);
2632
#else
2633
0
    char fname[MAXPATHLEN];
2634
0
    wchar_t *wname;
2635
0
    size_t len;
2636
2637
0
    if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
2638
0
        return NULL;
2639
0
    wname = Py_DecodeLocale(fname, &len);
2640
0
    if (wname == NULL)
2641
0
        return NULL;
2642
    /* wname must have space to store the trailing NUL character */
2643
0
    if (buflen <= len) {
2644
0
        PyMem_RawFree(wname);
2645
0
        return NULL;
2646
0
    }
2647
0
    wcsncpy(buf, wname, buflen);
2648
0
    PyMem_RawFree(wname);
2649
0
    return buf;
2650
0
#endif
2651
0
}
2652
2653
/* Duplicate a file descriptor. The new file descriptor is created as
2654
   non-inheritable. Return a new file descriptor on success, raise an OSError
2655
   exception and return -1 on error.
2656
2657
   The GIL is released to call dup(). The caller must hold the GIL. */
2658
int
2659
_Py_dup(int fd)
2660
0
{
2661
#ifdef MS_WINDOWS
2662
    HANDLE handle;
2663
#endif
2664
2665
0
    _Py_AssertHoldsTstate();
2666
2667
#ifdef MS_WINDOWS
2668
    handle = _Py_get_osfhandle(fd);
2669
    if (handle == INVALID_HANDLE_VALUE)
2670
        return -1;
2671
2672
    Py_BEGIN_ALLOW_THREADS
2673
    _Py_BEGIN_SUPPRESS_IPH
2674
    fd = dup(fd);
2675
    _Py_END_SUPPRESS_IPH
2676
    Py_END_ALLOW_THREADS
2677
    if (fd < 0) {
2678
        PyErr_SetFromErrno(PyExc_OSError);
2679
        return -1;
2680
    }
2681
2682
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2683
        _Py_BEGIN_SUPPRESS_IPH
2684
        close(fd);
2685
        _Py_END_SUPPRESS_IPH
2686
        return -1;
2687
    }
2688
#elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC)
2689
0
    Py_BEGIN_ALLOW_THREADS
2690
0
    _Py_BEGIN_SUPPRESS_IPH
2691
0
    fd = fcntl(fd, F_DUPFD_CLOEXEC, 0);
2692
0
    _Py_END_SUPPRESS_IPH
2693
0
    Py_END_ALLOW_THREADS
2694
0
    if (fd < 0) {
2695
0
        PyErr_SetFromErrno(PyExc_OSError);
2696
0
        return -1;
2697
0
    }
2698
2699
#elif HAVE_DUP
2700
    Py_BEGIN_ALLOW_THREADS
2701
    _Py_BEGIN_SUPPRESS_IPH
2702
    fd = dup(fd);
2703
    _Py_END_SUPPRESS_IPH
2704
    Py_END_ALLOW_THREADS
2705
    if (fd < 0) {
2706
        PyErr_SetFromErrno(PyExc_OSError);
2707
        return -1;
2708
    }
2709
2710
    if (_Py_set_inheritable(fd, 0, NULL) < 0) {
2711
        _Py_BEGIN_SUPPRESS_IPH
2712
        close(fd);
2713
        _Py_END_SUPPRESS_IPH
2714
        return -1;
2715
    }
2716
#else
2717
    errno = ENOTSUP;
2718
    PyErr_SetFromErrno(PyExc_OSError);
2719
    return -1;
2720
#endif
2721
0
    return fd;
2722
0
}
2723
2724
#ifndef MS_WINDOWS
2725
/* Get the blocking mode of the file descriptor.
2726
   Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared,
2727
   raise an exception and return -1 on error. */
2728
int
2729
_Py_get_blocking(int fd)
2730
0
{
2731
0
    int flags;
2732
0
    _Py_BEGIN_SUPPRESS_IPH
2733
0
    flags = fcntl(fd, F_GETFL, 0);
2734
0
    _Py_END_SUPPRESS_IPH
2735
0
    if (flags < 0) {
2736
0
        PyErr_SetFromErrno(PyExc_OSError);
2737
0
        return -1;
2738
0
    }
2739
2740
0
    return !(flags & O_NONBLOCK);
2741
0
}
2742
2743
/* Set the blocking mode of the specified file descriptor.
2744
2745
   Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag
2746
   otherwise.
2747
2748
   Return 0 on success, raise an exception and return -1 on error. */
2749
int
2750
_Py_set_blocking(int fd, int blocking)
2751
0
{
2752
/* bpo-41462: On VxWorks, ioctl(FIONBIO) only works on sockets.
2753
   Use fcntl() instead. */
2754
0
#if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) && !defined(__VXWORKS__)
2755
0
    int arg = !blocking;
2756
0
    if (ioctl(fd, FIONBIO, &arg) < 0)
2757
0
        goto error;
2758
#else
2759
    int flags, res;
2760
2761
    _Py_BEGIN_SUPPRESS_IPH
2762
    flags = fcntl(fd, F_GETFL, 0);
2763
    if (flags >= 0) {
2764
        if (blocking)
2765
            flags = flags & (~O_NONBLOCK);
2766
        else
2767
            flags = flags | O_NONBLOCK;
2768
2769
        res = fcntl(fd, F_SETFL, flags);
2770
    } else {
2771
        res = -1;
2772
    }
2773
    _Py_END_SUPPRESS_IPH
2774
2775
    if (res < 0)
2776
        goto error;
2777
#endif
2778
0
    return 0;
2779
2780
0
error:
2781
0
    PyErr_SetFromErrno(PyExc_OSError);
2782
0
    return -1;
2783
0
}
2784
#else   /* MS_WINDOWS */
2785
2786
// The Windows Games API family doesn't expose GetNamedPipeHandleStateW so attempt
2787
// to load it directly from the Kernel32.dll
2788
#if !defined(MS_WINDOWS_APP) && !defined(MS_WINDOWS_SYSTEM)
2789
BOOL
2790
GetNamedPipeHandleStateW(HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount,
2791
                         LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize)
2792
{
2793
    static int initialized = 0;
2794
    typedef BOOL(__stdcall* PGetNamedPipeHandleStateW) (
2795
        HANDLE hNamedPipe, LPDWORD lpState, LPDWORD lpCurInstances, LPDWORD lpMaxCollectionCount,
2796
        LPDWORD lpCollectDataTimeout, LPWSTR lpUserName, DWORD nMaxUserNameSize);
2797
    static PGetNamedPipeHandleStateW _GetNamedPipeHandleStateW;
2798
2799
    if (initialized == 0) {
2800
        HMODULE api = LoadLibraryExW(L"Kernel32.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
2801
        if (api) {
2802
            _GetNamedPipeHandleStateW = (PGetNamedPipeHandleStateW)GetProcAddress(
2803
                api, "GetNamedPipeHandleStateW");
2804
        }
2805
        else {
2806
            _GetNamedPipeHandleStateW = NULL;
2807
        }
2808
        initialized = 1;
2809
    }
2810
2811
    if (!_GetNamedPipeHandleStateW) {
2812
        SetLastError(E_NOINTERFACE);
2813
        return FALSE;
2814
    }
2815
2816
    return _GetNamedPipeHandleStateW(
2817
        hNamedPipe, lpState, lpCurInstances, lpMaxCollectionCount, lpCollectDataTimeout, lpUserName, nMaxUserNameSize
2818
    );
2819
}
2820
#endif /* !MS_WINDOWS_APP && !MS_WINDOWS_SYSTEM */
2821
2822
int
2823
_Py_get_blocking(int fd)
2824
{
2825
    HANDLE handle;
2826
    DWORD mode;
2827
    BOOL success;
2828
2829
    handle = _Py_get_osfhandle(fd);
2830
    if (handle == INVALID_HANDLE_VALUE) {
2831
        return -1;
2832
    }
2833
2834
    Py_BEGIN_ALLOW_THREADS
2835
    success = GetNamedPipeHandleStateW(handle, &mode,
2836
                                       NULL, NULL, NULL, NULL, 0);
2837
    Py_END_ALLOW_THREADS
2838
2839
    if (!success) {
2840
        PyErr_SetFromWindowsErr(0);
2841
        return -1;
2842
    }
2843
2844
    return !(mode & PIPE_NOWAIT);
2845
}
2846
2847
int
2848
_Py_set_blocking(int fd, int blocking)
2849
{
2850
    HANDLE handle;
2851
    DWORD mode;
2852
    BOOL success;
2853
2854
    handle = _Py_get_osfhandle(fd);
2855
    if (handle == INVALID_HANDLE_VALUE) {
2856
        return -1;
2857
    }
2858
2859
    Py_BEGIN_ALLOW_THREADS
2860
    success = GetNamedPipeHandleStateW(handle, &mode,
2861
                                       NULL, NULL, NULL, NULL, 0);
2862
    if (success) {
2863
        if (blocking) {
2864
            mode &= ~PIPE_NOWAIT;
2865
        }
2866
        else {
2867
            mode |= PIPE_NOWAIT;
2868
        }
2869
        success = SetNamedPipeHandleState(handle, &mode, NULL, NULL);
2870
    }
2871
    Py_END_ALLOW_THREADS
2872
2873
    if (!success) {
2874
        PyErr_SetFromWindowsErr(0);
2875
        return -1;
2876
    }
2877
    return 0;
2878
}
2879
2880
void*
2881
_Py_get_osfhandle_noraise(int fd)
2882
{
2883
    void *handle;
2884
    _Py_BEGIN_SUPPRESS_IPH
2885
    handle = (void*)_get_osfhandle(fd);
2886
    _Py_END_SUPPRESS_IPH
2887
    return handle;
2888
}
2889
2890
void*
2891
_Py_get_osfhandle(int fd)
2892
{
2893
    void *handle = _Py_get_osfhandle_noraise(fd);
2894
    if (handle == INVALID_HANDLE_VALUE)
2895
        PyErr_SetFromErrno(PyExc_OSError);
2896
2897
    return handle;
2898
}
2899
2900
int
2901
_Py_open_osfhandle_noraise(void *handle, int flags)
2902
{
2903
    int fd;
2904
    _Py_BEGIN_SUPPRESS_IPH
2905
    fd = _open_osfhandle((intptr_t)handle, flags);
2906
    _Py_END_SUPPRESS_IPH
2907
    return fd;
2908
}
2909
2910
int
2911
_Py_open_osfhandle(void *handle, int flags)
2912
{
2913
    int fd = _Py_open_osfhandle_noraise(handle, flags);
2914
    if (fd == -1)
2915
        PyErr_SetFromErrno(PyExc_OSError);
2916
2917
    return fd;
2918
}
2919
#endif  /* MS_WINDOWS */
2920
2921
int
2922
_Py_GetLocaleconvNumeric(struct lconv *lc,
2923
                         PyObject **decimal_point, PyObject **thousands_sep)
2924
0
{
2925
0
    assert(decimal_point != NULL);
2926
0
    assert(thousands_sep != NULL);
2927
2928
0
#ifndef MS_WINDOWS
2929
0
    int change_locale = 0;
2930
0
    if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) {
2931
0
        change_locale = 1;
2932
0
    }
2933
0
    if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) {
2934
0
        change_locale = 1;
2935
0
    }
2936
2937
    /* Keep a copy of the LC_CTYPE locale */
2938
0
    char *oldloc = NULL, *loc = NULL;
2939
0
    if (change_locale) {
2940
0
        oldloc = setlocale(LC_CTYPE, NULL);
2941
0
        if (!oldloc) {
2942
0
            PyErr_SetString(PyExc_RuntimeWarning,
2943
0
                            "failed to get LC_CTYPE locale");
2944
0
            return -1;
2945
0
        }
2946
2947
0
        oldloc = _PyMem_Strdup(oldloc);
2948
0
        if (!oldloc) {
2949
0
            PyErr_NoMemory();
2950
0
            return -1;
2951
0
        }
2952
2953
0
        loc = setlocale(LC_NUMERIC, NULL);
2954
0
        if (loc != NULL && strcmp(loc, oldloc) == 0) {
2955
0
            loc = NULL;
2956
0
        }
2957
2958
0
        if (loc != NULL) {
2959
            /* Only set the locale temporarily the LC_CTYPE locale
2960
               if LC_NUMERIC locale is different than LC_CTYPE locale and
2961
               decimal_point and/or thousands_sep are non-ASCII or longer than
2962
               1 byte */
2963
0
            setlocale(LC_CTYPE, loc);
2964
0
        }
2965
0
    }
2966
2967
0
#define GET_LOCALE_STRING(ATTR) PyUnicode_DecodeLocale(lc->ATTR, NULL)
2968
#else /* MS_WINDOWS */
2969
/* Use _W_* fields of Windows strcut lconv */
2970
#define GET_LOCALE_STRING(ATTR) PyUnicode_FromWideChar(lc->_W_ ## ATTR, -1)
2971
#endif /* MS_WINDOWS */
2972
2973
0
    int res = -1;
2974
2975
0
    *decimal_point = GET_LOCALE_STRING(decimal_point);
2976
0
    if (*decimal_point == NULL) {
2977
0
        goto done;
2978
0
    }
2979
2980
0
    *thousands_sep = GET_LOCALE_STRING(thousands_sep);
2981
0
    if (*thousands_sep == NULL) {
2982
0
        goto done;
2983
0
    }
2984
2985
0
    res = 0;
2986
2987
0
done:
2988
0
#ifndef MS_WINDOWS
2989
0
    if (loc != NULL) {
2990
0
        setlocale(LC_CTYPE, oldloc);
2991
0
    }
2992
0
    PyMem_Free(oldloc);
2993
0
#endif
2994
0
    return res;
2995
2996
0
#undef GET_LOCALE_STRING
2997
0
}
2998
2999
/* Our selection logic for which function to use is as follows:
3000
 * 1. If close_range(2) is available, always prefer that; it's better for
3001
 *    contiguous ranges like this than fdwalk(3) which entails iterating over
3002
 *    the entire fd space and simply doing nothing for those outside the range.
3003
 * 2. If closefrom(2) is available, we'll attempt to use that next if we're
3004
 *    closing up to sysconf(_SC_OPEN_MAX).
3005
 * 2a. Fallback to fdwalk(3) if we're not closing up to sysconf(_SC_OPEN_MAX),
3006
 *    as that will be more performant if the range happens to have any chunk of
3007
 *    non-opened fd in the middle.
3008
 * 2b. If fdwalk(3) isn't available, just do a plain close(2) loop.
3009
 */
3010
#ifdef HAVE_CLOSEFROM
3011
#  define USE_CLOSEFROM
3012
#endif /* HAVE_CLOSEFROM */
3013
3014
#ifdef HAVE_FDWALK
3015
#  define USE_FDWALK
3016
#endif /* HAVE_FDWALK */
3017
3018
#ifdef USE_FDWALK
3019
static int
3020
_fdwalk_close_func(void *lohi, int fd)
3021
{
3022
    int lo = ((int *)lohi)[0];
3023
    int hi = ((int *)lohi)[1];
3024
3025
    if (fd >= hi) {
3026
        return 1;
3027
    }
3028
    else if (fd >= lo) {
3029
        /* Ignore errors */
3030
        (void)close(fd);
3031
    }
3032
    return 0;
3033
}
3034
#endif /* USE_FDWALK */
3035
3036
/* Closes all file descriptors in [first, last], ignoring errors. */
3037
void
3038
_Py_closerange(int first, int last)
3039
0
{
3040
0
    first = Py_MAX(first, 0);
3041
0
    _Py_BEGIN_SUPPRESS_IPH
3042
#ifdef HAVE_CLOSE_RANGE
3043
    if (close_range(first, last, 0) == 0) {
3044
        /* close_range() ignores errors when it closes file descriptors.
3045
         * Possible reasons of an error return are lack of kernel support
3046
         * or denial of the underlying syscall by a seccomp sandbox on Linux.
3047
         * Fallback to other methods in case of any error. */
3048
    }
3049
    else
3050
#endif /* HAVE_CLOSE_RANGE */
3051
#ifdef USE_CLOSEFROM
3052
    if (last >= sysconf(_SC_OPEN_MAX)) {
3053
        /* Any errors encountered while closing file descriptors are ignored */
3054
        (void)closefrom(first);
3055
    }
3056
    else
3057
#endif /* USE_CLOSEFROM */
3058
#ifdef USE_FDWALK
3059
    {
3060
        int lohi[2];
3061
        lohi[0] = first;
3062
        lohi[1] = last + 1;
3063
        fdwalk(_fdwalk_close_func, lohi);
3064
    }
3065
#else
3066
0
    {
3067
0
        for (int i = first; i <= last; i++) {
3068
            /* Ignore errors */
3069
0
            (void)close(i);
3070
0
        }
3071
0
    }
3072
0
#endif /* USE_FDWALK */
3073
0
    _Py_END_SUPPRESS_IPH
3074
0
}
3075
3076
3077
#ifndef MS_WINDOWS
3078
// Store the number of clock ticks per second, as used by the clock() and
// times() functions, into *ticks_per_second.
// See os.times() and time.process_time() implementations.
//
// Return 0 on success. Return -1, without writing to *ticks_per_second,
// if sysconf(_SC_CLK_TCK) reports a value lower than 1.
int
_Py_GetTicksPerSecond(long *ticks_per_second)
{
#if defined(HAVE_SYSCONF) && defined(_SC_CLK_TCK)
    const long clk_tck = sysconf(_SC_CLK_TCK);
    if (clk_tck < 1) {
        return -1;
    }
    *ticks_per_second = clk_tck;
#elif defined(HZ)
    assert(HZ >= 1);
    *ticks_per_second = HZ;
#else
    // Magic fallback value; may be bogus
    *ticks_per_second = 60;
#endif

    return 0;
}
3098
#endif
3099
3100
3101
/* Tell whether a file descriptor is valid.

   Return 0 if the file descriptor is invalid (negative descriptors are
   always invalid), return non-zero otherwise. */
int
_Py_IsValidFD(int fd)
{
/* Choice of the probing function:

   dup() is faster than fstat(): fstat() can require input/output operations,
   whereas dup() doesn't. There is a low risk of EMFILE/ENFILE at Python
   startup. Problem: dup() doesn't check if the file descriptor is valid on
   some platforms.

   fcntl(fd, F_GETFD) is even faster, because it only checks the process
   table. It is preferred over dup() when available, since it cannot fail
   with the "too many open files" error (EMFILE).

   bpo-30225: On macOS Tiger, when stdout is redirected to a pipe and the
   other side of the pipe is closed, dup(1) succeeds, whereas fstat(1, &st)
   fails with EBADF. FreeBSD has a similar issue (bpo-32849).

   Only use dup() on Linux, where dup() is enough to detect an invalid FD
   (bpo-32849).
*/
    if (fd < 0) {
        return 0;
    }

#if defined(F_GETFD) && ( \
        defined(__linux__) || \
        defined(__APPLE__) || \
        (defined(__wasm__) && !defined(__wasi__)))
    int fd_flags = fcntl(fd, F_GETFD);
    return (fd_flags >= 0);
#elif defined(__linux__)
    int probe_fd = dup(fd);
    if (probe_fd < 0) {
        return 0;
    }
    close(probe_fd);
    return 1;
#elif defined(MS_WINDOWS)
    HANDLE os_handle;
    _Py_BEGIN_SUPPRESS_IPH
    os_handle = (HANDLE)_get_osfhandle(fd);
    _Py_END_SUPPRESS_IPH
    if (os_handle == INVALID_HANDLE_VALUE) {
        return 0;
    }
    return (GetFileType(os_handle) != FILE_TYPE_UNKNOWN);
#else
    struct stat status;
    return (fstat(fd, &status) == 0);
#endif
}