/src/Python-3.8.3/Python/fileutils.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "Python.h" |
2 | | #include "pycore_fileutils.h" |
3 | | #include "osdefs.h" |
4 | | #include <locale.h> |
5 | | |
6 | | #ifdef MS_WINDOWS |
7 | | # include <malloc.h> |
8 | | # include <windows.h> |
9 | | extern int winerror_to_errno(int); |
10 | | #endif |
11 | | |
12 | | #ifdef HAVE_LANGINFO_H |
13 | | #include <langinfo.h> |
14 | | #endif |
15 | | |
16 | | #ifdef HAVE_SYS_IOCTL_H |
17 | | #include <sys/ioctl.h> |
18 | | #endif |
19 | | |
20 | | #ifdef HAVE_FCNTL_H |
21 | | #include <fcntl.h> |
22 | | #endif /* HAVE_FCNTL_H */ |
23 | | |
24 | | #ifdef O_CLOEXEC |
25 | | /* Does open() support the O_CLOEXEC flag? Possible values: |
26 | | |
27 | | -1: unknown |
28 | | 0: open() ignores O_CLOEXEC flag, ex: Linux kernel older than 2.6.23 |
29 | | 1: open() supports O_CLOEXEC flag, close-on-exec is set |
30 | | |
31 | | The flag is used by _Py_open(), _Py_open_noraise(), io.FileIO |
32 | | and os.open(). */ |
33 | | int _Py_open_cloexec_works = -1; |
34 | | #endif |
35 | | /* Translate 'errors' into a flag: store 0 (_Py_ERROR_STRICT) or 1 (_Py_ERROR_SURROGATEESCAPE) in *surrogateescape and return 0. */ |
36 | | /* Any other handler returns -1 and leaves *surrogateescape unwritten. */ |
37 | | static int |
38 | | get_surrogateescape(_Py_error_handler errors, int *surrogateescape) |
39 | 5.42k | { |
40 | 5.42k | switch (errors) |
41 | 5.42k | { |
42 | 0 | case _Py_ERROR_STRICT: |
43 | 0 | *surrogateescape = 0; |
44 | 0 | return 0; |
45 | 5.42k | case _Py_ERROR_SURROGATEESCAPE: |
46 | 5.42k | *surrogateescape = 1; |
47 | 5.42k | return 0; |
48 | 0 | default: |
49 | 0 | return -1; |
50 | 5.42k | } |
51 | 5.42k | } |
52 | | /* Return the encoding name of the terminal attached to fd, or None when fd is not a tty or no name is available. */ |
53 | | /* Windows: "cp<N>" from GetConsoleCP() (fd 0) or GetConsoleOutputCP() (fd 1, 2); otherwise nl_langinfo(CODESET) when CODESET is defined. */ |
54 | | PyObject * |
55 | | _Py_device_encoding(int fd) |
56 | 0 | { |
57 | | #if defined(MS_WINDOWS) |
58 | | UINT cp; |
59 | | #endif |
60 | 0 | int valid; |
61 | 0 | _Py_BEGIN_SUPPRESS_IPH |
62 | 0 | valid = isatty(fd); |
63 | 0 | _Py_END_SUPPRESS_IPH |
64 | 0 | if (!valid) |
65 | 0 | Py_RETURN_NONE; |
66 | | |
67 | | #if defined(MS_WINDOWS) |
68 | | if (fd == 0) |
69 | | cp = GetConsoleCP(); |
70 | | else if (fd == 1 || fd == 2) |
71 | | cp = GetConsoleOutputCP(); |
72 | | else |
73 | | cp = 0; |
74 | | /* GetConsoleCP() and GetConsoleOutputCP() return 0 if the application |
75 | | has no console */ |
76 | | if (cp != 0) |
77 | | return PyUnicode_FromFormat("cp%u", (unsigned int)cp); |
78 | | #elif defined(CODESET) |
79 | 0 | { |
80 | 0 | char *codeset = nl_langinfo(CODESET); |
81 | 0 | if (codeset != NULL && codeset[0] != 0) |
82 | 0 | return PyUnicode_FromString(codeset); |
83 | 0 | } |
84 | 0 | #endif |
85 | 0 | Py_RETURN_NONE; |
86 | 0 | } |
87 | | |
88 | | #if !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) |
89 | | |
90 | | #define USE_FORCE_ASCII |
91 | | |
92 | | extern int _Py_normalize_encoding(const char *, char *, size_t); |
93 | | |
94 | | /* Workaround FreeBSD and OpenIndiana locale encoding issue with the C locale |
95 | | and POSIX locale. nl_langinfo(CODESET) announces an alias of the |
96 | | ASCII encoding, whereas mbstowcs() and wcstombs() functions use the |
97 | | ISO-8859-1 encoding. The problem is that os.fsencode() and os.fsdecode() use |
98 | | locale.getpreferredencoding() codec. For example, if command line arguments |
99 | | are decoded by mbstowcs() and encoded back by os.fsencode(), we get a |
100 | | UnicodeEncodeError instead of retrieving the original byte string. |
101 | | |
102 | | The workaround is enabled if setlocale(LC_CTYPE, NULL) returns "C" or "POSIX", |
103 | | nl_langinfo(CODESET) announces "ascii" (or an alias to ASCII), and at least |
104 | | one byte in range 0x80-0xff can be decoded from the locale encoding. The |
105 | | workaround is also enabled on error, for example if getting the locale |
106 | | failed. |
107 | | |
108 | | On HP-UX with the C locale or the POSIX locale, nl_langinfo(CODESET) |
109 | | announces "roman8" but mbstowcs() uses Latin1 in practice. Force also the |
110 | | ASCII encoding in this case. |
111 | | |
112 | | Values of force_ascii: |
113 | | |
114 | | 1: the workaround is used: Py_EncodeLocale() uses |
115 | | encode_ascii_surrogateescape() and Py_DecodeLocale() uses |
116 | | decode_ascii() |
117 | | 0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and |
118 | | Py_DecodeLocale() uses mbstowcs() |
119 | | -1: unknown, need to call check_force_ascii() to get the value |
120 | | */ |
121 | | static int force_ascii = -1; |
122 | | /* Decide whether the ASCII workaround is needed for the current LC_CTYPE locale: return 1 to force ASCII (also on any error), 0 otherwise. */ |
123 | | static int |
124 | | check_force_ascii(void) |
125 | 14 | { |
126 | 14 | char *loc = setlocale(LC_CTYPE, NULL); |
127 | 14 | if (loc == NULL) { |
128 | 0 | goto error; |
129 | 0 | } |
130 | 14 | if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) { |
131 | | /* the LC_CTYPE locale is different than C and POSIX */ |
132 | 0 | return 0; |
133 | 0 | } |
134 | | |
135 | 14 | #if defined(HAVE_LANGINFO_H) && defined(CODESET) |
136 | 14 | const char *codeset = nl_langinfo(CODESET); |
137 | 14 | if (!codeset || codeset[0] == '\0') { |
138 | | /* CODESET is not set or empty */ |
139 | 0 | goto error; |
140 | 0 | } |
141 | | |
142 | 14 | char encoding[20]; /* longest name: "iso_646.irv_1991\0" */ |
143 | 14 | if (!_Py_normalize_encoding(codeset, encoding, sizeof(encoding))) { |
144 | 0 | goto error; |
145 | 0 | } |
146 | | |
147 | | #ifdef __hpux |
148 | | if (strcmp(encoding, "roman8") == 0) { |
149 | | unsigned char ch; |
150 | | wchar_t wch; |
151 | | size_t res; |
152 | | |
153 | | ch = (unsigned char)0xA7; |
154 | | res = mbstowcs(&wch, (char*)&ch, 1); |
155 | | if (res != (size_t)-1 && wch == L'\xA7') { |
156 | | /* On HP-UX with the C locale or the POSIX locale, |
157 | | nl_langinfo(CODESET) announces "roman8", whereas mbstowcs() uses |
158 | | Latin1 encoding in practice. Force ASCII in this case. |
159 | | |
160 | | Roman8 decodes 0xA7 to U+00CF. Latin1 decodes 0xA7 to U+00A7. */ |
161 | | return 1; |
162 | | } |
163 | | } |
164 | | #else |
165 | 14 | const char* ascii_aliases[] = { |
166 | 14 | "ascii", |
167 | | /* Aliases from Lib/encodings/aliases.py */ |
168 | 14 | "646", |
169 | 14 | "ansi_x3.4_1968", |
170 | 14 | "ansi_x3.4_1986", |
171 | 14 | "ansi_x3_4_1968", |
172 | 14 | "cp367", |
173 | 14 | "csascii", |
174 | 14 | "ibm367", |
175 | 14 | "iso646_us", |
176 | 14 | "iso_646.irv_1991", |
177 | 14 | "iso_ir_6", |
178 | 14 | "us", |
179 | 14 | "us_ascii", |
180 | 14 | NULL |
181 | 14 | }; |
182 | | |
183 | 14 | int is_ascii = 0; |
184 | 42 | for (const char **alias=ascii_aliases; *alias != NULL; alias++) { |
185 | 42 | if (strcmp(encoding, *alias) == 0) { |
186 | 14 | is_ascii = 1; |
187 | 14 | break; |
188 | 14 | } |
189 | 42 | } |
190 | 14 | if (!is_ascii) { |
191 | | /* nl_langinfo(CODESET) is not "ascii" or an alias of ASCII */ |
192 | 0 | return 0; |
193 | 0 | } |
194 | | /* Probe every byte 0x80..0xff: a locale that is really ASCII must reject all of them. */ |
195 | 1.80k | for (unsigned int i=0x80; i<=0xff; i++) { |
196 | 1.79k | char ch[1]; |
197 | 1.79k | wchar_t wch[1]; |
198 | 1.79k | size_t res; |
199 | | |
200 | 1.79k | unsigned uch = (unsigned char)i; |
201 | 1.79k | ch[0] = (char)uch; |
202 | 1.79k | res = mbstowcs(wch, ch, 1); |
203 | 1.79k | if (res != (size_t)-1) { |
204 | | /* decoding a non-ASCII character from the locale encoding succeeded: |
205 | | the locale encoding is not ASCII, force ASCII */ |
206 | 0 | return 1; |
207 | 0 | } |
208 | 1.79k | } |
209 | | /* None of the bytes in the range 0x80-0xff can be decoded from the locale |
210 | | encoding: the locale encoding is really ASCII */ |
211 | 14 | #endif /* !defined(__hpux) */ |
212 | 14 | return 0; |
213 | | #else |
214 | | /* nl_langinfo(CODESET) is not available: always force ASCII */ |
215 | | return 1; |
216 | | #endif /* defined(HAVE_LANGINFO_H) && defined(CODESET) */ |
217 | | |
218 | 0 | error: |
219 | | /* if an error occurred, force the ASCII encoding */ |
220 | 0 | return 1; |
221 | 14 | } |
222 | | |
223 | | /* Return the cached force_ascii flag, computing it with check_force_ascii() first if it is still -1 (unknown). */ |
224 | | int |
225 | | _Py_GetForceASCII(void) |
226 | 14 | { |
227 | 14 | if (force_ascii == -1) { |
228 | 0 | force_ascii = check_force_ascii(); |
229 | 0 | } |
230 | 14 | return force_ascii; |
231 | 14 | } |
232 | | |
233 | | /* Reset the cached flag to -1 (unknown) so that the next user calls check_force_ascii() again. */ |
234 | | void |
235 | | _Py_ResetForceASCII(void) |
236 | 28 | { |
237 | 28 | force_ascii = -1; |
238 | 28 | } |
239 | | /* Encode 'text' as ASCII into a new NUL-terminated buffer stored in *str (PyMem_RawMalloc() if raw_malloc is non-zero, else PyMem_Malloc()); with surrogateescape, U+DC80..U+DCFF become bytes 0x80..0xFF. */ |
240 | | /* Return 0 on success, -1 on allocation failure, -2 on an unencodable character (buffer freed; *error_pos and *reason set when non-NULL), -3 if 'errors' is not supported. */ |
241 | | static int |
242 | | encode_ascii(const wchar_t *text, char **str, |
243 | | size_t *error_pos, const char **reason, |
244 | | int raw_malloc, _Py_error_handler errors) |
245 | 0 | { |
246 | 0 | char *result = NULL, *out; |
247 | 0 | size_t len, i; |
248 | 0 | wchar_t ch; |
249 | 

250 | 0 | int surrogateescape; |
251 | 0 | if (get_surrogateescape(errors, &surrogateescape) < 0) { |
252 | 0 | return -3; |
253 | 0 | } |
254 | | |
255 | 0 | len = wcslen(text); |
256 | | |
257 | | /* +1 for NUL byte */ |
258 | 0 | if (raw_malloc) { |
259 | 0 | result = PyMem_RawMalloc(len + 1); |
260 | 0 | } |
261 | 0 | else { |
262 | 0 | result = PyMem_Malloc(len + 1); |
263 | 0 | } |
264 | 0 | if (result == NULL) { |
265 | 0 | return -1; |
266 | 0 | } |
267 | | |
268 | 0 | out = result; |
269 | 0 | for (i=0; i<len; i++) { |
270 | 0 | ch = text[i]; |
271 | 

272 | 0 | if (ch <= 0x7f) { |
273 | | /* ASCII character */ |
274 | 0 | *out++ = (char)ch; |
275 | 0 | } |
276 | 0 | else if (surrogateescape && 0xdc80 <= ch && ch <= 0xdcff) { |
277 | | /* UTF-8b surrogate */ |
278 | 0 | *out++ = (char)(ch - 0xdc00); |
279 | 0 | } |
280 | 0 | else { |
281 | 0 | if (raw_malloc) { |
282 | 0 | PyMem_RawFree(result); |
283 | 0 | } |
284 | 0 | else { |
285 | 0 | PyMem_Free(result); |
286 | 0 | } |
287 | 0 | if (error_pos != NULL) { |
288 | 0 | *error_pos = i; |
289 | 0 | } |
290 | 0 | if (reason) { |
291 | 0 | *reason = "encoding error"; |
292 | 0 | } |
293 | 0 | return -2; |
294 | 0 | } |
295 | 0 | } |
296 | 0 | *out = '\0'; |
297 | 0 | *str = result; |
298 | 0 | return 0; |
299 | 0 | } |
300 | | #else |
301 | | int |
302 | | _Py_GetForceASCII(void) |
303 | | { |
304 | | return 0; /* USE_FORCE_ASCII is not defined in this configuration: the workaround is never used */ |
305 | | } |
306 | | /* Stub for builds without USE_FORCE_ASCII: there is no cached flag to reset. */ |
307 | | void |
308 | | _Py_ResetForceASCII(void) |
309 | | { |
310 | | /* nothing to do */ |
311 | | } |
312 | | #endif /* !defined(_Py_FORCE_UTF8_FS_ENCODING) && !defined(MS_WINDOWS) */ |
313 | | /* Decode the NUL-terminated byte string 'arg' as ASCII into a wchar_t string allocated with PyMem_RawMalloc() and stored in *wstr; with surrogateescape, bytes 0x80..0xFF become U+DC80..U+DCFF. */ |
314 | | /* Return 0 on success (*wlen = length, if non-NULL), -1 if the input is too long or allocation fails, -2 on a non-ASCII byte in strict mode (*wlen = byte offset, *reason set, when non-NULL), -3 if 'errors' is not supported. */ |
315 | | #if !defined(HAVE_MBRTOWC) || defined(USE_FORCE_ASCII) |
316 | | static int |
317 | | decode_ascii(const char *arg, wchar_t **wstr, size_t *wlen, |
318 | | const char **reason, _Py_error_handler errors) |
319 | 0 | { |
320 | 0 | wchar_t *res; |
321 | 0 | unsigned char *in; |
322 | 0 | wchar_t *out; |
323 | 0 | size_t argsize = strlen(arg) + 1; |
324 | 

325 | 0 | int surrogateescape; |
326 | 0 | if (get_surrogateescape(errors, &surrogateescape) < 0) { |
327 | 0 | return -3; |
328 | 0 | } |
329 | | |
330 | 0 | if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) { |
331 | 0 | return -1; |
332 | 0 | } |
333 | 0 | res = PyMem_RawMalloc(argsize * sizeof(wchar_t)); |
334 | 0 | if (!res) { |
335 | 0 | return -1; |
336 | 0 | } |
337 | | |
338 | 0 | out = res; |
339 | 0 | for (in = (unsigned char*)arg; *in; in++) { |
340 | 0 | unsigned char ch = *in; |
341 | 0 | if (ch < 128) { |
342 | 0 | *out++ = ch; |
343 | 0 | } |
344 | 0 | else { |
345 | 0 | if (!surrogateescape) { |
346 | 0 | PyMem_RawFree(res); |
347 | 0 | if (wlen) { |
348 | 0 | *wlen = in - (unsigned char*)arg; |
349 | 0 | } |
350 | 0 | if (reason) { |
351 | 0 | *reason = "decoding error"; |
352 | 0 | } |
353 | 0 | return -2; |
354 | 0 | } |
355 | 0 | *out++ = 0xdc00 + ch; |
356 | 0 | } |
357 | 0 | } |
358 | 0 | *out = 0; |
359 | 

360 | 0 | if (wlen != NULL) { |
361 | 0 | *wlen = out - res; |
362 | 0 | } |
363 | 0 | *wstr = res; |
364 | 0 | return 0; |
365 | 0 | } |
366 | | #endif /* !HAVE_MBRTOWC || USE_FORCE_ASCII */ |
367 | | /* Decode 'arg' with the current LC_CTYPE locale: try mbstowcs() first and keep its result only if it contains no surrogate; otherwise decode step by step with mbrtowc() (or decode_ascii() without HAVE_MBRTOWC). Returns 0, -1, -2 or -3 as documented for _Py_DecodeLocaleEx(). */ |
368 | | static int |
369 | | decode_current_locale(const char* arg, wchar_t **wstr, size_t *wlen, |
370 | | const char **reason, _Py_error_handler errors) |
371 | 4.99k | { |
372 | 4.99k | wchar_t *res; |
373 | 4.99k | size_t argsize; |
374 | 4.99k | size_t count; |
375 | 4.99k | #ifdef HAVE_MBRTOWC |
376 | 4.99k | unsigned char *in; |
377 | 4.99k | wchar_t *out; |
378 | 4.99k | mbstate_t mbs; |
379 | 4.99k | #endif |
380 | | |
381 | 4.99k | int surrogateescape; |
382 | 4.99k | if (get_surrogateescape(errors, &surrogateescape) < 0) { |
383 | 0 | return -3; |
384 | 0 | } |
385 | | |
386 | | #ifdef HAVE_BROKEN_MBSTOWCS |
387 | | /* Some platforms have a broken implementation of |
388 | | * mbstowcs which does not count the characters that |
389 | | * would result from conversion. Use an upper bound. |
390 | | */ |
391 | | argsize = strlen(arg); |
392 | | #else |
393 | 4.99k | argsize = mbstowcs(NULL, arg, 0); |
394 | 4.99k | #endif |
395 | 4.99k | if (argsize != (size_t)-1) { |
396 | 4.99k | if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t) - 1) { |
397 | 0 | return -1; |
398 | 0 | } |
399 | 4.99k | res = (wchar_t *)PyMem_RawMalloc((argsize + 1) * sizeof(wchar_t)); |
400 | 4.99k | if (!res) { |
401 | 0 | return -1; |
402 | 0 | } |
403 | | |
404 | 4.99k | count = mbstowcs(res, arg, argsize + 1); |
405 | 4.99k | if (count != (size_t)-1) { |
406 | 4.99k | wchar_t *tmp; |
407 | | /* Only use the result if it contains no |
408 | | surrogate characters. */ |
409 | 63.4k | for (tmp = res; *tmp != 0 && |
410 | 63.4k | !Py_UNICODE_IS_SURROGATE(*tmp); tmp++) |
411 | 58.4k | ; |
412 | 4.99k | if (*tmp == 0) { |
413 | 4.99k | if (wlen != NULL) { |
414 | 4.99k | *wlen = count; |
415 | 4.99k | } |
416 | 4.99k | *wstr = res; |
417 | 4.99k | return 0; |
418 | 4.99k | } |
419 | 4.99k | } |
420 | 0 | PyMem_RawFree(res); |
421 | 0 | } |
422 | | |
423 | | /* Conversion failed. Fall back to escaping with surrogateescape. */ |
424 | 0 | #ifdef HAVE_MBRTOWC |
425 | | /* Try conversion with mbrtowc (C99), and escape non-decodable bytes. */ |
426 | | |
427 | | /* Overallocate; as multi-byte characters are in the argument, the |
428 | | actual output could use less memory. */ |
429 | 0 | argsize = strlen(arg) + 1; |
430 | 0 | if (argsize > PY_SSIZE_T_MAX / sizeof(wchar_t)) { |
431 | 0 | return -1; |
432 | 0 | } |
433 | 0 | res = (wchar_t*)PyMem_RawMalloc(argsize * sizeof(wchar_t)); |
434 | 0 | if (!res) { |
435 | 0 | return -1; |
436 | 0 | } |
437 | | /* From here on argsize is the number of input bytes left, terminating NUL included. */ |
438 | 0 | in = (unsigned char*)arg; |
439 | 0 | out = res; |
440 | 0 | memset(&mbs, 0, sizeof mbs); |
441 | 0 | while (argsize) { |
442 | 0 | size_t converted = mbrtowc(out, (char*)in, argsize, &mbs); |
443 | 0 | if (converted == 0) { |
444 | | /* Reached end of string; null char stored. */ |
445 | 0 | break; |
446 | 0 | } |
447 | | |
448 | 0 | if (converted == (size_t)-2) { |
449 | | /* Incomplete character. This should never happen, |
450 | | since we provide everything that we have - |
451 | | unless there is a bug in the C library, or I |
452 | | misunderstood how mbrtowc works. */ |
453 | 0 | goto decode_error; |
454 | 0 | } |
455 | | |
456 | 0 | if (converted == (size_t)-1) { |
457 | 0 | if (!surrogateescape) { |
458 | 0 | goto decode_error; |
459 | 0 | } |
460 | | |
461 | | /* Conversion error. Escape as UTF-8b, and start over |
462 | | in the initial shift state. */ |
463 | 0 | *out++ = 0xdc00 + *in++; |
464 | 0 | argsize--; |
465 | 0 | memset(&mbs, 0, sizeof mbs); |
466 | 0 | continue; |
467 | 0 | } |
468 | | |
469 | 0 | if (Py_UNICODE_IS_SURROGATE(*out)) { |
470 | 0 | if (!surrogateescape) { |
471 | 0 | goto decode_error; |
472 | 0 | } |
473 | | |
474 | | /* Surrogate character. Escape the original |
475 | | byte sequence with surrogateescape. */ |
476 | 0 | argsize -= converted; |
477 | 0 | while (converted--) { |
478 | 0 | *out++ = 0xdc00 + *in++; |
479 | 0 | } |
480 | 0 | continue; |
481 | 0 | } |
482 | | /* successfully converted some bytes */ |
483 | 0 | in += converted; |
484 | 0 | argsize -= converted; |
485 | 0 | out++; |
486 | 0 | } |
487 | 0 | if (wlen != NULL) { |
488 | 0 | *wlen = out - res; |
489 | 0 | } |
490 | 0 | *wstr = res; |
491 | 0 | return 0; |
492 | | |
493 | 0 | decode_error: |
494 | 0 | PyMem_RawFree(res); |
495 | 0 | if (wlen) { |
496 | 0 | *wlen = in - (unsigned char*)arg; |
497 | 0 | } |
498 | 0 | if (reason) { |
499 | 0 | *reason = "decoding error"; |
500 | 0 | } |
501 | 0 | return -2; |
502 | | #else /* HAVE_MBRTOWC */ |
503 | | /* Cannot use C locale for escaping; manually escape as if charset |
504 | | is ASCII (i.e. escape all bytes >= 128). This will still roundtrip |
505 | | correctly in the locale's charset, which must be an ASCII superset. */ |
506 | | return decode_ascii(arg, wstr, wlen, reason, errors); |
507 | | #endif /* HAVE_MBRTOWC */ |
508 | 0 | } |
509 | | |
510 | | |
511 | | /* Decode a byte string from the locale encoding. |
512 | | |
513 | | Use the strict error handler if 'errors' is _Py_ERROR_STRICT. Use the |
514 | | surrogateescape error handler if it is _Py_ERROR_SURROGATEESCAPE: undecodable |
515 | | bytes are decoded as characters in range U+DC80..U+DCFF. If a byte sequence |
516 | | can be decoded as a surrogate character, escape the bytes using the |
517 | | surrogateescape error handler instead of decoding them. |
518 | | |
519 | | On success, return 0 and write the newly allocated wide character string into |
520 | | *wstr (use PyMem_RawFree() to free the memory). If wlen is not NULL, write |
521 | | the number of wide characters excluding the null character into *wlen. |
522 | | |
523 | | On memory allocation failure, return -1. |
524 | | |
525 | | On decoding error, return -2. If wlen is not NULL, write the start of |
526 | | invalid byte sequence in the input string into *wlen. If reason is not NULL, |
527 | | write the decoding error message into *reason. |
528 | | |
529 | | Return -3 if the error handler 'errors' is not supported. |
530 | | |
531 | | Use the _Py_EncodeLocaleEx() function to encode the character string back to |
532 | | a byte string. */ |
533 | | int |
534 | | _Py_DecodeLocaleEx(const char* arg, wchar_t **wstr, size_t *wlen, |
535 | | const char **reason, |
536 | | int current_locale, _Py_error_handler errors) |
537 | 4.99k | { |
538 | 4.99k | if (current_locale) { |
539 | | #ifdef _Py_FORCE_UTF8_LOCALE |
540 | | return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, |
541 | | errors); |
542 | | #else |
543 | 230 | return decode_current_locale(arg, wstr, wlen, reason, errors); |
544 | 230 | #endif |
545 | 230 | } |
546 | | /* current_locale == 0: UTF-8 when forced at build time or selected at run time, else the ASCII workaround when force_ascii is set, else the LC_CTYPE locale. */ |
547 | | #ifdef _Py_FORCE_UTF8_FS_ENCODING |
548 | | return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, |
549 | | errors); |
550 | | #else |
551 | 4.76k | int use_utf8 = (Py_UTF8Mode == 1); |
552 | | #ifdef MS_WINDOWS |
553 | | use_utf8 |= !Py_LegacyWindowsFSEncodingFlag; |
554 | | #endif |
555 | 4.76k | if (use_utf8) { |
556 | 0 | return _Py_DecodeUTF8Ex(arg, strlen(arg), wstr, wlen, reason, |
557 | 0 | errors); |
558 | 0 | } |
559 | | |
560 | 4.76k | #ifdef USE_FORCE_ASCII |
561 | 4.76k | if (force_ascii == -1) { |
562 | 14 | force_ascii = check_force_ascii(); |
563 | 14 | } |
564 | | |
565 | 4.76k | if (force_ascii) { |
566 | | /* force ASCII encoding to workaround mbstowcs() issue */ |
567 | 0 | return decode_ascii(arg, wstr, wlen, reason, errors); |
568 | 0 | } |
569 | 4.76k | #endif |
570 | | |
571 | 4.76k | return decode_current_locale(arg, wstr, wlen, reason, errors); |
572 | 4.76k | #endif /* !_Py_FORCE_UTF8_FS_ENCODING */ |
573 | 4.76k | } |
574 | | |
575 | | |
576 | | /* Decode a byte string from the locale encoding with the |
577 | | surrogateescape error handler: undecodable bytes are decoded as characters |
578 | | in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate |
579 | | character, escape the bytes using the surrogateescape error handler instead |
580 | | of decoding them. |
581 | | |
582 | | Return a pointer to a newly allocated wide character string, use |
583 | | PyMem_RawFree() to free the memory. If wlen is not NULL, write the number of |
584 | | wide characters excluding the null character into *wlen. |
585 | | |
586 | | Return NULL on decoding error or memory allocation error. If wlen is not |
587 | | NULL, *wlen is set to (size_t)-1 on memory error or set to (size_t)-2 on |
588 | | decoding error. |
589 | | |
590 | | Decoding errors should never happen, unless there is a bug in the C |
591 | | library. |
592 | | |
593 | | Use the Py_EncodeLocale() function to encode the character string back to a |
594 | | byte string. */ |
595 | | wchar_t* |
596 | | Py_DecodeLocale(const char* arg, size_t *wlen) |
597 | 126 | { |
598 | 126 | wchar_t *wstr; |
599 | 126 | int res = _Py_DecodeLocaleEx(arg, &wstr, wlen, |
600 | 126 | NULL, 0, |
601 | 126 | _Py_ERROR_SURROGATEESCAPE); |
602 | 126 | if (res != 0) { |
603 | 0 | assert(res != -3); |
604 | 0 | if (wlen != NULL) { |
605 | 0 | *wlen = (size_t)res; |
606 | 0 | } |
607 | 0 | return NULL; |
608 | 0 | } |
609 | 126 | return wstr; |
610 | 126 | } |
611 | | /* Encode 'text' with the current LC_CTYPE locale, calling wcstombs() on one character at a time; with surrogateescape, U+DC80..U+DCFF are written as bytes 0x80..0xFF, in strict mode they are an encoding error. */ |
612 | | /* Return 0 and store the new NUL-terminated buffer in *str; -1 on allocation failure; -2 on encoding error (buffer freed; *error_pos = character index and *reason set, when non-NULL); -3 if 'errors' is not supported. */ |
613 | | static int |
614 | | encode_current_locale(const wchar_t *text, char **str, |
615 | | size_t *error_pos, const char **reason, |
616 | | int raw_malloc, _Py_error_handler errors) |
617 | 434 | { |
618 | 434 | const size_t len = wcslen(text); |
619 | 434 | char *result = NULL, *bytes = NULL; |
620 | 434 | size_t i, size, converted; |
621 | 434 | wchar_t c, buf[2]; |
622 | | |
623 | 434 | int surrogateescape; |
624 | 434 | if (get_surrogateescape(errors, &surrogateescape) < 0) { |
625 | 0 | return -3; |
626 | 0 | } |
627 | | |
628 | | /* The function works in two steps: |
629 | | 1. compute the length of the output buffer in bytes (size) |
630 | | 2. outputs the bytes */ |
631 | 434 | size = 0; |
632 | 434 | buf[1] = 0; |
633 | 868 | while (1) { |
634 | 59.6k | for (i=0; i < len; i++) { |
635 | 58.7k | c = text[i]; |
636 | 58.7k | if (c >= 0xdc80 && c <= 0xdcff) { |
637 | 0 | if (!surrogateescape) { |
638 | 0 | goto encode_error; |
639 | 0 | } |
640 | | /* UTF-8b surrogate */ |
641 | 0 | if (bytes != NULL) { |
642 | 0 | *bytes++ = c - 0xdc00; |
643 | 0 | size--; |
644 | 0 | } |
645 | 0 | else { |
646 | 0 | size++; |
647 | 0 | } |
648 | 0 | continue; |
649 | 0 | } |
650 | 58.7k | else { |
651 | 58.7k | buf[0] = c; |
652 | 58.7k | if (bytes != NULL) { |
653 | 29.3k | converted = wcstombs(bytes, buf, size); |
654 | 29.3k | } |
655 | 29.3k | else { |
656 | 29.3k | converted = wcstombs(NULL, buf, 0); |
657 | 29.3k | } |
658 | 58.7k | if (converted == (size_t)-1) { |
659 | 0 | goto encode_error; |
660 | 0 | } |
661 | 58.7k | if (bytes != NULL) { |
662 | 29.3k | bytes += converted; |
663 | 29.3k | size -= converted; |
664 | 29.3k | } |
665 | 29.3k | else { |
666 | 29.3k | size += converted; |
667 | 29.3k | } |
668 | 58.7k | } |
669 | 58.7k | } |
670 | 868 | if (result != NULL) { |
671 | 434 | *bytes = '\0'; |
672 | 434 | break; |
673 | 434 | } |
674 | | /* End of the first pass: allocate the buffer, then run the loop again to fill it. */ |
675 | 434 | size += 1; /* nul byte at the end */ |
676 | 434 | if (raw_malloc) { |
677 | 434 | result = PyMem_RawMalloc(size); |
678 | 434 | } |
679 | 0 | else { |
680 | 0 | result = PyMem_Malloc(size); |
681 | 0 | } |
682 | 434 | if (result == NULL) { |
683 | 0 | return -1; |
684 | 0 | } |
685 | 434 | bytes = result; |
686 | 434 | } |
687 | 434 | *str = result; |
688 | 434 | return 0; |
689 | | |
690 | 0 | encode_error: |
691 | 0 | if (raw_malloc) { |
692 | 0 | PyMem_RawFree(result); |
693 | 0 | } |
694 | 0 | else { |
695 | 0 | PyMem_Free(result); |
696 | 0 | } |
697 | 0 | if (error_pos != NULL) { |
698 | 0 | *error_pos = i; |
699 | 0 | } |
700 | 0 | if (reason) { |
701 | 0 | *reason = "encoding error"; |
702 | 0 | } |
703 | 0 | return -2; |
704 | 434 | } |
705 | | |
706 | | |
707 | | /* Encode a string to the locale encoding. |
708 | | |
709 | | Parameters: |
710 | | |
711 | | * raw_malloc: if non-zero, allocate memory using PyMem_RawMalloc() instead |
712 | | of PyMem_Malloc(). |
713 | | * current_locale: if non-zero, use the current LC_CTYPE, otherwise use |
714 | | Python filesystem encoding. |
715 | | * errors: error handler like "strict" or "surrogateescape". |
716 | | |
717 | | Return value: |
718 | | |
719 | | 0: success, *str is set to a newly allocated encoded string. |
720 | | -1: memory allocation failure |
721 | | -2: encoding error, set *error_pos and *reason (if set). |
722 | | -3: the error handler 'errors' is not supported. |
723 | | */ |
724 | | static int |
725 | | encode_locale_ex(const wchar_t *text, char **str, size_t *error_pos, |
726 | | const char **reason, |
727 | | int raw_malloc, int current_locale, _Py_error_handler errors) |
728 | 434 | { |
729 | 434 | if (current_locale) { |
730 | | #ifdef _Py_FORCE_UTF8_LOCALE |
731 | | return _Py_EncodeUTF8Ex(text, str, error_pos, reason, |
732 | | raw_malloc, errors); |
733 | | #else |
734 | 0 | return encode_current_locale(text, str, error_pos, reason, |
735 | 0 | raw_malloc, errors); |
736 | 0 | #endif |
737 | 0 | } |
738 | | /* current_locale == 0: UTF-8 when forced at build time or selected at run time, else the ASCII workaround when force_ascii is set, else the LC_CTYPE locale. */ |
739 | | #ifdef _Py_FORCE_UTF8_FS_ENCODING |
740 | | return _Py_EncodeUTF8Ex(text, str, error_pos, reason, |
741 | | raw_malloc, errors); |
742 | | #else |
743 | 434 | int use_utf8 = (Py_UTF8Mode == 1); |
744 | | #ifdef MS_WINDOWS |
745 | | use_utf8 |= !Py_LegacyWindowsFSEncodingFlag; |
746 | | #endif |
747 | 434 | if (use_utf8) { |
748 | 0 | return _Py_EncodeUTF8Ex(text, str, error_pos, reason, |
749 | 0 | raw_malloc, errors); |
750 | 0 | } |
751 | | |
752 | 434 | #ifdef USE_FORCE_ASCII |
753 | 434 | if (force_ascii == -1) { |
754 | 0 | force_ascii = check_force_ascii(); |
755 | 0 | } |
756 | | |
757 | 434 | if (force_ascii) { |
758 | 0 | return encode_ascii(text, str, error_pos, reason, |
759 | 0 | raw_malloc, errors); |
760 | 0 | } |
761 | 434 | #endif |
762 | | |
763 | 434 | return encode_current_locale(text, str, error_pos, reason, |
764 | 434 | raw_malloc, errors); |
765 | 434 | #endif /* _Py_FORCE_UTF8_FS_ENCODING */ |
766 | 434 | } |
767 | | /* Surrogateescape wrapper around encode_locale_ex(): return the encoded string, or NULL on any failure. If error_pos is non-NULL it is overwritten with (size_t)-1 unless the result was an encoding error (-2). */ |
768 | | static char* |
769 | | encode_locale(const wchar_t *text, size_t *error_pos, |
770 | | int raw_malloc, int current_locale) |
771 | 84 | { |
772 | 84 | char *str; |
773 | 84 | int res = encode_locale_ex(text, &str, error_pos, NULL, |
774 | 84 | raw_malloc, current_locale, |
775 | 84 | _Py_ERROR_SURROGATEESCAPE); |
776 | 84 | if (res != -2 && error_pos) { |
777 | 0 | *error_pos = (size_t)-1; |
778 | 0 | } |
779 | 84 | if (res != 0) { |
780 | 0 | return NULL; |
781 | 0 | } |
782 | 84 | return str; |
783 | 84 | } |
784 | | |
785 | | /* Encode a wide character string to the locale encoding with the |
786 | | surrogateescape error handler: surrogate characters in the range |
787 | | U+DC80..U+DCFF are converted to bytes 0x80..0xFF. |
788 | | |
789 | | Return a pointer to a newly allocated byte string, use PyMem_Free() to free |
790 | | the memory. Return NULL on encoding or memory allocation error. |
791 | | |
792 | | If error_pos is not NULL, *error_pos is set to (size_t)-1 on success or memory |
793 | | allocation error, or to the index of the invalid character on encoding error. |
794 | | |
795 | | Use the Py_DecodeLocale() function to decode the bytes string back to a wide |
796 | | character string. */ |
797 | | char* |
798 | | Py_EncodeLocale(const wchar_t *text, size_t *error_pos) |
799 | 0 | { |
800 | 0 | return encode_locale(text, error_pos, 0, 0); |
801 | 0 | } |
802 | | |
803 | | |
804 | | /* Similar to Py_EncodeLocale(), but raw_malloc=1 is passed to encode_locale(), |
805 | | so the result must be freed by PyMem_RawFree() instead of PyMem_Free(). */ |
806 | | char* |
807 | | _Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos) |
808 | 84 | { |
809 | 84 | return encode_locale(text, error_pos, 1, 0); |
810 | 84 | } |
811 | | /* Encode 'text' by calling encode_locale_ex() with raw_malloc=1, i.e. the raw allocator is requested for *str. */ |
812 | | /* The return codes (0, -1, -2, -3) and the meaning of error_pos, reason, current_locale and errors are those of encode_locale_ex(). */ |
813 | | int |
814 | | _Py_EncodeLocaleEx(const wchar_t *text, char **str, |
815 | | size_t *error_pos, const char **reason, |
816 | | int current_locale, _Py_error_handler errors) |
817 | 350 | { |
818 | 350 | return encode_locale_ex(text, str, error_pos, reason, 1, |
819 | 350 | current_locale, errors); |
820 | 350 | } |
821 | | |
822 | | |
823 | | #ifdef MS_WINDOWS |
824 | | static __int64 secs_between_epochs = 11644473600; /* Seconds between 1.1.1601 and 1.1.1970 */ |
825 | | /* Split a FILETIME (100 ns units) into *time_out, whole seconds with secs_between_epochs subtracted, and *nsec_out, the remaining nanoseconds. */ |
826 | | static void |
827 | | FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) |
828 | | { |
829 | | /* XXX endianness. Shouldn't matter, as all Windows implementations are little-endian */ |
830 | | /* Cannot simply cast and dereference in_ptr, |
831 | | since it might not be aligned properly */ |
832 | | __int64 in; |
833 | | memcpy(&in, in_ptr, sizeof(in)); |
834 | | *nsec_out = (int)(in % 10000000) * 100; /* FILETIME is in units of 100 nsec. */ |
835 | | *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); |
836 | | } |
837 | | /* Inverse of FILE_TIME_to_time_t_nsec(): store (time_in + secs_between_epochs) seconds plus nsec_in nanoseconds into *out_ptr in 100 ns units; nsec_in is divided by 100 with integer division. */ |
838 | | void |
839 | | _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr) |
840 | | { |
841 | | /* XXX endianness */ |
842 | | __int64 out; |
843 | | out = time_in + secs_between_epochs; |
844 | | out = out * 10000000 + nsec_in / 100; |
845 | | memcpy(out_ptr, &out, sizeof(out)); |
846 | | } |
847 | | /* Build an st_mode value from Windows file attributes: directories get _S_IFDIR | 0111, everything else _S_IFREG; read-only adds 0444, otherwise 0666. */ |
848 | | /* Below, we *know* that ugo+r is 0444 */ |
849 | | #if _S_IREAD != 0400 |
850 | | #error Unsupported C library |
851 | | #endif |
852 | | static int |
853 | | attributes_to_mode(DWORD attr) |
854 | | { |
855 | | int m = 0; |
856 | | if (attr & FILE_ATTRIBUTE_DIRECTORY) |
857 | | m |= _S_IFDIR | 0111; /* IFEXEC for user,group,other */ |
858 | | else |
859 | | m |= _S_IFREG; |
860 | | if (attr & FILE_ATTRIBUTE_READONLY) |
861 | | m |= 0444; |
862 | | else |
863 | | m |= 0666; |
864 | | return m; |
865 | | } |
866 | | /* Zero *result, then fill it from 'info' (mode, size, device, timestamps, link count, file index, attributes) and 'reparse_tag'; a reparse point tagged IO_REPARSE_TAG_SYMLINK is reported as S_IFLNK. */ |
867 | | void |
868 | | _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, |
869 | | struct _Py_stat_struct *result) |
870 | | { |
871 | | memset(result, 0, sizeof(*result)); |
872 | | result->st_mode = attributes_to_mode(info->dwFileAttributes); |
873 | | result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; |
874 | | result->st_dev = info->dwVolumeSerialNumber; |
875 | | result->st_rdev = result->st_dev; |
876 | | FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); |
877 | | FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); |
878 | | FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); |
879 | | result->st_nlink = info->nNumberOfLinks; |
880 | | result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow; |
881 | | /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will |
882 | | open other name surrogate reparse points without traversing them. To |
883 | | detect/handle these, check st_file_attributes and st_reparse_tag. */ |
884 | | result->st_reparse_tag = reparse_tag; |
885 | | if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && |
886 | | reparse_tag == IO_REPARSE_TAG_SYMLINK) { |
887 | | /* first clear the S_IFMT bits */ |
888 | | result->st_mode ^= (result->st_mode & S_IFMT); |
889 | | /* now set the bits that make this a symlink */ |
890 | | result->st_mode |= S_IFLNK; |
891 | | } |
892 | | result->st_file_attributes = info->dwFileAttributes; |
893 | | } |
894 | | #endif |
895 | | |
896 | | /* Return information about a file. |
897 | | |
898 | | On POSIX, use fstat(). |
899 | | |
900 | | On Windows, use GetFileType() and GetFileInformationByHandle() which support |
901 | | files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger |
902 | | than 2 GiB because the file size type is a signed 32-bit integer: see issue |
903 | | #23152. |
904 | | |
905 | | On Windows, set the last Windows error and return nonzero on error. On |
906 | | POSIX, set errno and return nonzero on error. Fill status and return 0 on |
907 | | success. */ |
908 | | int |
909 | | _Py_fstat_noraise(int fd, struct _Py_stat_struct *status) |
910 | 527 | { |
911 | | #ifdef MS_WINDOWS |
912 | | BY_HANDLE_FILE_INFORMATION info; |
913 | | HANDLE h; |
914 | | int type; |
915 | | |
916 | | _Py_BEGIN_SUPPRESS_IPH |
917 | | h = (HANDLE)_get_osfhandle(fd); |
918 | | _Py_END_SUPPRESS_IPH |
919 | | |
920 | | if (h == INVALID_HANDLE_VALUE) { |
921 | | /* errno is already set by _get_osfhandle, but we also set |
922 | | the Win32 error for callers who expect that */ |
923 | | SetLastError(ERROR_INVALID_HANDLE); |
924 | | return -1; |
925 | | } |
926 | | memset(status, 0, sizeof(*status)); |
927 | | |
928 | | type = GetFileType(h); |
929 | | if (type == FILE_TYPE_UNKNOWN) { |
930 | | DWORD error = GetLastError(); |
931 | | if (error != 0) { |
932 | | errno = winerror_to_errno(error); |
933 | | return -1; |
934 | | } |
935 | | /* else: valid but unknown file */ |
936 | | } |
937 | | /* Non-disk handles succeed with only st_mode set (character device or pipe); any other type leaves the zeroed status as is. */ |
938 | | if (type != FILE_TYPE_DISK) { |
939 | | if (type == FILE_TYPE_CHAR) |
940 | | status->st_mode = _S_IFCHR; |
941 | | else if (type == FILE_TYPE_PIPE) |
942 | | status->st_mode = _S_IFIFO; |
943 | | return 0; |
944 | | } |
945 | | |
946 | | if (!GetFileInformationByHandle(h, &info)) { |
947 | | /* The Win32 error is already set, but we also set errno for |
948 | | callers who expect it */ |
949 | | errno = winerror_to_errno(GetLastError()); |
950 | | return -1; |
951 | | } |
952 | | |
953 | | _Py_attribute_data_to_stat(&info, 0, status); |
954 | | /* specific to fstat(); NOTE(review): _Py_attribute_data_to_stat() already stores this same st_ino value */ |
955 | | status->st_ino = (((uint64_t)info.nFileIndexHigh) << 32) + info.nFileIndexLow; |
956 | | return 0; |
957 | | #else |
958 | 527 | return fstat(fd, status); |
959 | 527 | #endif |
960 | 527 | } |
961 | | |
962 | | /* Return information about a file. |
963 | | |
964 | | On POSIX, use fstat(). |
965 | | |
966 | | On Windows, use GetFileType() and GetFileInformationByHandle() which support |
967 | | files larger than 2 GiB. fstat() may fail with EOVERFLOW on files larger |
968 | | than 2 GiB because the file size type is a signed 32-bit integer: see issue |
969 | | #23152. |
970 | | |
971 | | Raise an exception and return -1 on error. On Windows, set the last Windows |
972 | | error on error. On POSIX, set errno on error. Fill status and return 0 on |
973 | | success. |
974 | | |
975 | | Release the GIL to call GetFileType() and GetFileInformationByHandle(), or |
976 | | to call fstat(). The caller must hold the GIL. */ |
977 | | int |
978 | | _Py_fstat(int fd, struct _Py_stat_struct *status) |
979 | 0 | { |
980 | 0 | int res; |
981 | |
|
982 | 0 | assert(PyGILState_Check()); |
983 | |
|
984 | 0 | Py_BEGIN_ALLOW_THREADS |
985 | 0 | res = _Py_fstat_noraise(fd, status); |
986 | 0 | Py_END_ALLOW_THREADS |
987 | |
|
988 | 0 | if (res != 0) { |
989 | | #ifdef MS_WINDOWS |
990 | | PyErr_SetFromWindowsErr(0); |
991 | | #else |
992 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
993 | 0 | #endif |
994 | 0 | return -1; |
995 | 0 | } |
996 | 0 | return 0; |
997 | 0 | } |
998 | | |
999 | | /* Call _wstat() on Windows, or encode the path to the filesystem encoding and |
1000 | | call stat() otherwise. Only fill st_mode attribute on Windows. |
1001 | | |
1002 | | Return 0 on success, -1 on _wstat() / stat() error, -2 if an exception was |
1003 | | raised. */ |
1004 | | |
1005 | | int |
1006 | | _Py_stat(PyObject *path, struct stat *statbuf) |
1007 | 0 | { |
1008 | | #ifdef MS_WINDOWS |
1009 | | int err; |
1010 | | struct _stat wstatbuf; |
1011 | | const wchar_t *wpath; |
1012 | | |
1013 | | wpath = _PyUnicode_AsUnicode(path); |
1014 | | if (wpath == NULL) |
1015 | | return -2; |
1016 | | |
1017 | | err = _wstat(wpath, &wstatbuf); |
1018 | | if (!err) |
1019 | | statbuf->st_mode = wstatbuf.st_mode; |
1020 | | return err; |
1021 | | #else |
1022 | 0 | int ret; |
1023 | 0 | PyObject *bytes; |
1024 | 0 | char *cpath; |
1025 | |
|
1026 | 0 | bytes = PyUnicode_EncodeFSDefault(path); |
1027 | 0 | if (bytes == NULL) |
1028 | 0 | return -2; |
1029 | | |
1030 | | /* check for embedded null bytes */ |
1031 | 0 | if (PyBytes_AsStringAndSize(bytes, &cpath, NULL) == -1) { |
1032 | 0 | Py_DECREF(bytes); |
1033 | 0 | return -2; |
1034 | 0 | } |
1035 | | |
1036 | 0 | ret = stat(cpath, statbuf); |
1037 | 0 | Py_DECREF(bytes); |
1038 | 0 | return ret; |
1039 | 0 | #endif |
1040 | 0 | } |
1041 | | |
1042 | | |
1043 | | /* This function MUST be kept async-signal-safe on POSIX when raise=0. */ |
1044 | | static int |
1045 | | get_inheritable(int fd, int raise) |
1046 | 14 | { |
1047 | | #ifdef MS_WINDOWS |
1048 | | HANDLE handle; |
1049 | | DWORD flags; |
1050 | | |
1051 | | _Py_BEGIN_SUPPRESS_IPH |
1052 | | handle = (HANDLE)_get_osfhandle(fd); |
1053 | | _Py_END_SUPPRESS_IPH |
1054 | | if (handle == INVALID_HANDLE_VALUE) { |
1055 | | if (raise) |
1056 | | PyErr_SetFromErrno(PyExc_OSError); |
1057 | | return -1; |
1058 | | } |
1059 | | |
1060 | | if (!GetHandleInformation(handle, &flags)) { |
1061 | | if (raise) |
1062 | | PyErr_SetFromWindowsErr(0); |
1063 | | return -1; |
1064 | | } |
1065 | | |
1066 | | return (flags & HANDLE_FLAG_INHERIT); |
1067 | | #else |
1068 | 14 | int flags; |
1069 | | |
1070 | 14 | flags = fcntl(fd, F_GETFD, 0); |
1071 | 14 | if (flags == -1) { |
1072 | 0 | if (raise) |
1073 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1074 | 0 | return -1; |
1075 | 0 | } |
1076 | 14 | return !(flags & FD_CLOEXEC); |
1077 | 14 | #endif |
1078 | 14 | } |
1079 | | |
/* Get the inheritable flag of the specified file descriptor.

   Public, exception-raising front end of get_inheritable().

   Return 1 if the file descriptor can be inherited, 0 if it cannot; raise an
   exception and return -1 on error. */
int
_Py_get_inheritable(int fd)
{
    const int raise_on_error = 1;

    return get_inheritable(fd, raise_on_error);
}
1088 | | |
1089 | | |
1090 | | /* This function MUST be kept async-signal-safe on POSIX when raise=0. */ |
1091 | | static int |
1092 | | set_inheritable(int fd, int inheritable, int raise, int *atomic_flag_works) |
1093 | 236 | { |
1094 | | #ifdef MS_WINDOWS |
1095 | | HANDLE handle; |
1096 | | DWORD flags; |
1097 | | #else |
1098 | 236 | #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX) |
1099 | 236 | static int ioctl_works = -1; |
1100 | 236 | int request; |
1101 | 236 | int err; |
1102 | 236 | #endif |
1103 | 236 | int flags, new_flags; |
1104 | 236 | int res; |
1105 | 236 | #endif |
1106 | | |
1107 | | /* atomic_flag_works can only be used to make the file descriptor |
1108 | | non-inheritable */ |
1109 | 236 | assert(!(atomic_flag_works != NULL && inheritable)); |
1110 | | |
1111 | 236 | if (atomic_flag_works != NULL && !inheritable) { |
1112 | 236 | if (*atomic_flag_works == -1) { |
1113 | 14 | int isInheritable = get_inheritable(fd, raise); |
1114 | 14 | if (isInheritable == -1) |
1115 | 0 | return -1; |
1116 | 14 | *atomic_flag_works = !isInheritable; |
1117 | 14 | } |
1118 | | |
1119 | 236 | if (*atomic_flag_works) |
1120 | 236 | return 0; |
1121 | 236 | } |
1122 | | |
1123 | | #ifdef MS_WINDOWS |
1124 | | _Py_BEGIN_SUPPRESS_IPH |
1125 | | handle = (HANDLE)_get_osfhandle(fd); |
1126 | | _Py_END_SUPPRESS_IPH |
1127 | | if (handle == INVALID_HANDLE_VALUE) { |
1128 | | if (raise) |
1129 | | PyErr_SetFromErrno(PyExc_OSError); |
1130 | | return -1; |
1131 | | } |
1132 | | |
1133 | | if (inheritable) |
1134 | | flags = HANDLE_FLAG_INHERIT; |
1135 | | else |
1136 | | flags = 0; |
1137 | | |
1138 | | /* This check can be removed once support for Windows 7 ends. */ |
1139 | | #define CONSOLE_PSEUDOHANDLE(handle) (((ULONG_PTR)(handle) & 0x3) == 0x3 && \ |
1140 | | GetFileType(handle) == FILE_TYPE_CHAR) |
1141 | | |
1142 | | if (!CONSOLE_PSEUDOHANDLE(handle) && |
1143 | | !SetHandleInformation(handle, HANDLE_FLAG_INHERIT, flags)) { |
1144 | | if (raise) |
1145 | | PyErr_SetFromWindowsErr(0); |
1146 | | return -1; |
1147 | | } |
1148 | | #undef CONSOLE_PSEUDOHANDLE |
1149 | | return 0; |
1150 | | |
1151 | | #else |
1152 | | |
1153 | 0 | #if defined(HAVE_SYS_IOCTL_H) && defined(FIOCLEX) && defined(FIONCLEX) |
1154 | 0 | if (ioctl_works != 0 && raise != 0) { |
1155 | | /* fast-path: ioctl() only requires one syscall */ |
1156 | | /* caveat: raise=0 is an indicator that we must be async-signal-safe |
1157 | | * thus avoid using ioctl() so we skip the fast-path. */ |
1158 | 0 | if (inheritable) |
1159 | 0 | request = FIONCLEX; |
1160 | 0 | else |
1161 | 0 | request = FIOCLEX; |
1162 | 0 | err = ioctl(fd, request, NULL); |
1163 | 0 | if (!err) { |
1164 | 0 | ioctl_works = 1; |
1165 | 0 | return 0; |
1166 | 0 | } |
1167 | | |
1168 | 0 | if (errno != ENOTTY && errno != EACCES) { |
1169 | 0 | if (raise) |
1170 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1171 | 0 | return -1; |
1172 | 0 | } |
1173 | 0 | else { |
1174 | | /* Issue #22258: Here, ENOTTY means "Inappropriate ioctl for |
1175 | | device". The ioctl is declared but not supported by the kernel. |
1176 | | Remember that ioctl() doesn't work. It is the case on |
1177 | | Illumos-based OS for example. |
1178 | | |
1179 | | Issue #27057: When SELinux policy disallows ioctl it will fail |
1180 | | with EACCES. While FIOCLEX is safe operation it may be |
1181 | | unavailable because ioctl was denied altogether. |
1182 | | This can be the case on Android. */ |
1183 | 0 | ioctl_works = 0; |
1184 | 0 | } |
1185 | | /* fallback to fcntl() if ioctl() does not work */ |
1186 | 0 | } |
1187 | 0 | #endif |
1188 | | |
1189 | | /* slow-path: fcntl() requires two syscalls */ |
1190 | 0 | flags = fcntl(fd, F_GETFD); |
1191 | 0 | if (flags < 0) { |
1192 | 0 | if (raise) |
1193 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1194 | 0 | return -1; |
1195 | 0 | } |
1196 | | |
1197 | 0 | if (inheritable) { |
1198 | 0 | new_flags = flags & ~FD_CLOEXEC; |
1199 | 0 | } |
1200 | 0 | else { |
1201 | 0 | new_flags = flags | FD_CLOEXEC; |
1202 | 0 | } |
1203 | |
|
1204 | 0 | if (new_flags == flags) { |
1205 | | /* FD_CLOEXEC flag already set/cleared: nothing to do */ |
1206 | 0 | return 0; |
1207 | 0 | } |
1208 | | |
1209 | 0 | res = fcntl(fd, F_SETFD, new_flags); |
1210 | 0 | if (res < 0) { |
1211 | 0 | if (raise) |
1212 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1213 | 0 | return -1; |
1214 | 0 | } |
1215 | 0 | return 0; |
1216 | 0 | #endif |
1217 | 0 | } |
1218 | | |
/* Make the file descriptor non-inheritable without raising an exception.

   Calls set_inheritable() with inheritable=0, raise=0 and no atomic-flag
   cache.

   Return 0 on success, set errno and return -1 on error. */
static int
make_non_inheritable(int fd)
{
    const int inheritable = 0;
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error, NULL);
}
1226 | | |
/* Set the inheritable flag of the specified file descriptor.
   On success: return 0, on error: raise an exception and return -1.

   atomic_flag_works is an optional cache describing whether the flag used to
   create the descriptor already made it non-inheritable.  If it is not NULL:

   * *atomic_flag_works == -1: query the descriptor; if it is already
     non-inheritable, store 1 and do nothing else; otherwise store 0 and make
     the descriptor non-inheritable
   * *atomic_flag_works == 1: do nothing
   * *atomic_flag_works == 0: make the descriptor non-inheritable

   Pass NULL for atomic_flag_works if no atomic flag was used to create the
   file descriptor.

   atomic_flag_works can only be used to make a file descriptor
   non-inheritable: atomic_flag_works must be NULL if inheritable=1. */
int
_Py_set_inheritable(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 1;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1248 | | |
/* Variant of _Py_set_inheritable() that never raises: on error, errno is set
   and -1 is returned.  It calls set_inheritable() with raise=0.
   This function is async-signal-safe. */
int
_Py_set_inheritable_async_safe(int fd, int inheritable, int *atomic_flag_works)
{
    const int raise_on_error = 0;

    return set_inheritable(fd, inheritable, raise_on_error,
                           atomic_flag_works);
}
1257 | | |
1258 | | static int |
1259 | | _Py_open_impl(const char *pathname, int flags, int gil_held) |
1260 | 0 | { |
1261 | 0 | int fd; |
1262 | 0 | int async_err = 0; |
1263 | 0 | #ifndef MS_WINDOWS |
1264 | 0 | int *atomic_flag_works; |
1265 | 0 | #endif |
1266 | |
|
1267 | | #ifdef MS_WINDOWS |
1268 | | flags |= O_NOINHERIT; |
1269 | | #elif defined(O_CLOEXEC) |
1270 | | atomic_flag_works = &_Py_open_cloexec_works; |
1271 | 0 | flags |= O_CLOEXEC; |
1272 | | #else |
1273 | | atomic_flag_works = NULL; |
1274 | | #endif |
1275 | |
|
1276 | 0 | if (gil_held) { |
1277 | 0 | if (PySys_Audit("open", "sOi", pathname, Py_None, flags) < 0) { |
1278 | 0 | return -1; |
1279 | 0 | } |
1280 | | |
1281 | 0 | do { |
1282 | 0 | Py_BEGIN_ALLOW_THREADS |
1283 | 0 | fd = open(pathname, flags); |
1284 | 0 | Py_END_ALLOW_THREADS |
1285 | 0 | } while (fd < 0 |
1286 | 0 | && errno == EINTR && !(async_err = PyErr_CheckSignals())); |
1287 | 0 | if (async_err) |
1288 | 0 | return -1; |
1289 | 0 | if (fd < 0) { |
1290 | 0 | PyErr_SetFromErrnoWithFilename(PyExc_OSError, pathname); |
1291 | 0 | return -1; |
1292 | 0 | } |
1293 | 0 | } |
1294 | 0 | else { |
1295 | 0 | fd = open(pathname, flags); |
1296 | 0 | if (fd < 0) |
1297 | 0 | return -1; |
1298 | 0 | } |
1299 | | |
1300 | 0 | #ifndef MS_WINDOWS |
1301 | 0 | if (set_inheritable(fd, 0, gil_held, atomic_flag_works) < 0) { |
1302 | 0 | close(fd); |
1303 | 0 | return -1; |
1304 | 0 | } |
1305 | 0 | #endif |
1306 | | |
1307 | 0 | return fd; |
1308 | 0 | } |
1309 | | |
/* Open a file with the specified flags (wrapper to open() function).
   Return a file descriptor on success. Raise an exception and return -1 on
   error.

   The file descriptor is created non-inheritable.

   When interrupted by a signal (open() fails with EINTR), the syscall is
   retried, except if the Python signal handler raises an exception.

   The GIL is released around open(). The caller must hold the GIL. */
int
_Py_open(const char *pathname, int flags)
{
    const int gil_held = 1;

    /* _Py_open() must be called with the GIL held. */
    assert(PyGILState_Check());

    return _Py_open_impl(pathname, flags, gil_held);
}
1327 | | |
/* Open a file with the specified flags (wrapper to open() function), without
   raising Python exceptions.
   Return a file descriptor on success. Set errno and return -1 on error.

   The file descriptor is created non-inheritable.

   open() is not retried: if interrupted by a signal, fail with EINTR. */
int
_Py_open_noraise(const char *pathname, int flags)
{
    const int gil_held = 0;

    return _Py_open_impl(pathname, flags, gil_held);
}
1339 | | |
/* Open a file from a wide-character path and mode.

   The "open" audit event is emitted first.  On Windows _wfopen() is used
   directly.  Elsewhere the mode is converted with wcstombs() (it must fit in
   a 10-byte buffer including the terminator, otherwise errno is set to EINVAL)
   and the path is encoded with _Py_EncodeLocaleRaw() before calling fopen().

   The underlying file descriptor is made non-inheritable after opening.

   fopen() is not retried: if interrupted by a signal, fail with EINTR.

   Return the stream on success, NULL on error. */
FILE *
_Py_wfopen(const wchar_t *path, const wchar_t *mode)
{
    FILE *stream;

    if (PySys_Audit("open", "uui", path, mode, 0) < 0) {
        return NULL;
    }

#ifdef MS_WINDOWS
    stream = _wfopen(path, mode);
#else
    char mode_bytes[10];
    char *path_bytes;
    size_t mode_len;

    mode_len = wcstombs(mode_bytes, mode, Py_ARRAY_LENGTH(mode_bytes));
    /* (size_t)-1: unconvertible character; a length equal to the buffer size
       means there is no room left for the terminating null byte */
    if (mode_len == (size_t)-1 || mode_len >= Py_ARRAY_LENGTH(mode_bytes)) {
        errno = EINVAL;
        return NULL;
    }

    path_bytes = _Py_EncodeLocaleRaw(path, NULL);
    if (path_bytes == NULL) {
        return NULL;
    }

    stream = fopen(path_bytes, mode_bytes);
    PyMem_RawFree(path_bytes);
#endif

    if (stream == NULL) {
        return NULL;
    }

    if (make_non_inheritable(fileno(stream)) < 0) {
        fclose(stream);
        return NULL;
    }
    return stream;
}
1379 | | |
/* Wrapper to fopen().

   The "open" audit event is emitted before the file is opened, and the
   underlying file descriptor is made non-inheritable afterwards.

   fopen() is not retried: if interrupted by a signal, fail with EINTR.

   Return the stream on success, NULL on error. */
FILE*
_Py_fopen(const char *pathname, const char *mode)
{
    FILE *stream;

    if (PySys_Audit("open", "ssi", pathname, mode, 0) < 0) {
        return NULL;
    }

    stream = fopen(pathname, mode);
    if (stream == NULL) {
        return NULL;
    }

    if (make_non_inheritable(fileno(stream)) < 0) {
        fclose(stream);
        return NULL;
    }
    return stream;
}
1401 | | |
1402 | | /* Open a file. Call _wfopen() on Windows, or encode the path to the filesystem |
1403 | | encoding and call fopen() otherwise. |
1404 | | |
1405 | | Return the new file object on success. Raise an exception and return NULL |
1406 | | on error. |
1407 | | |
1408 | | The file descriptor is created non-inheritable. |
1409 | | |
1410 | | When interrupted by a signal (open() fails with EINTR), retry the syscall, |
1411 | | except if the Python signal handler raises an exception. |
1412 | | |
1413 | | Release the GIL to call _wfopen() or fopen(). The caller must hold |
1414 | | the GIL. */ |
1415 | | FILE* |
1416 | | _Py_fopen_obj(PyObject *path, const char *mode) |
1417 | 0 | { |
1418 | 0 | FILE *f; |
1419 | 0 | int async_err = 0; |
1420 | | #ifdef MS_WINDOWS |
1421 | | const wchar_t *wpath; |
1422 | | wchar_t wmode[10]; |
1423 | | int usize; |
1424 | | |
1425 | | assert(PyGILState_Check()); |
1426 | | |
1427 | | if (PySys_Audit("open", "Osi", path, mode, 0) < 0) { |
1428 | | return NULL; |
1429 | | } |
1430 | | if (!PyUnicode_Check(path)) { |
1431 | | PyErr_Format(PyExc_TypeError, |
1432 | | "str file path expected under Windows, got %R", |
1433 | | Py_TYPE(path)); |
1434 | | return NULL; |
1435 | | } |
1436 | | wpath = _PyUnicode_AsUnicode(path); |
1437 | | if (wpath == NULL) |
1438 | | return NULL; |
1439 | | |
1440 | | usize = MultiByteToWideChar(CP_ACP, 0, mode, -1, |
1441 | | wmode, Py_ARRAY_LENGTH(wmode)); |
1442 | | if (usize == 0) { |
1443 | | PyErr_SetFromWindowsErr(0); |
1444 | | return NULL; |
1445 | | } |
1446 | | |
1447 | | do { |
1448 | | Py_BEGIN_ALLOW_THREADS |
1449 | | f = _wfopen(wpath, wmode); |
1450 | | Py_END_ALLOW_THREADS |
1451 | | } while (f == NULL |
1452 | | && errno == EINTR && !(async_err = PyErr_CheckSignals())); |
1453 | | #else |
1454 | 0 | PyObject *bytes; |
1455 | 0 | char *path_bytes; |
1456 | |
|
1457 | 0 | assert(PyGILState_Check()); |
1458 | |
|
1459 | 0 | if (!PyUnicode_FSConverter(path, &bytes)) |
1460 | 0 | return NULL; |
1461 | 0 | path_bytes = PyBytes_AS_STRING(bytes); |
1462 | |
|
1463 | 0 | if (PySys_Audit("open", "Osi", path, mode, 0) < 0) { |
1464 | 0 | return NULL; |
1465 | 0 | } |
1466 | | |
1467 | 0 | do { |
1468 | 0 | Py_BEGIN_ALLOW_THREADS |
1469 | 0 | f = fopen(path_bytes, mode); |
1470 | 0 | Py_END_ALLOW_THREADS |
1471 | 0 | } while (f == NULL |
1472 | 0 | && errno == EINTR && !(async_err = PyErr_CheckSignals())); |
1473 | |
|
1474 | 0 | Py_DECREF(bytes); |
1475 | 0 | #endif |
1476 | 0 | if (async_err) |
1477 | 0 | return NULL; |
1478 | | |
1479 | 0 | if (f == NULL) { |
1480 | 0 | PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, path); |
1481 | 0 | return NULL; |
1482 | 0 | } |
1483 | | |
1484 | 0 | if (set_inheritable(fileno(f), 0, 1, NULL) < 0) { |
1485 | 0 | fclose(f); |
1486 | 0 | return NULL; |
1487 | 0 | } |
1488 | 0 | return f; |
1489 | 0 | } |
1490 | | |
1491 | | /* Read count bytes from fd into buf. |
1492 | | |
1493 | | On success, return the number of read bytes, it can be lower than count. |
1494 | | If the current file offset is at or past the end of file, no bytes are read, |
1495 | | and read() returns zero. |
1496 | | |
1497 | | On error, raise an exception, set errno and return -1. |
1498 | | |
1499 | | When interrupted by a signal (read() fails with EINTR), retry the syscall. |
1500 | | If the Python signal handler raises an exception, the function returns -1 |
1501 | | (the syscall is not retried). |
1502 | | |
1503 | | Release the GIL to call read(). The caller must hold the GIL. */ |
1504 | | Py_ssize_t |
1505 | | _Py_read(int fd, void *buf, size_t count) |
1506 | 473 | { |
1507 | 473 | Py_ssize_t n; |
1508 | 473 | int err; |
1509 | 473 | int async_err = 0; |
1510 | | |
1511 | 473 | assert(PyGILState_Check()); |
1512 | | |
1513 | | /* _Py_read() must not be called with an exception set, otherwise the |
1514 | | * caller may think that read() was interrupted by a signal and the signal |
1515 | | * handler raised an exception. */ |
1516 | 473 | assert(!PyErr_Occurred()); |
1517 | | |
1518 | 473 | if (count > _PY_READ_MAX) { |
1519 | 0 | count = _PY_READ_MAX; |
1520 | 0 | } |
1521 | | |
1522 | 473 | _Py_BEGIN_SUPPRESS_IPH |
1523 | 473 | do { |
1524 | 473 | Py_BEGIN_ALLOW_THREADS |
1525 | 473 | errno = 0; |
1526 | | #ifdef MS_WINDOWS |
1527 | | n = read(fd, buf, (int)count); |
1528 | | #else |
1529 | 473 | n = read(fd, buf, count); |
1530 | 473 | #endif |
1531 | | /* save/restore errno because PyErr_CheckSignals() |
1532 | | * and PyErr_SetFromErrno() can modify it */ |
1533 | 473 | err = errno; |
1534 | 473 | Py_END_ALLOW_THREADS |
1535 | 473 | } while (n < 0 && err == EINTR && |
1536 | 473 | !(async_err = PyErr_CheckSignals())); |
1537 | 473 | _Py_END_SUPPRESS_IPH |
1538 | | |
1539 | 473 | if (async_err) { |
1540 | | /* read() was interrupted by a signal (failed with EINTR) |
1541 | | * and the Python signal handler raised an exception */ |
1542 | 0 | errno = err; |
1543 | 0 | assert(errno == EINTR && PyErr_Occurred()); |
1544 | 0 | return -1; |
1545 | 0 | } |
1546 | 473 | if (n < 0) { |
1547 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1548 | 0 | errno = err; |
1549 | 0 | return -1; |
1550 | 0 | } |
1551 | | |
1552 | 473 | return n; |
1553 | 473 | } |
1554 | | |
1555 | | static Py_ssize_t |
1556 | | _Py_write_impl(int fd, const void *buf, size_t count, int gil_held) |
1557 | 14 | { |
1558 | 14 | Py_ssize_t n; |
1559 | 14 | int err; |
1560 | 14 | int async_err = 0; |
1561 | | |
1562 | 14 | _Py_BEGIN_SUPPRESS_IPH |
1563 | | #ifdef MS_WINDOWS |
1564 | | if (count > 32767 && isatty(fd)) { |
1565 | | /* Issue #11395: the Windows console returns an error (12: not |
1566 | | enough space error) on writing into stdout if stdout mode is |
1567 | | binary and the length is greater than 66,000 bytes (or less, |
1568 | | depending on heap usage). */ |
1569 | | count = 32767; |
1570 | | } |
1571 | | #endif |
1572 | 14 | if (count > _PY_WRITE_MAX) { |
1573 | 0 | count = _PY_WRITE_MAX; |
1574 | 0 | } |
1575 | | |
1576 | 14 | if (gil_held) { |
1577 | 14 | do { |
1578 | 14 | Py_BEGIN_ALLOW_THREADS |
1579 | 14 | errno = 0; |
1580 | | #ifdef MS_WINDOWS |
1581 | | n = write(fd, buf, (int)count); |
1582 | | #else |
1583 | 14 | n = write(fd, buf, count); |
1584 | 14 | #endif |
1585 | | /* save/restore errno because PyErr_CheckSignals() |
1586 | | * and PyErr_SetFromErrno() can modify it */ |
1587 | 14 | err = errno; |
1588 | 14 | Py_END_ALLOW_THREADS |
1589 | 14 | } while (n < 0 && err == EINTR && |
1590 | 14 | !(async_err = PyErr_CheckSignals())); |
1591 | 14 | } |
1592 | 0 | else { |
1593 | 0 | do { |
1594 | 0 | errno = 0; |
1595 | | #ifdef MS_WINDOWS |
1596 | | n = write(fd, buf, (int)count); |
1597 | | #else |
1598 | 0 | n = write(fd, buf, count); |
1599 | 0 | #endif |
1600 | 0 | err = errno; |
1601 | 0 | } while (n < 0 && err == EINTR); |
1602 | 0 | } |
1603 | 14 | _Py_END_SUPPRESS_IPH |
1604 | | |
1605 | 14 | if (async_err) { |
1606 | | /* write() was interrupted by a signal (failed with EINTR) |
1607 | | and the Python signal handler raised an exception (if gil_held is |
1608 | | nonzero). */ |
1609 | 0 | errno = err; |
1610 | 0 | assert(errno == EINTR && (!gil_held || PyErr_Occurred())); |
1611 | 0 | return -1; |
1612 | 0 | } |
1613 | 14 | if (n < 0) { |
1614 | 0 | if (gil_held) |
1615 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1616 | 0 | errno = err; |
1617 | 0 | return -1; |
1618 | 0 | } |
1619 | | |
1620 | 14 | return n; |
1621 | 14 | } |
1622 | | |
1623 | | /* Write count bytes of buf into fd. |
1624 | | |
1625 | | On success, return the number of written bytes, it can be lower than count |
1626 | | including 0. On error, raise an exception, set errno and return -1. |
1627 | | |
1628 | | When interrupted by a signal (write() fails with EINTR), retry the syscall. |
1629 | | If the Python signal handler raises an exception, the function returns -1 |
1630 | | (the syscall is not retried). |
1631 | | |
1632 | | Release the GIL to call write(). The caller must hold the GIL. */ |
1633 | | Py_ssize_t |
1634 | | _Py_write(int fd, const void *buf, size_t count) |
1635 | 14 | { |
1636 | 14 | assert(PyGILState_Check()); |
1637 | | |
1638 | | /* _Py_write() must not be called with an exception set, otherwise the |
1639 | | * caller may think that write() was interrupted by a signal and the signal |
1640 | | * handler raised an exception. */ |
1641 | 14 | assert(!PyErr_Occurred()); |
1642 | | |
1643 | 14 | return _Py_write_impl(fd, buf, count, 1); |
1644 | 14 | } |
1645 | | |
1646 | | /* Write count bytes of buf into fd. |
1647 | | * |
1648 | | * On success, return the number of written bytes, it can be lower than count |
1649 | | * including 0. On error, set errno and return -1. |
1650 | | * |
1651 | | * When interrupted by a signal (write() fails with EINTR), retry the syscall |
1652 | | * without calling the Python signal handler. */ |
1653 | | Py_ssize_t |
1654 | | _Py_write_noraise(int fd, const void *buf, size_t count) |
1655 | 0 | { |
1656 | 0 | return _Py_write_impl(fd, buf, count, 0); |
1657 | 0 | } |
1658 | | |
1659 | | #ifdef HAVE_READLINK |
1660 | | |
1661 | | /* Read value of symbolic link. Encode the path to the locale encoding, decode |
1662 | | the result from the locale encoding. |
1663 | | |
1664 | | Return -1 on encoding error, on readlink() error, if the internal buffer is |
1665 | | too short, on decoding error, or if 'buf' is too short. */ |
1666 | | int |
1667 | | _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t buflen) |
1668 | 28 | { |
1669 | 28 | char *cpath; |
1670 | 28 | char cbuf[MAXPATHLEN]; |
1671 | 28 | wchar_t *wbuf; |
1672 | 28 | int res; |
1673 | 28 | size_t r1; |
1674 | | |
1675 | 28 | cpath = _Py_EncodeLocaleRaw(path, NULL); |
1676 | 28 | if (cpath == NULL) { |
1677 | 0 | errno = EINVAL; |
1678 | 0 | return -1; |
1679 | 0 | } |
1680 | 28 | res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf)); |
1681 | 28 | PyMem_RawFree(cpath); |
1682 | 28 | if (res == -1) |
1683 | 14 | return -1; |
1684 | 14 | if (res == Py_ARRAY_LENGTH(cbuf)) { |
1685 | 0 | errno = EINVAL; |
1686 | 0 | return -1; |
1687 | 0 | } |
1688 | 14 | cbuf[res] = '\0'; /* buf will be null terminated */ |
1689 | 14 | wbuf = Py_DecodeLocale(cbuf, &r1); |
1690 | 14 | if (wbuf == NULL) { |
1691 | 0 | errno = EINVAL; |
1692 | 0 | return -1; |
1693 | 0 | } |
1694 | | /* wbuf must have space to store the trailing NUL character */ |
1695 | 14 | if (buflen <= r1) { |
1696 | 0 | PyMem_RawFree(wbuf); |
1697 | 0 | errno = EINVAL; |
1698 | 0 | return -1; |
1699 | 0 | } |
1700 | 14 | wcsncpy(buf, wbuf, buflen); |
1701 | 14 | PyMem_RawFree(wbuf); |
1702 | 14 | return (int)r1; |
1703 | 14 | } |
1704 | | #endif |
1705 | | |
1706 | | #ifdef HAVE_REALPATH |
1707 | | |
1708 | | /* Return the canonicalized absolute pathname. Encode path to the locale |
1709 | | encoding, decode the result from the locale encoding. |
1710 | | |
1711 | | Return NULL on encoding error, realpath() error, decoding error |
1712 | | or if 'resolved_path' is too short. */ |
1713 | | wchar_t* |
1714 | | _Py_wrealpath(const wchar_t *path, |
1715 | | wchar_t *resolved_path, size_t resolved_path_len) |
1716 | 0 | { |
1717 | 0 | char *cpath; |
1718 | 0 | char cresolved_path[MAXPATHLEN]; |
1719 | 0 | wchar_t *wresolved_path; |
1720 | 0 | char *res; |
1721 | 0 | size_t r; |
1722 | 0 | cpath = _Py_EncodeLocaleRaw(path, NULL); |
1723 | 0 | if (cpath == NULL) { |
1724 | 0 | errno = EINVAL; |
1725 | 0 | return NULL; |
1726 | 0 | } |
1727 | 0 | res = realpath(cpath, cresolved_path); |
1728 | 0 | PyMem_RawFree(cpath); |
1729 | 0 | if (res == NULL) |
1730 | 0 | return NULL; |
1731 | | |
1732 | 0 | wresolved_path = Py_DecodeLocale(cresolved_path, &r); |
1733 | 0 | if (wresolved_path == NULL) { |
1734 | 0 | errno = EINVAL; |
1735 | 0 | return NULL; |
1736 | 0 | } |
1737 | | /* wresolved_path must have space to store the trailing NUL character */ |
1738 | 0 | if (resolved_path_len <= r) { |
1739 | 0 | PyMem_RawFree(wresolved_path); |
1740 | 0 | errno = EINVAL; |
1741 | 0 | return NULL; |
1742 | 0 | } |
1743 | 0 | wcsncpy(resolved_path, wresolved_path, resolved_path_len); |
1744 | 0 | PyMem_RawFree(wresolved_path); |
1745 | 0 | return resolved_path; |
1746 | 0 | } |
1747 | | #endif |
1748 | | |
1749 | | /* Get the current directory. buflen is the buffer size in wide characters |
1750 | | including the null character. Decode the path from the locale encoding. |
1751 | | |
1752 | | Return NULL on getcwd() error, on decoding error, or if 'buf' is |
1753 | | too short. */ |
1754 | | wchar_t* |
1755 | | _Py_wgetcwd(wchar_t *buf, size_t buflen) |
1756 | 0 | { |
1757 | | #ifdef MS_WINDOWS |
1758 | | int ibuflen = (int)Py_MIN(buflen, INT_MAX); |
1759 | | return _wgetcwd(buf, ibuflen); |
1760 | | #else |
1761 | 0 | char fname[MAXPATHLEN]; |
1762 | 0 | wchar_t *wname; |
1763 | 0 | size_t len; |
1764 | |
|
1765 | 0 | if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL) |
1766 | 0 | return NULL; |
1767 | 0 | wname = Py_DecodeLocale(fname, &len); |
1768 | 0 | if (wname == NULL) |
1769 | 0 | return NULL; |
1770 | | /* wname must have space to store the trailing NUL character */ |
1771 | 0 | if (buflen <= len) { |
1772 | 0 | PyMem_RawFree(wname); |
1773 | 0 | return NULL; |
1774 | 0 | } |
1775 | 0 | wcsncpy(buf, wname, buflen); |
1776 | 0 | PyMem_RawFree(wname); |
1777 | 0 | return buf; |
1778 | 0 | #endif |
1779 | 0 | } |
1780 | | |
1781 | | /* Duplicate a file descriptor. The new file descriptor is created as |
1782 | | non-inheritable. Return a new file descriptor on success, raise an OSError |
1783 | | exception and return -1 on error. |
1784 | | |
1785 | | The GIL is released to call dup(). The caller must hold the GIL. */ |
1786 | | int |
1787 | | _Py_dup(int fd) |
1788 | 0 | { |
1789 | | #ifdef MS_WINDOWS |
1790 | | HANDLE handle; |
1791 | | #endif |
1792 | |
|
1793 | 0 | assert(PyGILState_Check()); |
1794 | |
|
1795 | | #ifdef MS_WINDOWS |
1796 | | _Py_BEGIN_SUPPRESS_IPH |
1797 | | handle = (HANDLE)_get_osfhandle(fd); |
1798 | | _Py_END_SUPPRESS_IPH |
1799 | | if (handle == INVALID_HANDLE_VALUE) { |
1800 | | PyErr_SetFromErrno(PyExc_OSError); |
1801 | | return -1; |
1802 | | } |
1803 | | |
1804 | | Py_BEGIN_ALLOW_THREADS |
1805 | | _Py_BEGIN_SUPPRESS_IPH |
1806 | | fd = dup(fd); |
1807 | | _Py_END_SUPPRESS_IPH |
1808 | | Py_END_ALLOW_THREADS |
1809 | | if (fd < 0) { |
1810 | | PyErr_SetFromErrno(PyExc_OSError); |
1811 | | return -1; |
1812 | | } |
1813 | | |
1814 | | if (_Py_set_inheritable(fd, 0, NULL) < 0) { |
1815 | | _Py_BEGIN_SUPPRESS_IPH |
1816 | | close(fd); |
1817 | | _Py_END_SUPPRESS_IPH |
1818 | | return -1; |
1819 | | } |
1820 | | #elif defined(HAVE_FCNTL_H) && defined(F_DUPFD_CLOEXEC) |
1821 | 0 | Py_BEGIN_ALLOW_THREADS |
1822 | 0 | _Py_BEGIN_SUPPRESS_IPH |
1823 | 0 | fd = fcntl(fd, F_DUPFD_CLOEXEC, 0); |
1824 | 0 | _Py_END_SUPPRESS_IPH |
1825 | 0 | Py_END_ALLOW_THREADS |
1826 | 0 | if (fd < 0) { |
1827 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1828 | 0 | return -1; |
1829 | 0 | } |
1830 | | |
1831 | | #else |
1832 | | Py_BEGIN_ALLOW_THREADS |
1833 | | _Py_BEGIN_SUPPRESS_IPH |
1834 | | fd = dup(fd); |
1835 | | _Py_END_SUPPRESS_IPH |
1836 | | Py_END_ALLOW_THREADS |
1837 | | if (fd < 0) { |
1838 | | PyErr_SetFromErrno(PyExc_OSError); |
1839 | | return -1; |
1840 | | } |
1841 | | |
1842 | | if (_Py_set_inheritable(fd, 0, NULL) < 0) { |
1843 | | _Py_BEGIN_SUPPRESS_IPH |
1844 | | close(fd); |
1845 | | _Py_END_SUPPRESS_IPH |
1846 | | return -1; |
1847 | | } |
1848 | | #endif |
1849 | 0 | return fd; |
1850 | 0 | } |
1851 | | |
1852 | | #ifndef MS_WINDOWS |
1853 | | /* Get the blocking mode of the file descriptor. |
1854 | | Return 0 if the O_NONBLOCK flag is set, 1 if the flag is cleared, |
1855 | | raise an exception and return -1 on error. */ |
1856 | | int |
1857 | | _Py_get_blocking(int fd) |
1858 | 0 | { |
1859 | 0 | int flags; |
1860 | 0 | _Py_BEGIN_SUPPRESS_IPH |
1861 | 0 | flags = fcntl(fd, F_GETFL, 0); |
1862 | 0 | _Py_END_SUPPRESS_IPH |
1863 | 0 | if (flags < 0) { |
1864 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1865 | 0 | return -1; |
1866 | 0 | } |
1867 | | |
1868 | 0 | return !(flags & O_NONBLOCK); |
1869 | 0 | } |
1870 | | |
1871 | | /* Set the blocking mode of the specified file descriptor. |
1872 | | |
1873 | | Set the O_NONBLOCK flag if blocking is False, clear the O_NONBLOCK flag |
1874 | | otherwise. |
1875 | | |
1876 | | Return 0 on success, raise an exception and return -1 on error. */ |
1877 | | int |
1878 | | _Py_set_blocking(int fd, int blocking) |
1879 | 0 | { |
1880 | 0 | #if defined(HAVE_SYS_IOCTL_H) && defined(FIONBIO) |
1881 | 0 | int arg = !blocking; |
1882 | 0 | if (ioctl(fd, FIONBIO, &arg) < 0) |
1883 | 0 | goto error; |
1884 | | #else |
1885 | | int flags, res; |
1886 | | |
1887 | | _Py_BEGIN_SUPPRESS_IPH |
1888 | | flags = fcntl(fd, F_GETFL, 0); |
1889 | | if (flags >= 0) { |
1890 | | if (blocking) |
1891 | | flags = flags & (~O_NONBLOCK); |
1892 | | else |
1893 | | flags = flags | O_NONBLOCK; |
1894 | | |
1895 | | res = fcntl(fd, F_SETFL, flags); |
1896 | | } else { |
1897 | | res = -1; |
1898 | | } |
1899 | | _Py_END_SUPPRESS_IPH |
1900 | | |
1901 | | if (res < 0) |
1902 | | goto error; |
1903 | | #endif |
1904 | 0 | return 0; |
1905 | | |
1906 | 0 | error: |
1907 | 0 | PyErr_SetFromErrno(PyExc_OSError); |
1908 | 0 | return -1; |
1909 | 0 | } |
1910 | | #endif |
1911 | | |
1912 | | |
1913 | | int |
1914 | | _Py_GetLocaleconvNumeric(struct lconv *lc, |
1915 | | PyObject **decimal_point, PyObject **thousands_sep) |
1916 | 0 | { |
1917 | 0 | assert(decimal_point != NULL); |
1918 | 0 | assert(thousands_sep != NULL); |
1919 | |
|
1920 | 0 | int change_locale = 0; |
1921 | 0 | if ((strlen(lc->decimal_point) > 1 || ((unsigned char)lc->decimal_point[0]) > 127)) { |
1922 | 0 | change_locale = 1; |
1923 | 0 | } |
1924 | 0 | if ((strlen(lc->thousands_sep) > 1 || ((unsigned char)lc->thousands_sep[0]) > 127)) { |
1925 | 0 | change_locale = 1; |
1926 | 0 | } |
1927 | | |
1928 | | /* Keep a copy of the LC_CTYPE locale */ |
1929 | 0 | char *oldloc = NULL, *loc = NULL; |
1930 | 0 | if (change_locale) { |
1931 | 0 | oldloc = setlocale(LC_CTYPE, NULL); |
1932 | 0 | if (!oldloc) { |
1933 | 0 | PyErr_SetString(PyExc_RuntimeWarning, |
1934 | 0 | "failed to get LC_CTYPE locale"); |
1935 | 0 | return -1; |
1936 | 0 | } |
1937 | | |
1938 | 0 | oldloc = _PyMem_Strdup(oldloc); |
1939 | 0 | if (!oldloc) { |
1940 | 0 | PyErr_NoMemory(); |
1941 | 0 | return -1; |
1942 | 0 | } |
1943 | | |
1944 | 0 | loc = setlocale(LC_NUMERIC, NULL); |
1945 | 0 | if (loc != NULL && strcmp(loc, oldloc) == 0) { |
1946 | 0 | loc = NULL; |
1947 | 0 | } |
1948 | |
|
1949 | 0 | if (loc != NULL) { |
1950 | | /* Only set the locale temporarily the LC_CTYPE locale |
1951 | | if LC_NUMERIC locale is different than LC_CTYPE locale and |
1952 | | decimal_point and/or thousands_sep are non-ASCII or longer than |
1953 | | 1 byte */ |
1954 | 0 | setlocale(LC_CTYPE, loc); |
1955 | 0 | } |
1956 | 0 | } |
1957 | | |
1958 | 0 | int res = -1; |
1959 | |
|
1960 | 0 | *decimal_point = PyUnicode_DecodeLocale(lc->decimal_point, NULL); |
1961 | 0 | if (*decimal_point == NULL) { |
1962 | 0 | goto done; |
1963 | 0 | } |
1964 | | |
1965 | 0 | *thousands_sep = PyUnicode_DecodeLocale(lc->thousands_sep, NULL); |
1966 | 0 | if (*thousands_sep == NULL) { |
1967 | 0 | goto done; |
1968 | 0 | } |
1969 | | |
1970 | 0 | res = 0; |
1971 | |
|
1972 | 0 | done: |
1973 | 0 | if (loc != NULL) { |
1974 | 0 | setlocale(LC_CTYPE, oldloc); |
1975 | 0 | } |
1976 | 0 | PyMem_Free(oldloc); |
1977 | 0 | return res; |
1978 | 0 | } |