Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/io.py: 34%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
5"""
6I/O classes provide a uniform API for low-level input and output. Subclasses
7exist for a variety of input/output mechanisms.
8"""
10from __future__ import annotations
12__docformat__ = 'reStructuredText'
14import codecs
15import locale
16import os
17import re
18import sys
19import warnings
21from docutils import TransformSpec
23TYPE_CHECKING = False
24if TYPE_CHECKING:
25 from typing import Any, BinaryIO, ClassVar, Final, Literal, TextIO
27 from docutils import nodes
28 from docutils.nodes import StrPath
# Guess the locale's preferred encoding.
# If no valid guess can be made, _locale_encoding is set to `None`:
#
# TODO: check whether this is set correctly with every OS and Python version
#       or whether front-end tools need to call `locale.setlocale()`
#       before importing this module
try:
    # Return locale encoding also in UTF-8 mode
    with warnings.catch_warnings():
        # Silence every warning raised by the two locale queries below
        # (NOTE(review): presumably aimed at the deprecation warning of
        # `locale.getdefaultlocale()` -- confirm).
        warnings.simplefilter("ignore")
        # Prefer the encoding of the current locale, fall back to the
        # default locale.  If both are `None`, the `.lower()` call raises
        # AttributeError, which is handled by the catch-all below.
        _locale_encoding: str | None = (locale.getlocale()[1]
                                        or locale.getdefaultlocale()[1]
                                        ).lower()
except:  # NoQA: E722 (catchall)
    # Any problem determining the locale: use None
    _locale_encoding = None
try:
    # Make sure Python actually provides a codec for the guessed name.
    codecs.lookup(_locale_encoding)
except (LookupError, TypeError):
    # LookupError: unknown codec name; TypeError: the guess is `None`.
    _locale_encoding = None
class InputError(OSError):
    """`OSError` subclass signalling a failure to open an input source."""
class OutputError(OSError):
    """`OSError` subclass signalling a failure to open an output target."""
def check_encoding(stream: TextIO, encoding: str) -> bool | None:
    """Compare the encoding of `stream` with `encoding`.

    Both names are resolved with `codecs.lookup()`, so aliases of the
    same codec (e.g. "utf8" and "UTF-8") count as equal.

    Returns

    :None:  if `stream` has no ``encoding`` attribute or if either
            `stream.encoding` or `encoding` is not a valid encoding
            argument (e.g. ``None`` or an unknown codec name),
    :True:  if `stream.encoding` and `encoding` resolve to the same codec,
    :False: if the encodings differ.
    """
    try:
        stream_codec = codecs.lookup(stream.encoding)
        requested_codec = codecs.lookup(encoding)
    except (LookupError, AttributeError, TypeError):
        return None
    return stream_codec == requested_codec
def error_string(err: BaseException) -> str:
    """Format exception `err` as "<class name>: <message>"."""
    class_name = err.__class__.__name__
    return f'{class_name}: {err}'
class Input(TransformSpec):
    """
    Abstract base class for input wrappers.

    Docutils input objects must provide a `read()` method that
    returns the source, typically as `str` instance.

    Inheriting `TransformSpec` allows input objects to add "transforms" to
    the "Transformer".  (Since Docutils 0.19, input objects are no longer
    required to be `TransformSpec` instances.)
    """

    component_type: Final = 'input'

    default_source_path: ClassVar[str | None] = None

    def __init__(
        self,
        source: str | TextIO | nodes.document | None = None,
        source_path: StrPath | None = None,
        encoding: str | Literal['unicode'] | None = 'utf-8',
        error_handler: str | None = 'strict',
    ) -> None:
        """
        :Parameters:
            - `source`: the input data or an object providing it.
            - `source_path`: a text reference to the source; a false value
              selects the class attribute `default_source_path`.
            - `encoding`: text encoding of the source.  A false value
              enables the (deprecated) auto-detection in `decode()`.
            - `error_handler`: text decoding error handler.
        """
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        self.source_path = source_path
        """A text reference to the source."""

        if not source_path:
            self.source_path = self.default_source_path

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self) -> str:
        return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
                                                  self.source_path)

    def read(self) -> str:
        """Return input as `str`. Define in subclasses."""
        raise NotImplementedError

    def decode(self, data: str | bytes) -> str:
        """
        Decode `data` if required.

        Return Unicode `str` instances unchanged (nothing to decode).

        If `self.encoding` is None, determine encoding from data
        or try UTF-8 and the locale's preferred encoding.
        The client application should call ``locale.setlocale()`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')

        On success, the name of the encoding that worked is stored in
        `self.successful_encoding`.

        Raise UnicodeError if unsuccessful.  The exception raised by the
        last decoding attempt is attached as ``__cause__``.

        Provisional: encoding detection will be removed in Docutils 1.0.
        """
        if isinstance(data, str):
            return data  # nothing to decode
        if self.encoding:
            # We believe the user/application when the encoding is
            # explicitly given.
            assert self.encoding.lower() != 'unicode', (
                'input encoding is "unicode" but `data` is no `str` instance')
            encoding_candidates = [self.encoding]
        else:
            # The helper emits its own DeprecationWarning; suppress it
            # here, the relevant warning for this code path is issued below.
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', category=DeprecationWarning)
                data_encoding = self.determine_encoding_from_data(data)
            if data_encoding:
                # `data` declares its encoding with "magic comment" or BOM,
                encoding_candidates = [data_encoding]
            else:
                # Apply heuristics if the encoding is not specified.
                # Start with UTF-8, because that only matches
                # data that *IS* UTF-8:
                encoding_candidates = ['utf-8']
                # If UTF-8 fails, fall back to the locale's preferred encoding:
                if sys.version_info[:2] >= (3, 11):
                    fallback = locale.getencoding()
                else:
                    fallback = locale.getpreferredencoding(do_setlocale=False)
                if fallback and fallback.lower() != 'utf-8':
                    encoding_candidates.append(fallback)
        if not self.encoding and encoding_candidates[0] != 'utf-8':
            warnings.warn('Input encoding auto-detection will be removed and '
                          'the encoding values None and "" become invalid '
                          'in Docutils 1.0.', DeprecationWarning, stacklevel=2)
        # `error_handler` may be None (see `__init__`), but `str()` only
        # accepts a string: fall back to the standard handler.
        errors = self.error_handler or 'strict'
        for enc in encoding_candidates:
            try:
                decoded = str(data, enc, errors)
                self.successful_encoding = enc
                return decoded
            except (UnicodeError, LookupError) as err:
                # keep exception instance for use outside of the "for" loop.
                error = err
        raise UnicodeError(
            'Unable to decode input data.  Tried the following encodings: '
            f'{", ".join(repr(enc) for enc in encoding_candidates)}.\n'
            f'({error_string(error)})') from error

    coding_slug: ClassVar[re.Pattern[bytes]] = re.compile(
        br'coding[:=]\s*([-\w.]+)'
    )
    """Encoding declaration pattern."""

    byte_order_marks: ClassVar[tuple[tuple[bytes, str], ...]] = (
        (codecs.BOM_UTF32_BE, 'utf-32'),
        (codecs.BOM_UTF32_LE, 'utf-32'),
        (codecs.BOM_UTF8, 'utf-8-sig'),
        (codecs.BOM_UTF16_BE, 'utf-16'),
        (codecs.BOM_UTF16_LE, 'utf-16'),
    )
    """Sequence of (start_bytes, encoding) tuples for encoding detection.
    The first bytes of input data are checked against the start_bytes strings.
    A match indicates the given encoding.

    Internal. Will be removed in Docutils 1.0.
    """

    def determine_encoding_from_data(self, data: bytes) -> str | None:
        """
        Try to determine the encoding of `data` by looking *in* `data`.
        Check for a byte order mark (BOM) or an encoding declaration.

        Return the encoding name or None if nothing was found.

        Deprecated. Will be removed in Docutils 1.0.
        """
        warnings.warn('docutils.io.Input.determine_encoding_from_data() '
                      'will be removed in Docutils 1.0.',
                      DeprecationWarning, stacklevel=2)
        # check for a byte order mark:
        for start_bytes, encoding in self.byte_order_marks:
            if data.startswith(start_bytes):
                return encoding
        # check for an encoding declaration pattern in first 2 lines of file:
        for line in data.splitlines()[:2]:
            match = self.coding_slug.search(line)
            if match:
                return match.group(1).decode('ascii')
        return None

    def isatty(self) -> bool:
        """Return True, if the input source is connected to a TTY device."""
        try:
            return self.source.isatty()
        except AttributeError:
            # source has no `isatty()` method (e.g. a `str` or None)
            return False
class Output(TransformSpec):
    """
    Abstract base class for output wrappers.

    Subclasses implement `write()`, which takes the output data as its
    only argument.

    Deriving from `TransformSpec` lets output objects contribute
    "transforms" to the "Transformer".  (Since Docutils 0.19, output
    objects are no longer required to be `TransformSpec` instances.)
    """

    component_type: Final = 'output'

    default_destination_path: ClassVar[str | None] = None

    def __init__(
        self,
        destination: TextIO | str | bytes | None = None,
        destination_path: StrPath | None = None,
        encoding: str | None = None,
        error_handler: str | None = 'strict',
    ) -> None:
        self.encoding: str | None = encoding
        """Name of the text encoding used for the output."""

        self.error_handler: str = error_handler or 'strict'
        """Encoding error handler ('strict' if none was given)."""

        self.destination: TextIO | str | bytes | None = destination
        """Object receiving the output data."""

        # An empty/missing path selects the class-level default.
        self.destination_path: StrPath | None = (
            destination_path or self.default_destination_path)
        """Text reference to the destination."""

    def __repr__(self) -> str:
        template = '%s: destination=%r, destination_path=%r'
        values = (self.__class__, self.destination, self.destination_path)
        return template % values

    def write(self, data: str | bytes) -> str | bytes | None:
        """Write `data`.  To be implemented by subclasses."""
        raise NotImplementedError

    def encode(self, data: str | bytes) -> str | bytes:
        """
        Return `data`, encoded if necessary.

        `str` data is encoded with `self.encoding` and `self.error_handler`;
        anything else (e.g. `bytes`) is passed through untouched.

        Provisional: with the pseudo encoding name "unicode" in
        `self.encoding`, `data` must be a `str` and is returned as-is.
        """
        keep_text = bool(self.encoding) and self.encoding.lower() == 'unicode'
        if keep_text:
            assert isinstance(data, str), ('output encoding is "unicode" '
                                           'but `data` is no `str` instance')
            return data
        if isinstance(data, str):
            return data.encode(self.encoding, self.error_handler)
        # Non-unicode (e.g. bytes) output.
        return data
class ErrorOutput:
    """
    Wrapper class for file-like error streams with
    failsafe de- and encoding of `str`, `bytes`, and `Exception` instances.
    """

    def __init__(
        self,
        destination: TextIO | BinaryIO | str | Literal[False] | None = None,
        encoding: str | None = None,
        encoding_errors: str = 'backslashreplace',
        decoding_errors: str = 'replace',
    ) -> None:
        """
        :Parameters:
            - `destination`: a file-like object,
                        a string (path to a file),
                        `None` (write to `sys.stderr`, default), or
                        evaluating to `False` (write() requests are ignored).
            - `encoding`: `destination` text encoding. Guessed if None.
            - `encoding_errors`: how to treat encoding errors.
            - `decoding_errors`: how to treat decoding errors.
        """
        if destination is None:
            destination = sys.stderr
        elif not destination:
            destination = False
        # if `destination` is a file name, open it
        elif isinstance(destination, str):
            destination = open(destination, 'w')

        self.destination: TextIO | BinaryIO | Literal[False] = destination
        """Where warning output is sent."""

        # First usable value wins: explicit argument, the stream's own
        # encoding, the locale guess, plain ASCII.
        self.encoding: str = (
            encoding
            or getattr(destination, 'encoding', None)
            or _locale_encoding
            or 'ascii'
        )
        """The output character encoding."""

        self.encoding_errors: str = encoding_errors
        """Encoding error handler."""

        self.decoding_errors: str = decoding_errors
        """Decoding error handler."""

    def write(self, data: str | bytes | Exception) -> None:
        """
        Write `data` to self.destination. Ignore, if self.destination is False.

        `data` can be a `bytes`, `str`, or `Exception` instance.
        """
        if not self.destination:
            return
        if isinstance(data, Exception):
            data = str(data)
        # The destination is either opened in text or binary mode.
        # If data has the wrong type, try to convert it.
        try:
            self.destination.write(data)
        except UnicodeEncodeError:
            # Encoding data from string to bytes failed with the
            # destination's encoding and error handler.
            # Try again with our own encoding and error handler.
            binary = data.encode(self.encoding, self.encoding_errors)
            try:
                self.destination.write(binary)
            except TypeError:
                # The destination is a text stream and rejects `bytes`.
                # Apply our error handler with the *stream's* encoding and
                # convert back to `str`, so that the stream only gets
                # characters it is able to encode.
                stream_encoding = (getattr(self.destination, 'encoding', None)
                                   or self.encoding)
                escaped = data.encode(stream_encoding, self.encoding_errors)
                self.destination.write(escaped.decode(stream_encoding))
        except TypeError:
            if isinstance(data, str):  # destination may expect bytes
                binary = data.encode(self.encoding, self.encoding_errors)
                self.destination.write(binary)
            elif self.destination in (sys.stderr, sys.stdout):
                # write bytes to raw stream
                self.destination.buffer.write(data)
            else:
                # destination in text mode, write str
                string = data.decode(self.encoding, self.decoding_errors)
                self.destination.write(string)

    def close(self) -> None:
        """
        Close the error-output stream.

        Ignored if the destination is `sys.stderr` or `sys.stdout` or has no
        close() method.
        """
        if self.destination in (sys.stdout, sys.stderr):
            return
        try:
            self.destination.close()
        except AttributeError:
            pass

    def isatty(self) -> bool:
        """Return True, if the destination is connected to a TTY device."""
        try:
            return self.destination.isatty()
        except AttributeError:
            return False
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(
        self,
        source: TextIO | None = None,
        source_path: StrPath | None = None,
        encoding: str | Literal['unicode'] | None = 'utf-8',
        error_handler: str | None = 'strict',
        autoclose: bool = True,
        mode: Literal['r', 'rb', 'br'] = 'r'
    ) -> None:
        """
        :Parameters:
            - `source`: either a file-like object (with `read()` and `close()`
              methods) or None (use source indicated by `source_path`).
            - `source_path`: a path to a file (which is opened for reading
              if `source` is None) or `None` (implies `sys.stdin`).
            - `encoding`: the text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              the source is `sys.stdin`).
            - `mode`: how the file is to be opened.  Default is read only ('r').

        Raise `InputError` if the file at `source_path` cannot be opened and
        `UnicodeError` if `source` is opened with an encoding that differs
        from `encoding`.
        """
        super().__init__(source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # `open()` rejects the `encoding` and `errors` arguments
                # in binary mode: pass them for text mode only.
                if 'b' in mode:
                    open_kwargs = {}
                else:
                    open_kwargs = {'encoding': self.encoding,
                                   'errors': self.error_handler}
                try:
                    self.source = open(source_path, mode, **open_kwargs)
                except OSError as error:
                    raise InputError(error.errno, error.strerror, source_path)
            else:
                self.source = sys.stdin
        elif check_encoding(self.source, self.encoding) is False:
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # Use the name of the file object (if any) as source reference.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self) -> str:
        """
        Read and decode a single file, return as `str`.

        The source is closed afterwards if `self.autoclose` is true.
        """
        try:
            if not self.encoding and hasattr(self.source, 'buffer'):
                # read as binary data
                data = self.source.buffer.read()
                # decode with heuristics
                data = self.decode(data)
                # normalize newlines
                data = '\n'.join(data.splitlines()+[''])
            else:
                data = self.decode(self.source.read())
        finally:
            if self.autoclose:
                self.close()
        return data

    def readlines(self) -> list[str]:
        """
        Return lines of a single file as list of strings.

        Line endings are kept.
        """
        return self.read().splitlines(True)

    def close(self) -> None:
        """Close the source unless it is `sys.stdin`."""
        if self.source is not sys.stdin:
            self.source.close()
class FileOutput(Output):

    """Output for single, simple file-like objects."""

    default_destination_path: Final = '<file>'

    mode: Literal['w', 'a', 'x', 'wb', 'ab', 'xb', 'bw', 'ba', 'bx'] = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`).
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self,
                 destination: TextIO | None = None,
                 destination_path: StrPath | None = None,
                 encoding: str | None = None,
                 error_handler: str | None = 'strict',
                 autoclose: bool = True,
                 handle_io_errors: None = None,
                 mode=None,
                 ) -> None:
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        super().__init__(
            destination, destination_path, encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if handle_io_errors is not None:
            warnings.warn('io.FileOutput: init argument "handle_io_errors" '
                          'is ignored and will be removed in '
                          'Docutils 2.0.', DeprecationWarning, stacklevel=2)
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # The file is opened lazily by the first `write()`.
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (  # destination is file-type object -> check mode:
              mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            print('Warning: Destination mode "%s" differs from specified '
                  'mode "%s"' % (self.destination.mode, mode),
                  file=self._stderr)
        if not destination_path:
            # Use the name of the file object (if any) as reference.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self) -> None:
        """Open the file at `self.destination_path` with `self.mode`.

        Raise `OutputError` if the file cannot be opened.
        """
        # Specify encoding
        if 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        else:
            kwargs = {}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except OSError as error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path)
        self.opened = True

    def write(self, data: str | bytes) -> str | bytes:
        """Write `data` to a single file, also return it.

        `data` can be a `str` or `bytes` instance.
        If writing `bytes` fails, an attempt is made to write to
        the low-level interface ``self.destination.buffer``.

        If `data` is a `str` instance and `self.encoding` and
        `self.destination.encoding` are set to different values, `data`
        is encoded to a `bytes` instance using `self.encoding`.

        Raise `TypeError` if the destination rejects `str` data (e.g. a
        stream opened in binary mode), `ValueError` or `TypeError` if
        `bytes` data cannot be written, and `UnicodeError` if encoding fails.

        Provisional: future versions may raise an error if `self.encoding`
        and `self.destination.encoding` are set to different values.
        """
        if not self.opened:
            self.open()
        if (isinstance(data, str)
            and check_encoding(self.destination, self.encoding) is False):
            if os.linesep != '\n':
                data = data.replace('\n', os.linesep)  # fix endings
            data = self.encode(data)

        try:
            self.destination.write(data)
        except TypeError as err:
            if not isinstance(data, bytes):
                # The destination does not accept `str`.  Nothing has been
                # written: report the problem instead of losing the output.
                raise
            try:
                self.destination.buffer.write(data)
            except AttributeError:
                if check_encoding(self.destination,
                                  self.encoding) is False:
                    raise ValueError(
                        f'Encoding of {self.destination_path} '
                        f'({self.destination.encoding}) differs \n'
                        f' from specified encoding ({self.encoding})')
                else:
                    raise err
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                f'{self.encoding}.\n({error_string(err)})')
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self) -> None:
        """Close the destination unless it is `sys.stdout` or `sys.stderr`."""
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False
class BinaryFileOutput(FileOutput):
    """
    `FileOutput` variant that opens its destination in binary mode.

    Deprecated. Use `FileOutput` (works with `bytes` since Docutils 0.20).
    Will be removed in Docutils 0.24.
    """
    # Used by core.publish_cmdline_to_binary() which is also deprecated.
    mode = 'wb'

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Emit a `DeprecationWarning`, then set up like `FileOutput`."""
        message = ('"BinaryFileOutput" is obsoleted by "FileOutput"'
                   ' and will be removed in Docutils 0.24.')
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)
class StringInput(Input):
    """Input wrapper whose source is a `str` or `bytes` instance."""

    source: str | bytes

    default_source_path: Final = '<string>'

    def read(self) -> str:
        """Return `self.source` as `str`.

        The source is passed through `self.decode()`.
        """
        text = self.decode(self.source)
        return text
class StringOutput(Output):
    """Output wrapper that keeps the result as `bytes` or `str` instance.

    Provisional.
    """

    destination: str | bytes

    default_destination_path: Final = '<string>'

    def write(self, data: str | bytes) -> str | bytes:
        """Pass `data` through `self.encode()`, store and return the result.

        With the pseudo encoding name "unicode" in `self.encoding`,
        `data` must be a `str` instance and is stored/returned unchanged
        (cf. `Output.encode`).

        Otherwise, `data` can be a `bytes` or `str` instance and is
        stored/returned as a `bytes` instance
        (`str` data is encoded with `self.encode()`).

        Attention: the `output_encoding`_ setting may affect the content
        of the output (e.g. an encoding declaration in HTML or XML or the
        representation of characters as LaTeX macro vs. literal character).
        """
        result = self.encode(data)
        self.destination = result
        return result
class NullInput(Input):

    """Degenerate input that never provides any data."""

    source: None

    default_source_path: Final = 'null input'

    def read(self) -> str:
        """Return the empty string; no source is consulted."""
        nothing = ''
        return nothing
class NullOutput(Output):

    """Degenerate output that discards everything."""

    destination: None

    default_destination_path: Final = 'null output'

    def write(self, data: str | bytes) -> None:
        """Discard `data` and return None."""
        return None
class DocTreeInput(Input):

    """
    Adapter that hands an existing document tree to the caller.

    Pass the document tree as the ``source`` argument.
    """

    source: nodes.document

    default_source_path: Final = 'doctree input'

    def read(self) -> nodes.document:
        """Return `self.source` (the document tree) unchanged."""
        document = self.source
        return document