Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/io.py: 34%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id$
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
5"""
6I/O classes provide a uniform API for low-level input and output. Subclasses
7exist for a variety of input/output mechanisms.
8"""
10from __future__ import annotations
12__docformat__ = 'reStructuredText'
14import codecs
15import locale
16import os
17import re
18import sys
19import warnings
21from docutils import TransformSpec
23TYPE_CHECKING = False
24if TYPE_CHECKING:
25 from typing import Any, BinaryIO, ClassVar, Final, Literal, TextIO
27 from docutils import nodes
28 from docutils.nodes import StrPath
# Best-effort guess of the locale's preferred text encoding.
# `_locale_encoding` ends up as a lower-case codec name known to `codecs`,
# or as `None` if no usable guess can be made.
#
# TODO: check whether this is set correctly with every OS and Python version
#       or whether front-end tools need to call `locale.setlocale()`
#       before importing this module
try:
    # Query the locale directly so that the locale's encoding is reported
    # even in UTF-8 mode.  Warnings raised by the locale functions are
    # silenced for the duration of the lookup.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        _locale_encoding: str | None = locale.getlocale()[1]
        if not _locale_encoding:
            _locale_encoding = locale.getdefaultlocale()[1]
        _locale_encoding = _locale_encoding.lower()
except:  # NoQA: E722 (catchall)
    # Whatever went wrong while querying the locale: there is no guess.
    _locale_encoding = None
try:
    # Discard names that Python has no codec for (and `None`).
    codecs.lookup(_locale_encoding)
except (LookupError, TypeError):
    _locale_encoding = None
class InputError(OSError):
    """`OSError` subclass for failures on the input side."""
class OutputError(OSError):
    """`OSError` subclass for failures on the output side."""
def check_encoding(stream: TextIO, encoding: str) -> bool | None:
    """Test whether the encoding of `stream` matches `encoding`.

    Both names are resolved with `codecs.lookup()`, so aliases of the
    same codec (e.g. "utf8" and "UTF-8") compare equal.

    Returns

    :None:  if `encoding` or `stream.encoding` is not a valid encoding
            argument (e.g. ``None``) or `stream.encoding` is missing,
    :True:  if `stream.encoding` resolves to the same codec as `encoding`,
    :False: if the encodings differ.
    """
    try:
        stream_codec = codecs.lookup(stream.encoding)
        expected_codec = codecs.lookup(encoding)
    except (LookupError, AttributeError, TypeError):
        # unknown codec name, no `encoding` attribute, or a non-string value
        return None
    return stream_codec == expected_codec
def error_string(err: BaseException) -> str:
    """Format exception `err` as "<class name>: <message>"."""
    class_name = err.__class__.__name__
    return '%s: %s' % (class_name, err)
class Input(TransformSpec):
    """
    Abstract base class for input wrappers.

    Docutils input objects must provide a `read()` method that
    returns the source, typically as `str` instance.

    Inheriting `TransformSpec` allows input objects to add "transforms" to
    the "Transformer".  (Since Docutils 0.19, input objects are no longer
    required to be `TransformSpec` instances.)
    """

    component_type: Final = 'input'

    default_source_path: ClassVar[str | None] = None

    def __init__(
        self,
        source: str | TextIO | nodes.document | None = None,
        source_path: StrPath | None = None,
        encoding: str | Literal['unicode'] | None = 'utf-8',
        error_handler: str | None = 'strict',
    ) -> None:
        """
        :Parameters:
            - `source`: the input data or an object providing it.
            - `source_path`: a text reference to the source; falls back
              to `default_source_path` if empty.
            - `encoding`: the text encoding of the source.  The pseudo
              encoding "unicode" declares that the data is already `str`;
              ``None`` or "" trigger auto-detection in `decode()`.
            - `error_handler`: the decoding error handler.
        """
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        self.source_path = source_path
        """A text reference to the source."""

        if not source_path:
            self.source_path = self.default_source_path

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self) -> str:
        return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
                                                  self.source_path)

    def read(self) -> str:
        """Return input as `str`. Define in subclasses."""
        raise NotImplementedError

    def decode(self, data: str | bytes) -> str:
        """
        Decode `data` if required.

        Return Unicode `str` instances unchanged (nothing to decode).

        If `self.encoding` is None, determine encoding from data
        or try UTF-8 and the locale's preferred encoding.
        The client application should call ``locale.setlocale()`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')

        A missing error handler (``self.error_handler`` is None or empty)
        is treated as 'strict'.

        On success, the encoding used is stored in
        `self.successful_encoding`.

        Raise UnicodeError if unsuccessful.

        Provisional: encoding detection will be removed in Docutils 1.0.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('input encoding is "unicode" '
                                           'but `data` is no `str` instance')
        if isinstance(data, str):
            # nothing to decode
            return data
        if self.encoding:
            # We believe the user/application when the encoding is
            # explicitly given.
            encoding_candidates = [self.encoding]
        else:
            # The helper is deprecated itself; only the warning emitted
            # below should reach the caller.
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', category=DeprecationWarning)
                data_encoding = self.determine_encoding_from_data(data)
            if data_encoding:
                # `data` declares its encoding with "magic comment" or BOM,
                encoding_candidates = [data_encoding]
            else:
                # Apply heuristics if the encoding is not specified.
                # Start with UTF-8, because that only matches
                # data that *IS* UTF-8:
                encoding_candidates = ['utf-8']
                # If UTF-8 fails, fall back to the locale's preferred encoding:
                if sys.version_info[:2] >= (3, 11):
                    fallback = locale.getencoding()
                else:
                    fallback = locale.getpreferredencoding(do_setlocale=False)
                if fallback and fallback.lower() != 'utf-8':
                    encoding_candidates.append(fallback)
        if not self.encoding and encoding_candidates[0] != 'utf-8':
            warnings.warn('Input encoding auto-detection will be removed and '
                          'the encoding values None and "" become invalid '
                          'in Docutils 1.0.', DeprecationWarning, stacklevel=2)
        # `str(data, enc, errors)` rejects ``errors=None`` with a TypeError,
        # although `error_handler` may legitimately be None.
        errors = self.error_handler or 'strict'
        error = None
        for enc in encoding_candidates:
            try:
                decoded = str(data, enc, errors)
            except (UnicodeError, LookupError) as err:
                # keep exception instance for use outside of the "for" loop.
                error = err
            else:
                self.successful_encoding = enc
                return decoded
        raise UnicodeError(
            'Unable to decode input data.  Tried the following encodings: '
            f'{", ".join(repr(enc) for enc in encoding_candidates)}.\n'
            f'({error_string(error)})') from error

    coding_slug: ClassVar[re.Pattern[bytes]] = re.compile(
        br'coding[:=]\s*([-\w.]+)'
    )
    """Encoding declaration pattern."""

    byte_order_marks: ClassVar[tuple[tuple[bytes, str], ...]] = (
        (codecs.BOM_UTF32_BE, 'utf-32'),
        (codecs.BOM_UTF32_LE, 'utf-32'),
        (codecs.BOM_UTF8, 'utf-8-sig'),
        (codecs.BOM_UTF16_BE, 'utf-16'),
        (codecs.BOM_UTF16_LE, 'utf-16'),
    )
    """Sequence of (start_bytes, encoding) tuples for encoding detection.
    The first bytes of input data are checked against the start_bytes strings.
    A match indicates the given encoding.

    Internal. Will be removed in Docutils 1.0.
    """

    def determine_encoding_from_data(self, data: bytes) -> str | None:
        """
        Try to determine the encoding of `data` by looking *in* `data`.
        Check for a byte order mark (BOM) or an encoding declaration.

        Return the encoding name or None if nothing was found.

        Deprecated. Will be removed in Docutils 1.0.
        """
        warnings.warn('docutils.io.Input.determine_encoding_from_data() '
                      'will be removed in Docutils 1.0.',
                      DeprecationWarning, stacklevel=2)
        # check for a byte order mark:
        for start_bytes, encoding in self.byte_order_marks:
            if data.startswith(start_bytes):
                return encoding
        # check for an encoding declaration pattern in first 2 lines of file:
        for line in data.splitlines()[:2]:
            match = self.coding_slug.search(line)
            if match:
                return match.group(1).decode('ascii')
        return None

    def isatty(self) -> bool:
        """Return True, if the input source is connected to a TTY device."""
        try:
            return self.source.isatty()
        except AttributeError:
            # source has no `isatty()` method (e.g. a string or None)
            return False
class Output(TransformSpec):
    """
    Abstract base class for output wrappers.

    Docutils output objects must provide a `write()` method that
    expects and handles one argument (the output).

    Inheriting `TransformSpec` allows output objects to add "transforms" to
    the "Transformer".  (Since Docutils 0.19, output objects are no longer
    required to be `TransformSpec` instances.)
    """

    component_type: Final = 'output'

    default_destination_path: ClassVar[str | None] = None

    def __init__(
        self,
        destination: TextIO | str | bytes | None = None,
        destination_path: StrPath | None = None,
        encoding: str | None = None,
        error_handler: str | None = 'strict',
    ) -> None:
        """Store the arguments; empty values fall back to class defaults.

        An empty `error_handler` becomes 'strict', an empty
        `destination_path` becomes `default_destination_path`.
        """
        self.encoding: str | None = encoding
        """Text encoding for the output destination."""

        self.error_handler: str = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination: TextIO | str | bytes | None = destination
        """The destination for output data."""

        self.destination_path: StrPath | None = (
            destination_path or self.default_destination_path)
        """A text reference to the destination."""

    def __repr__(self) -> str:
        details = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % details

    def write(self, data: str | bytes) -> str | bytes | None:
        """Write `data`. Define in subclasses."""
        raise NotImplementedError

    def encode(self, data: str | bytes) -> str | bytes:
        """
        Return `data` in encoded form.

        `str` data is encoded with `self.encoding` and `self.error_handler`;
        anything else (e.g. `bytes`) is passed through untouched.

        Provisional: If `self.encoding` is set to the pseudo encoding name
        "unicode", `data` must be a `str` instance and is returned unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('output encoding is "unicode" '
                                           'but `data` is no `str` instance')
            return data
        if isinstance(data, str):
            return data.encode(self.encoding, self.error_handler)
        # Non-unicode (e.g. bytes) output.
        return data
class ErrorOutput:
    """
    Wrapper class for file-like error streams with
    failsafe de- and encoding of `str`, `bytes`, and `Exception` instances.
    """

    def __init__(
        self,
        destination: TextIO | BinaryIO | str | Literal[False] | None = None,
        encoding: str | None = None,
        encoding_errors: str = 'backslashreplace',
        decoding_errors: str = 'replace',
    ) -> None:
        """
        :Parameters:
            - `destination`: a file-like object,
                        a string (path to a file),
                        `None` (write to `sys.stderr`, default), or
                        evaluating to `False` (write() requests are ignored).
            - `encoding`: `destination` text encoding. Guessed if None.
            - `encoding_errors`: how to treat encoding errors.
            - `decoding_errors`: how to treat errors when `bytes` have to
              be decoded for a text-mode destination.
        """
        if destination is None:
            destination = sys.stderr
        elif not destination:
            destination = False
        # if `destination` is a file name, open it
        elif isinstance(destination, str):
            # Open with the requested encoding (if any), so that the file
            # and `self.encoding` agree.  ``encoding=None`` keeps the
            # platform default.
            destination = open(destination, 'w', encoding=encoding)

        self.destination: TextIO | BinaryIO | Literal[False] = destination
        """Where warning output is sent."""

        self.encoding: str = (
            encoding
            or getattr(destination, 'encoding', None)
            or _locale_encoding
            or 'ascii'
        )
        """The output character encoding."""

        self.encoding_errors: str = encoding_errors
        """Encoding error handler."""

        self.decoding_errors: str = decoding_errors
        """Decoding error handler."""

    def write(self, data: str | bytes | Exception) -> None:
        """
        Write `data` to self.destination. Ignore, if self.destination is False.

        `data` can be a `bytes`, `str`, or `Exception` instance.
        """
        if not self.destination:
            return
        if isinstance(data, Exception):
            data = str(data)
        # The destination is either opened in text or binary mode.
        # If data has the wrong type, try to convert it.
        try:
            self.destination.write(data)
        except UnicodeEncodeError:
            # Encoding data from string to bytes failed with the
            # destination's encoding and error handler.
            # Try again with our own encoding and error handler.
            self._write_encoded(data)
        except TypeError:
            if isinstance(data, str):  # destination may expect bytes
                binary = data.encode(self.encoding, self.encoding_errors)
                self.destination.write(binary)
            elif self.destination in (sys.stderr, sys.stdout):
                # write bytes to raw stream
                self.destination.buffer.write(data)
            else:
                # destination in text mode, write str
                string = data.decode(self.encoding, self.decoding_errors)
                self.destination.write(string)

    def _write_encoded(self, data: str) -> None:
        """Write `data` encoded with our own encoding and error handler.

        Used after the destination failed to encode `data` itself.
        A text stream rejects the resulting `bytes` with a `TypeError`;
        in that case they go to the stream's underlying binary ``buffer``
        or, if there is none, are written as text again after decoding
        (with the default 'backslashreplace' handler the problematic
        characters are plain ASCII escapes by then).
        """
        binary = data.encode(self.encoding, self.encoding_errors)
        try:
            self.destination.write(binary)
        except TypeError:
            buffer = getattr(self.destination, 'buffer', None)
            if buffer is None:
                self.destination.write(
                    binary.decode(self.encoding, self.decoding_errors))
                return
            # Flush pending text first to keep the output in order.
            flush = getattr(self.destination, 'flush', None)
            if flush is not None:
                flush()
            buffer.write(binary)

    def close(self) -> None:
        """
        Close the error-output stream.

        Ignored if the destination is `sys.stderr` or `sys.stdout` or has no
        close() method.
        """
        if self.destination in (sys.stdout, sys.stderr):
            return
        try:
            self.destination.close()
        except AttributeError:
            pass

    def isatty(self) -> bool:
        """Return True, if the destination is connected to a TTY device."""
        try:
            return self.destination.isatty()
        except AttributeError:
            return False
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(
        self,
        source: TextIO | None = None,
        source_path: StrPath | None = None,
        encoding: str | Literal['unicode'] | None = 'utf-8',
        error_handler: str | None = 'strict',
        autoclose: bool = True,
        mode: Literal['r', 'rb', 'br'] = 'r'
    ) -> None:
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened for reading.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              `sys.stdin` is the source).
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is read only ('r').

        Raise `InputError` if the file at `source_path` cannot be opened
        and `UnicodeError` if `source` is opened with an encoding
        different from `encoding`.
        """
        super().__init__(source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # `open()` raises a ValueError if a text encoding or error
                # handler is passed together with a binary mode.
                if 'b' in mode:
                    kwargs = {}
                else:
                    kwargs = {'encoding': self.encoding,
                              'errors': self.error_handler}
                try:
                    self.source = open(source_path, mode, **kwargs)
                except OSError as error:
                    raise InputError(error.errno, error.strerror,
                                     source_path) from error
            else:
                self.source = sys.stdin
        elif check_encoding(self.source, self.encoding) is False:
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # use the name of the file object (if it has one)
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self) -> str:
        """
        Read and decode a single file, return as `str`.

        Without a configured encoding, a source with a ``buffer``
        attribute is read as binary data, decoded with `Input.decode()`,
        and has its line endings normalized to "\\n".  In all other cases,
        the result of ``self.source.read()`` is returned as is.

        The source is closed afterwards if `self.autoclose` is true.
        """
        try:
            if not self.encoding and hasattr(self.source, 'buffer'):
                # read as binary data
                data = self.source.buffer.read()
                # decode with heuristics
                data = self.decode(data)
                # normalize newlines
                data = '\n'.join(data.splitlines()+[''])
            else:
                data = self.source.read()
        finally:
            if self.autoclose:
                self.close()
        return data

    def readlines(self) -> list[str]:
        """
        Return lines of a single file as list of strings.

        The line endings are kept.
        """
        return self.read().splitlines(True)

    def close(self) -> None:
        """Close the source, unless it is `sys.stdin`."""
        if self.source is not sys.stdin:
            self.source.close()
class FileOutput(Output):

    """Output for single, simple file-like objects."""

    default_destination_path: Final = '<file>'

    mode: Literal['w', 'a', 'x', 'wb', 'ab', 'xb', 'bw', 'ba', 'bx'] = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`).
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self,
                 destination: TextIO | None = None,
                 destination_path: StrPath | None = None,
                 encoding: str | None = None,
                 error_handler: str | None = 'strict',
                 autoclose: bool = True,
                 handle_io_errors: None = None,
                 mode=None,
                 ) -> None:
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        super().__init__(
            destination, destination_path, encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if handle_io_errors is not None:
            warnings.warn('io.FileOutput: init argument "handle_io_errors" '
                          'is ignored and will be removed in '
                          'Docutils 2.0.', DeprecationWarning, stacklevel=2)
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # the file is opened lazily by the first `write()`
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (  # destination is file-type object -> check mode:
              mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            print('Warning: Destination mode "%s" differs from specified '
                  'mode "%s"' % (self.destination.mode, mode),
                  file=self._stderr)
        if not destination_path:
            # use the name of the file object (if it has one)
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self) -> None:
        """Open the file at `self.destination_path` with `self.mode`.

        Raise `OutputError` if the file cannot be opened.
        """
        # Specify encoding
        if 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        else:
            kwargs = {}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except OSError as error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path) from error
        self.opened = True

    def write(self, data: str | bytes) -> str | bytes:
        """Write `data` to a single file, also return it.

        `data` can be a `str` or `bytes` instance.
        If writing `bytes` fails, an attempt is made to write to
        the low-level interface ``self.destination.buffer``.

        If `data` is a `str` instance and `self.encoding` and
        `self.destination.encoding` are set to different values, `data`
        is encoded to a `bytes` instance using `self.encoding`.

        Raise `TypeError` if the destination rejects `data` and there is
        no fallback (e.g. `str` data for a binary destination),
        `ValueError` if encoded data cannot be written because of an
        encoding clash, and `UnicodeError` if `data` cannot be encoded.

        Provisional: future versions may raise an error if `self.encoding`
        and `self.destination.encoding` are set to different values.
        """
        if not self.opened:
            self.open()
        if (isinstance(data, str)
            and check_encoding(self.destination, self.encoding) is False):
            if os.linesep != '\n':
                data = data.replace('\n', os.linesep)  # fix endings
            data = self.encode(data)

        try:
            self.destination.write(data)
        except TypeError as err:
            if not isinstance(data, bytes):
                # Nothing was written and there is no fallback for `str`
                # data: report the failure instead of pretending success.
                raise
            try:
                self.destination.buffer.write(data)
            except AttributeError:
                if check_encoding(self.destination,
                                  self.encoding) is False:
                    raise ValueError(
                        f'Encoding of {self.destination_path} '
                        f'({self.destination.encoding}) differs \n'
                        f' from specified encoding ({self.encoding})')
                else:
                    raise err
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                f'{self.encoding}.\n({error_string(err)})') from err
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self) -> None:
        """Close the destination, unless it is a standard stream."""
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False
class BinaryFileOutput(FileOutput):
    """
    Variant of `docutils.io.FileOutput` that opens its file in binary mode.

    Deprecated. Use `FileOutput` (works with `bytes` since Docutils 0.20).
    Will be removed in Docutils 0.24.
    """
    # Used by core.publish_cmdline_to_binary() which is also deprecated.
    mode = 'wb'

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Warn about the deprecation, then set up like `FileOutput`."""
        message = ('"BinaryFileOutput" is obsoleted by "FileOutput"'
                   ' and will be removed in Docutils 0.24.')
        warnings.warn(message, DeprecationWarning, stacklevel=2)
        super().__init__(*args, **kwargs)
class StringInput(Input):
    """Input wrapper around an in-memory `str` or `bytes` object."""

    source: str | bytes

    default_source_path: Final = '<string>'

    def read(self) -> str:
        """Return `self.source` as `str`.

        `bytes` are decoded, `str` is passed through (see `Input.decode`).
        """
        text = self.decode(self.source)
        return text
class StringOutput(Output):
    """Output wrapper that keeps the result in memory (`bytes` or `str`).

    Provisional.
    """

    destination: str | bytes

    default_destination_path: Final = '<string>'

    def write(self, data: str | bytes) -> str | bytes:
        """Keep `data` in `self.destination` and hand it back.

        With the pseudo encoding name "unicode" in `self.encoding`,
        `data` must be a `str` instance and is stored/returned unchanged
        (cf. `Output.encode`).

        With any other encoding, `data` can be a `bytes` or `str` instance
        and is stored/returned as a `bytes` instance
        (`str` data is encoded with `self.encode()`).

        Attention: the `output_encoding`_ setting may affect the content
        of the output (e.g. an encoding declaration in HTML or XML or the
        representation of characters as LaTeX macro vs. literal character).
        """
        result = self.encode(data)
        self.destination = result
        return self.destination
class NullInput(Input):

    """Placeholder input that never provides any data."""

    source: None

    default_source_path: Final = 'null input'

    def read(self) -> str:
        """Return the empty string (there is nothing to read)."""
        nothing = ''
        return nothing
class NullOutput(Output):

    """Placeholder output that discards everything written to it."""

    destination: None

    default_destination_path: Final = 'null output'

    def write(self, data: str | bytes) -> None:
        """Discard `data` and return None."""
        return None
class DocTreeInput(Input):

    """
    Input adapter for an already existing document tree.

    Pass the document tree in the ``source`` parameter.
    """

    source: nodes.document

    default_source_path: Final = 'doctree input'

    def read(self) -> nodes.document:
        """Hand back the document tree stored in `self.source`."""
        doctree = self.source
        return doctree