Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/io.py: 36%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# $Id: io.py 10356 2026-06-12 21:49:56Z milde $
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
5"""
6I/O classes provide a uniform API for low-level input and output. Subclasses
7exist for a variety of input/output mechanisms.
8"""
10from __future__ import annotations
12__docformat__ = 'reStructuredText'
14import codecs
15import locale
16import os
17import sys
18import warnings
20from docutils import TransformSpec
22TYPE_CHECKING = False
23if TYPE_CHECKING:
24 from typing import BinaryIO, ClassVar, Final, Literal, TextIO
26 from docutils import nodes
27 from docutils.nodes import StrPath
29# Guess the locale's preferred encoding.
30# If no valid guess can be made, _locale_encoding is set to `None`:
31#
32# TODO: check whether this is set correctly with every OS and Python version
33# or whether front-end tools need to call `locale.setlocale()`
34# before importing this module
try:
    # `locale.getlocale()` returns the locale encoding also in UTF-8 mode;
    # `locale.getdefaultlocale()` is only consulted as a fallback.
    # Warnings emitted by these look-ups are silenced.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        _locale_encoding: str | None = (locale.getlocale()[1]
                                        or locale.getdefaultlocale()[1]
                                        ).lower()
except Exception:
    # Any problem determining the locale (e.g. both look-ups returned
    # `None`, so `.lower()` failed): use None.
    # `Exception` instead of a bare ``except`` lets KeyboardInterrupt and
    # SystemExit raised during import propagate.
    _locale_encoding = None
try:
    # Discard the guess if Python has no codec of that name.
    codecs.lookup(_locale_encoding)
except (LookupError, TypeError):
    # LookupError: unknown codec name; TypeError: the guess is `None`.
    _locale_encoding = None
class InputError(OSError):
    """`OSError` subclass for failures on the input side.

    Raised by `FileInput` when the source file cannot be opened.
    """
class OutputError(OSError):
    """`OSError` subclass for failures on the output side.

    Raised by `FileOutput.open()` when the destination file cannot be
    opened.
    """
55def check_encoding(stream: TextIO, encoding: str) -> bool | None:
56 """Test, whether the encoding of `stream` matches `encoding`.
58 Returns
60 :None: if `encoding` or `stream.encoding` are not a valid encoding
61 argument (e.g. ``None``) or `stream.encoding is missing.
62 :True: if the encoding argument resolves to the same value as `encoding`,
63 :False: if the encodings differ.
64 """
65 try:
66 return codecs.lookup(stream.encoding) == codecs.lookup(encoding)
67 except (LookupError, AttributeError, TypeError):
68 return None
def error_string(err: BaseException) -> str:
    """Format `err` as "<exception class name>: <message>"."""
    class_name = err.__class__.__name__
    return '%s: %s' % (class_name, err)
class Input(TransformSpec):
    """
    Abstract base class for input wrappers.

    Concrete input classes have to implement `read()`, which returns
    the source (typically as a `str` instance).

    Deriving from `TransformSpec` lets an input object contribute
    "transforms" to the "Transformer". (Since Docutils 0.19, input
    objects are no longer required to be `TransformSpec` instances.)
    """

    component_type: Final = 'input'

    default_source_path: ClassVar[str | None] = None

    def __init__(
        self,
        source: str | TextIO | nodes.document | None = None,
        source_path: StrPath | None = None,
        encoding: str | None = 'utf-8',
        error_handler: str | None = 'strict',
    ) -> None:
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        # An empty or missing path is replaced by the class default.
        self.source_path = source_path or self.default_source_path
        """A text reference to the source."""

    def __repr__(self) -> str:
        template = '%s: source=%r, source_path=%r'
        return template % (self.__class__, self.source, self.source_path)

    def read(self) -> str:
        """Return input as `str`. To be implemented by subclasses."""
        raise NotImplementedError

    def decode(self, data: str | bytes) -> str:
        """
        Return `data` as `str`.

        `str` instances are passed through unchanged; anything else is
        decoded with `self.encoding` (UTF-8 if that is unset) and
        `self.error_handler`.
        """
        if not isinstance(data, str):
            codec = self.encoding or 'utf-8'
            return str(data, codec, self.error_handler)
        return data  # already text, nothing to decode

    def isatty(self) -> bool:
        """Return True, if the input source is connected to a TTY device.

        Sources that raise `AttributeError` (e.g. because they have no
        `isatty()` method) are reported as not being a TTY.
        """
        try:
            return self.source.isatty()
        except AttributeError:
            return False
class Output(TransformSpec):
    """
    Abstract base class for output wrappers.

    Docutils output objects must provide a `write()` method that
    expects and handles one argument (the output).

    Inheriting `TransformSpec` allows output objects to add "transforms" to
    the "Transformer". (Since Docutils 0.19, output objects are no longer
    required to be `TransformSpec` instances.)
    """

    component_type: Final = 'output'

    default_destination_path: ClassVar[str | None] = None

    def __init__(
        self,
        destination: TextIO | str | bytes | None = None,
        destination_path: StrPath | None = None,
        encoding: str | None = None,
        error_handler: str | None = 'strict',
    ) -> None:
        """
        :Parameters:
            - `destination`: where the output goes (stored unchanged).
            - `destination_path`: a text reference to the destination;
              if empty or None, `default_destination_path` is used.
            - `encoding`: the text encoding of the output.
            - `error_handler`: the encoding error handler
              (None or empty means 'strict').
        """
        self.encoding: str | None = encoding
        """Text encoding for the output destination."""

        self.error_handler: str = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination: TextIO | str | bytes | None = destination
        """The destination for output data."""

        self.destination_path: StrPath | None = destination_path
        """A text reference to the destination."""

        if not destination_path:
            self.destination_path = self.default_destination_path

    def __repr__(self) -> str:
        return ('%s: destination=%r, destination_path=%r'
                % (self.__class__, self.destination, self.destination_path))

    def write(self, data: str | bytes) -> str | bytes | None:
        """Write `data`. Define in subclasses."""
        raise NotImplementedError

    def encode(self, data: str | bytes) -> str | bytes:
        """
        Encode and return `data`.

        If `data` is a `bytes` instance, it is returned unchanged.
        Otherwise it is encoded with `self.encoding` and
        `self.error_handler`. If `self.encoding` is unset (None or
        empty), UTF-8 is used -- the same fallback as in `Input.decode()`.

        Provisional: If `self.encoding` is set to the pseudo encoding name
        "unicode", `data` must be a `str` instance and is returned unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('output encoding is "unicode" '
                                           'but `data` is no `str` instance')
            return data
        if not isinstance(data, str):
            # Non-unicode (e.g. bytes) output.
            return data
        # `str.encode()` rejects None as encoding name, so substitute the
        # default instead of failing with a TypeError.
        return data.encode(self.encoding or 'utf-8', self.error_handler)
208class ErrorOutput:
209 """
210 Wrapper class for file-like error streams with
211 failsafe de- and encoding of `str`, `bytes`, and `Exception` instances.
212 """
214 def __init__(
215 self,
216 destination: TextIO | BinaryIO | str | Literal[False] | None = None,
217 encoding: str | None = None,
218 encoding_errors: str = 'backslashreplace',
219 decoding_errors: str = 'replace',
220 ) -> None:
221 """
222 :Parameters:
223 - `destination`: a file-like object,
224 a string (path to a file),
225 `None` (write to `sys.stderr`, default), or
226 evaluating to `False` (write() requests are ignored).
227 - `encoding`: `destination` text encoding. Guessed if None.
228 - `encoding_errors`: how to treat encoding errors.
229 """
230 if destination is None:
231 destination = sys.stderr
232 elif not destination:
233 destination = False
234 # if `destination` is a file name, open it
235 elif isinstance(destination, str):
236 destination = open(destination, 'w')
238 self.destination: TextIO | BinaryIO | Literal[False] = destination
239 """Where warning output is sent."""
241 self.encoding: str = (
242 encoding
243 or getattr(destination, 'encoding', None)
244 or _locale_encoding
245 or 'ascii'
246 )
247 """The output character encoding."""
249 self.encoding_errors: str = encoding_errors
250 """Encoding error handler."""
252 self.decoding_errors: str = decoding_errors
253 """Decoding error handler."""
255 def write(self, data: str | bytes | Exception) -> None:
256 """
257 Write `data` to self.destination. Ignore, if self.destination is False.
259 `data` can be a `bytes`, `str`, or `Exception` instance.
260 """
261 if not self.destination:
262 return
263 if isinstance(data, Exception):
264 data = str(data)
265 # The destination is either opened in text or binary mode.
266 # If data has the wrong type, try to convert it.
267 try:
268 self.destination.write(data)
269 except UnicodeEncodeError:
270 # Encoding data from string to bytes failed with the
271 # destination's encoding and error handler.
272 # Try again with our own encoding and error handler.
273 binary = data.encode(self.encoding, self.encoding_errors)
274 self.destination.write(binary)
275 except TypeError:
276 if isinstance(data, str): # destination may expect bytes
277 binary = data.encode(self.encoding, self.encoding_errors)
278 self.destination.write(binary)
279 elif self.destination in (sys.stderr, sys.stdout):
280 # write bytes to raw stream
281 self.destination.buffer.write(data)
282 else:
283 # destination in text mode, write str
284 string = data.decode(self.encoding, self.decoding_errors)
285 self.destination.write(string)
287 def close(self) -> None:
288 """
289 Close the error-output stream.
291 Ignored if the destination is` sys.stderr` or `sys.stdout` or has no
292 close() method.
293 """
294 if self.destination in (sys.stdout, sys.stderr):
295 return
296 try:
297 self.destination.close()
298 except AttributeError:
299 pass
301 def isatty(self) -> bool:
302 """Return True, if the destination is connected to a TTY device."""
303 try:
304 return self.destination.isatty()
305 except AttributeError:
306 return False
class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(
        self,
        source: TextIO | None = None,
        source_path: StrPath | None = None,
        encoding: str | Literal['unicode'] | None = 'utf-8',
        error_handler: str | None = 'strict',
        autoclose: bool = True,
        mode: Literal['r', 'rb', 'br'] = 'r'
    ) -> None:
        """
        :Parameters:
            - `source`: either a file-like object (with `read()` and `close()`
              methods) or None (use source indicated by `source_path`).
            - `source_path`: a path to a file (which is opened for reading
              if `source` is None) or `None` (implies `sys.stdin`).
            - `encoding`: the text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              the source is `sys.stdin`).
            - `mode`: how the file is to be opened. Default is read only ('r').

        Raise `InputError` if the file at `source_path` cannot be opened
        and `UnicodeError` if `source` is a stream whose encoding differs
        from `encoding`.
        """
        super().__init__(source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                # `open()` rejects the text-only arguments `encoding` and
                # `errors` in binary mode (cf. `FileOutput.open()`).
                # Binary data is decoded later in `read()`.
                if 'b' in mode:
                    kwargs = {}
                else:
                    kwargs = {'encoding': self.encoding,
                              'errors': self.error_handler}
                try:
                    self.source = open(source_path, mode, **kwargs)
                except OSError as error:
                    raise InputError(error.errno, error.strerror,
                                     source_path) from error
            else:
                self.source = sys.stdin
        elif check_encoding(self.source, self.encoding) is False:
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # Use the name of the file object (if it has one).
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self) -> str:
        """
        Read and decode a single file, return as `str`.

        The source is closed afterwards if `self.autoclose` is true
        (see `close()`), also when reading or decoding fails.
        """
        try:
            if not self.encoding and hasattr(self.source, 'buffer'):
                # read as binary data
                data = self.source.buffer.read()
                # decode with `self.decode()`
                data = self.decode(data)
                # normalize newlines
                data = '\n'.join(data.splitlines()+[''])
            else:
                data = self.decode(self.source.read())
        finally:
            if self.autoclose:
                self.close()
        return data

    def readlines(self) -> list[str]:
        """
        Return lines of a single file as list of strings.

        Line endings are kept.
        """
        return self.read().splitlines(True)

    def close(self) -> None:
        """Close the source, unless it is `sys.stdin`."""
        if self.source is not sys.stdin:
            self.source.close()
class FileOutput(Output):

    """Output for single, simple file-like objects."""

    default_destination_path: Final = '<file>'

    mode: Literal['w', 'a', 'x', 'wb', 'ab', 'xb', 'bw', 'ba', 'bx'] = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files.
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self,
                 destination: TextIO | None = None,
                 destination_path: StrPath | None = None,
                 encoding: str | None = None,
                 error_handler: str | None = 'strict',
                 autoclose: bool = True,
                 handle_io_errors: None = None,
                 mode: str | None = None,
                 ) -> None:
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        super().__init__(
            destination, destination_path, encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if handle_io_errors is not None:
            warnings.warn('io.FileOutput: init argument "handle_io_errors" '
                          'is ignored and will be removed in '
                          'Docutils 2.0.', DeprecationWarning, stacklevel=2)
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # The file is opened lazily by the first `write()`.
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (  # destination is file-type object -> check mode:
              mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            print('Warning: Destination mode "%s" differs from specified '
                  'mode "%s"' % (self.destination.mode, mode),
                  file=self._stderr)
        if not destination_path:
            # Use the name of the file object (if it has one).
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self) -> None:
        """Open the file at `self.destination_path` with `self.mode`.

        Raise `OutputError` if the file cannot be opened.
        """
        # Specify encoding
        if 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        else:
            kwargs = {}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except OSError as error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path) from error
        self.opened = True

    def write(self, data: str | bytes) -> str | bytes:
        """Write `data` to a single file, also return it.

        `data` can be a `str` or `bytes` instance.
        If writing `bytes` fails, an attempt is made to write to
        the low-level interface ``self.destination.buffer``.

        If `data` is a `str` instance and `self.encoding` and
        `self.destination.encoding` are set to different values, `data`
        is encoded to a `bytes` instance using `self.encoding`.

        A `TypeError` from writing `str` data (e.g. to a destination
        opened in binary mode) is propagated to the caller.

        Provisional: future versions may raise an error if `self.encoding`
        and `self.destination.encoding` are set to different values.
        """
        if not self.opened:
            self.open()
        if (isinstance(data, str)
            and check_encoding(self.destination, self.encoding) is False):
            if os.linesep != '\n':
                data = data.replace('\n', os.linesep)  # fix endings
            data = self.encode(data)

        try:
            self.destination.write(data)
        except TypeError as err:
            if not isinstance(data, bytes):
                # `str` data was rejected: nothing has been written.
                # Report this instead of silently dropping the output.
                raise
            try:
                self.destination.buffer.write(data)
            except AttributeError:
                if check_encoding(self.destination,
                                  self.encoding) is False:
                    raise ValueError(
                        f'Encoding of {self.destination_path} '
                        f'({self.destination.encoding}) differs \n'
                        f' from specified encoding ({self.encoding})')
                else:
                    raise err
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                f'{self.encoding}.\n({error_string(err)})') from err
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self) -> None:
        """Close the destination, unless it is `sys.stdout` or `sys.stderr`."""
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False
class StringInput(Input):
    """Input wrapper whose source is a `str` or `bytes` instance."""

    source: str | bytes

    default_source_path: Final = '<string>'

    def read(self) -> str:
        """Return the content of `self.source` as `str`.

        The source is passed through `self.decode()`
        (see `Input.decode`).
        """
        text = self.decode(self.source)
        return text
class StringOutput(Output):
    """Output wrapper that keeps the result as `bytes` or `str` instance.

    Provisional.
    """

    destination: str | bytes

    default_destination_path: Final = '<string>'

    def write(self, data: str | bytes) -> str | bytes:
        """Pass `data` through `self.encode()`, store the result in
        `self.destination`, and return it.

        With the pseudo encoding name "unicode" as `self.encoding`,
        `data` must be a `str` instance and is stored/returned unchanged
        (cf. `Output.encode`).

        In all other cases, `data` may be a `bytes` or `str` instance
        and the stored/returned object is a `bytes` instance
        (`str` data is encoded with `self.encode()`).

        Attention: the `output_encoding`_ setting may affect the content
        of the output (e.g. an encoding declaration in HTML or XML or the
        representation of characters as LaTeX macro vs. literal character).
        """
        encoded = self.encode(data)
        self.destination = encoded
        return self.destination
class NullInput(Input):

    """Degenerate input that never yields any content."""

    source: None

    default_source_path: Final = 'null input'

    def read(self) -> str:
        """Return an empty string, whatever the source is."""
        nothing = ''
        return nothing
class NullOutput(Output):

    """Degenerate output that discards everything it is given."""

    destination: None

    default_destination_path: Final = 'null output'

    def write(self, data: str | bytes) -> None:
        """Discard `data` and return None."""
        return None
class DocTreeInput(Input):

    """
    Adapter for input that already is a document tree.

    Pass the document tree as ``source`` argument.
    """

    source: nodes.document

    default_source_path: Final = 'doctree input'

    def read(self) -> nodes.document:
        """Return the document tree stored in `self.source` unchanged."""
        document = self.source
        return document