Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/io.py: 36%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

250 statements  

1# $Id: io.py 10356 2026-06-12 21:49:56Z milde $ 

2# Author: David Goodger <goodger@python.org> 

3# Copyright: This module has been placed in the public domain. 

4 

5""" 

6I/O classes provide a uniform API for low-level input and output. Subclasses 

7exist for a variety of input/output mechanisms. 

8""" 

9 

10from __future__ import annotations 

11 

12__docformat__ = 'reStructuredText' 

13 

14import codecs 

15import locale 

16import os 

17import sys 

18import warnings 

19 

20from docutils import TransformSpec 

21 

22TYPE_CHECKING = False 

23if TYPE_CHECKING: 

24 from typing import BinaryIO, ClassVar, Final, Literal, TextIO 

25 

26 from docutils import nodes 

27 from docutils.nodes import StrPath 

28 

# Guess the locale's preferred encoding.
# `_locale_encoding` ends up as `None` if no valid guess can be made.
#
# TODO: check whether this is set correctly with every OS and Python version
#       or whether front-end tools need to call `locale.setlocale()`
#       before importing this module
try:
    with warnings.catch_warnings():
        # Silence any warning emitted by the locale queries below.
        warnings.simplefilter("ignore")
        # Return locale encoding also in UTF-8 mode
        _locale_encoding: str | None = (
            locale.getlocale()[1] or locale.getdefaultlocale()[1]
        ).lower()
except:  # NoQA: E722 (catchall)
    # Any problem determining the locale: use None
    _locale_encoding = None

# Drop a guess that is unknown to the codec registry.
# (`codecs.lookup(None)` raises TypeError, so `None` simply stays `None`.)
try:
    codecs.lookup(_locale_encoding)
except (LookupError, TypeError):
    _locale_encoding = None

49 

50 

class InputError(OSError):
    """Error raised when opening an input source fails (see `FileInput`)."""

class OutputError(OSError):
    """Error raised when opening an output destination fails
    (see `FileOutput.open()`)."""

53 

54 

55def check_encoding(stream: TextIO, encoding: str) -> bool | None: 

56 """Test, whether the encoding of `stream` matches `encoding`. 

57 

58 Returns 

59 

60 :None: if `encoding` or `stream.encoding` are not a valid encoding 

61 argument (e.g. ``None``) or `stream.encoding is missing. 

62 :True: if the encoding argument resolves to the same value as `encoding`, 

63 :False: if the encodings differ. 

64 """ 

65 try: 

66 return codecs.lookup(stream.encoding) == codecs.lookup(encoding) 

67 except (LookupError, AttributeError, TypeError): 

68 return None 

69 

70 

def error_string(err: BaseException) -> str:
    """Format Exception `err` as "<class name>: <message>"."""
    class_name = err.__class__.__name__
    return f'{class_name}: {err}'

75 

76 

class Input(TransformSpec):
    """
    Abstract base class for input wrappers.

    Docutils input objects must provide a `read()` method that
    returns the source, typically as `str` instance.

    Inheriting `TransformSpec` allows input objects to add "transforms" to
    the "Transformer".  (Since Docutils 0.19, input objects are no longer
    required to be `TransformSpec` instances.)
    """

    component_type: Final = 'input'

    default_source_path: ClassVar[str | None] = None

    def __init__(
        self,
        source: str | TextIO | nodes.document | None = None,
        source_path: StrPath | None = None,
        encoding: str | None = 'utf-8',
        error_handler: str | None = 'strict',
    ) -> None:
        """
        :Parameters:
            - `source`: the input data or an object providing it.
            - `source_path`: a text reference to the source; an empty or
              missing value is replaced by `self.default_source_path`.
            - `encoding`: the text encoding of the input source.
            - `error_handler`: the text decoding error handler.
        """
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        self.source_path = source_path
        """A text reference to the source."""

        if not source_path:
            self.source_path = self.default_source_path

    def __repr__(self) -> str:
        return '%s: source=%r, source_path=%r' % (self.__class__, self.source,
                                                  self.source_path)

    def read(self) -> str:
        """Return input as `str`. Define in subclasses."""
        raise NotImplementedError

    def decode(self, data: str | bytes) -> str:
        """
        Decode `data` if required.

        Return Unicode `str` instances unchanged (nothing to decode).
        Other data is decoded with `self.encoding` (default: "utf-8")
        and `self.error_handler` (default: "strict").
        """
        if isinstance(data, str):
            return data  # nothing to decode
        # `str()` raises TypeError for ``errors=None`` although `None` is
        # an accepted `error_handler` value: fall back to 'strict'
        # (the same default that `Output.__init__()` applies).
        return str(data, self.encoding or 'utf-8',
                   self.error_handler or 'strict')

    def isatty(self) -> bool:
        """Return True, if the input source is connected to a TTY device."""
        try:
            return self.source.isatty()
        except AttributeError:
            # source has no `isatty()` method (e.g. a `str` or `None`)
            return False

139 

140 

class Output(TransformSpec):
    """
    Abstract base class for output wrappers.

    Docutils output objects must provide a `write()` method that
    expects and handles one argument (the output).

    Inheriting `TransformSpec` allows output objects to add "transforms" to
    the "Transformer".  (Since Docutils 0.19, output objects are no longer
    required to be `TransformSpec` instances.)
    """

    component_type: Final = 'output'

    default_destination_path: ClassVar[str | None] = None

    def __init__(
        self,
        destination: TextIO | str | bytes | None = None,
        destination_path: StrPath | None = None,
        encoding: str | None = None,
        error_handler: str | None = 'strict',
    ) -> None:
        """
        :Parameters:
            - `destination`: the object the output is written to.
            - `destination_path`: a text reference to the destination;
              an empty or missing value is replaced by
              `self.default_destination_path`.
            - `encoding`: the text encoding of the output.
            - `error_handler`: the text encoding error handler
              (an empty value or `None` means "strict").
        """
        self.encoding: str | None = encoding
        """Text encoding for the output destination."""

        self.error_handler: str = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination: TextIO | str | bytes | None = destination
        """The destination for output data."""

        self.destination_path: StrPath | None = destination_path
        """A text reference to the destination."""

        if not destination_path:
            self.destination_path = self.default_destination_path

    def __repr__(self) -> str:
        return ('%s: destination=%r, destination_path=%r'
                % (self.__class__, self.destination, self.destination_path))

    def write(self, data: str | bytes) -> str | bytes | None:
        """Write `data`. Define in subclasses."""
        raise NotImplementedError

    def encode(self, data: str | bytes) -> str | bytes:
        """
        Encode and return `data`.

        If `data` is a `bytes` instance, it is returned unchanged.
        Otherwise it is encoded with `self.encoding`
        ("utf-8" if `self.encoding` is not set).

        Provisional: If `self.encoding` is set to the pseudo encoding name
        "unicode", `data` must be a `str` instance and is returned unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('output encoding is "unicode" '
                                           'but `data` is no `str` instance')
            return data
        if not isinstance(data, str):
            # Non-unicode (e.g. bytes) output.
            return data
        # `str.encode()` raises TypeError for ``encoding=None`` (the
        # default of `__init__()`): use the same "utf-8" fallback
        # as `Input.decode()`.
        return data.encode(self.encoding or 'utf-8', self.error_handler)

206 

207 

208class ErrorOutput: 

209 """ 

210 Wrapper class for file-like error streams with 

211 failsafe de- and encoding of `str`, `bytes`, and `Exception` instances. 

212 """ 

213 

214 def __init__( 

215 self, 

216 destination: TextIO | BinaryIO | str | Literal[False] | None = None, 

217 encoding: str | None = None, 

218 encoding_errors: str = 'backslashreplace', 

219 decoding_errors: str = 'replace', 

220 ) -> None: 

221 """ 

222 :Parameters: 

223 - `destination`: a file-like object, 

224 a string (path to a file), 

225 `None` (write to `sys.stderr`, default), or 

226 evaluating to `False` (write() requests are ignored). 

227 - `encoding`: `destination` text encoding. Guessed if None. 

228 - `encoding_errors`: how to treat encoding errors. 

229 """ 

230 if destination is None: 

231 destination = sys.stderr 

232 elif not destination: 

233 destination = False 

234 # if `destination` is a file name, open it 

235 elif isinstance(destination, str): 

236 destination = open(destination, 'w') 

237 

238 self.destination: TextIO | BinaryIO | Literal[False] = destination 

239 """Where warning output is sent.""" 

240 

241 self.encoding: str = ( 

242 encoding 

243 or getattr(destination, 'encoding', None) 

244 or _locale_encoding 

245 or 'ascii' 

246 ) 

247 """The output character encoding.""" 

248 

249 self.encoding_errors: str = encoding_errors 

250 """Encoding error handler.""" 

251 

252 self.decoding_errors: str = decoding_errors 

253 """Decoding error handler.""" 

254 

255 def write(self, data: str | bytes | Exception) -> None: 

256 """ 

257 Write `data` to self.destination. Ignore, if self.destination is False. 

258 

259 `data` can be a `bytes`, `str`, or `Exception` instance. 

260 """ 

261 if not self.destination: 

262 return 

263 if isinstance(data, Exception): 

264 data = str(data) 

265 # The destination is either opened in text or binary mode. 

266 # If data has the wrong type, try to convert it. 

267 try: 

268 self.destination.write(data) 

269 except UnicodeEncodeError: 

270 # Encoding data from string to bytes failed with the 

271 # destination's encoding and error handler. 

272 # Try again with our own encoding and error handler. 

273 binary = data.encode(self.encoding, self.encoding_errors) 

274 self.destination.write(binary) 

275 except TypeError: 

276 if isinstance(data, str): # destination may expect bytes 

277 binary = data.encode(self.encoding, self.encoding_errors) 

278 self.destination.write(binary) 

279 elif self.destination in (sys.stderr, sys.stdout): 

280 # write bytes to raw stream 

281 self.destination.buffer.write(data) 

282 else: 

283 # destination in text mode, write str 

284 string = data.decode(self.encoding, self.decoding_errors) 

285 self.destination.write(string) 

286 

287 def close(self) -> None: 

288 """ 

289 Close the error-output stream. 

290 

291 Ignored if the destination is` sys.stderr` or `sys.stdout` or has no 

292 close() method. 

293 """ 

294 if self.destination in (sys.stdout, sys.stderr): 

295 return 

296 try: 

297 self.destination.close() 

298 except AttributeError: 

299 pass 

300 

301 def isatty(self) -> bool: 

302 """Return True, if the destination is connected to a TTY device.""" 

303 try: 

304 return self.destination.isatty() 

305 except AttributeError: 

306 return False 

307 

308 

class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(
        self,
        source: TextIO | None = None,
        source_path: StrPath | None = None,
        encoding: str | Literal['unicode'] | None = 'utf-8',
        error_handler: str | None = 'strict',
        autoclose: bool = True,
        mode: Literal['r', 'rb', 'br'] = 'r'
    ) -> None:
        """
        :Parameters:
            - `source`: either a file-like object (with `read()` and `close()`
              methods) or None (use source indicated by `source_path`).
            - `source_path`: a path to a file (which is opened for reading
              if `source` is None) or `None` (implies `sys.stdin`).
            - `encoding`: the text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              the source is `sys.stdin`).
            - `mode`: how the file is to be opened.  Default is read only ('r').

        Raise `InputError` if the file at `source_path` cannot be opened
        and `UnicodeError` if `source` is opened with an encoding that
        differs from `encoding`.
        """
        super().__init__(source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is None:
            if source_path:
                if 'b' in mode:
                    # `open()` raises ValueError if text arguments are
                    # passed in binary mode; the bytes are decoded
                    # later by `self.decode()` (cf. `FileOutput.open()`).
                    text_args = {}
                else:
                    text_args = {'encoding': self.encoding,
                                 'errors': self.error_handler}
                try:
                    self.source = open(source_path, mode, **text_args)
                except OSError as error:
                    raise InputError(error.errno, error.strerror,
                                     source_path) from error
            else:
                self.source = sys.stdin
        elif check_encoding(self.source, self.encoding) is False:
            # TODO: re-open, warn or raise error?
            raise UnicodeError('Encoding clash: encoding given is "%s" '
                               'but source is opened with encoding "%s".' %
                               (self.encoding, self.source.encoding))
        if not source_path:
            # use the stream's name (if it has one) as reference
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self) -> str:
        """
        Read and decode a single file, return as `str`.

        The source is closed afterwards if `self.autoclose` is true.
        """
        try:
            if not self.encoding and hasattr(self.source, 'buffer'):
                # read as binary data
                data = self.source.buffer.read()
                # decode with heuristics
                data = self.decode(data)
                # normalize newlines
                data = '\n'.join(data.splitlines()+[''])
            else:
                data = self.decode(self.source.read())
        finally:
            if self.autoclose:
                self.close()
        return data

    def readlines(self) -> list[str]:
        """
        Return lines of a single file as list of strings.
        """
        return self.read().splitlines(True)

    def close(self) -> None:
        """Close the source unless it is `sys.stdin`."""
        if self.source is not sys.stdin:
            self.source.close()

388 

389 

class FileOutput(Output):

    """Output for single, simple file-like objects."""

    default_destination_path: Final = '<file>'

    mode: Literal['w', 'a', 'x', 'wb', 'ab', 'xb', 'bw', 'ba', 'bx'] = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files.
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self,
                 destination: TextIO | None = None,
                 destination_path: StrPath | None = None,
                 encoding: str | None = None,
                 error_handler: str | None = 'strict',
                 autoclose: bool = True,
                 handle_io_errors: None = None,
                 mode=None,
                 ) -> None:
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        super().__init__(
            destination, destination_path, encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if handle_io_errors is not None:
            warnings.warn('io.FileOutput: init argument "handle_io_errors" '
                          'is ignored and will be removed in '
                          'Docutils 2.0.', DeprecationWarning, stacklevel=2)
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()
        if destination is None:
            if destination_path:
                # the file is opened on the first `write()`
                self.opened = False
            else:
                self.destination = sys.stdout
        elif (  # destination is file-type object -> check mode:
              mode and hasattr(self.destination, 'mode')
              and mode != self.destination.mode):
            print('Warning: Destination mode "%s" differs from specified '
                  'mode "%s"' % (self.destination.mode, mode),
                  file=self._stderr)
        if not destination_path:
            # use the stream's name (if it has one) as reference
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self) -> None:
        """Open the file at `self.destination_path` with `self.mode`.

        Raise `OutputError` if the file cannot be opened.
        """
        # Specify encoding
        if 'b' not in self.mode:
            kwargs = {'encoding': self.encoding,
                      'errors': self.error_handler}
        else:
            kwargs = {}
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except OSError as error:
            raise OutputError(error.errno, error.strerror,
                              self.destination_path) from error
        self.opened = True

    def write(self, data: str | bytes) -> str | bytes:
        """Write `data` to a single file, also return it.

        `data` can be a `str` or `bytes` instance.
        If writing `bytes` fails, an attempt is made to write to
        the low-level interface ``self.destination.buffer``.

        If `data` is a `str` instance and `self.encoding` and
        `self.destination.encoding` are set to different values, `data`
        is encoded to a `bytes` instance using `self.encoding`.

        A `TypeError` raised by the destination for `str` data
        (e.g. a binary stream) is propagated to the caller.

        Provisional: future versions may raise an error if `self.encoding`
        and `self.destination.encoding` are set to different values.
        """
        if not self.opened:
            self.open()
        if (isinstance(data, str)
            and check_encoding(self.destination, self.encoding) is False):
            if os.linesep != '\n':
                data = data.replace('\n', os.linesep)  # fix endings
            data = self.encode(data)

        try:
            self.destination.write(data)
        except TypeError as err:
            if not isinstance(data, bytes):
                # Nothing was written: do not pretend success.
                raise
            try:
                self.destination.buffer.write(data)
            except AttributeError:
                # no low-level interface to write the bytes to
                if check_encoding(self.destination,
                                  self.encoding) is False:
                    raise ValueError(
                        f'Encoding of {self.destination_path} '
                        f'({self.destination.encoding}) differs \n'
                        f' from specified encoding ({self.encoding})')
                raise err
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                f'{self.encoding}.\n({error_string(err)})')
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self) -> None:
        """Close the destination unless it is `sys.stdout` or `sys.stderr`."""
        if self.destination not in (sys.stdout, sys.stderr):
            self.destination.close()
            self.opened = False

519 

520 

class StringInput(Input):
    """Input wrapper around an in-memory `str` or `bytes` instance."""

    source: str | bytes

    default_source_path: Final = '<string>'

    def read(self) -> str:
        """Return `self.source` as `str` instance.

        `bytes` are decoded, `str` is passed through (see `Input.decode`).
        """
        text = self.decode(self.source)
        return text

534 

535 

class StringOutput(Output):
    """Output wrapper storing the result as `bytes` or `str` instance.

    Provisional.
    """

    destination: str | bytes

    default_destination_path: Final = '<string>'

    def write(self, data: str | bytes) -> str | bytes:
        """Keep `data` in `self.destination` and return the stored value.

        With the pseudo encoding name "unicode" in `self.encoding`,
        `data` must be a `str` instance and is stored/returned unchanged
        (cf. `Output.encode`).

        With any other encoding, `data` can be a `bytes` or `str` instance
        and is stored/returned as a `bytes` instance
        (`str` data is encoded with `self.encode()`).

        Attention: the `output_encoding`_ setting may affect the content
        of the output (e.g. an encoding declaration in HTML or XML or the
        representation of characters as LaTeX macro vs. literal character).
        """
        encoded = self.encode(data)
        self.destination = encoded
        return self.destination

563 

564 

class NullInput(Input):

    """Degenerate input that never provides any data."""

    source: None

    default_source_path: Final = 'null input'

    def read(self) -> str:
        """Return the empty string (there is nothing to read)."""
        nothing = ''
        return nothing

576 

577 

class NullOutput(Output):

    """Degenerate output that discards everything written to it."""

    destination: None

    default_destination_path: Final = 'null output'

    def write(self, data: str | bytes) -> None:
        """Ignore `data` and return None."""
        return None

588 

589 

class DocTreeInput(Input):

    """
    Adapter that passes an existing document tree through the input API.

    The document tree must be passed in the ``source`` parameter.
    """

    source: nodes.document

    default_source_path: Final = 'doctree input'

    def read(self) -> nodes.document:
        """Return the document tree stored in `self.source` unchanged."""
        doctree = self.source
        return doctree