Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/docutils/io.py: 34%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

259 statements  

1# $Id$ 

2# Author: David Goodger <goodger@python.org> 

3# Copyright: This module has been placed in the public domain. 

4 

5""" 

6I/O classes provide a uniform API for low-level input and output. Subclasses 

7exist for a variety of input/output mechanisms. 

8""" 

9 

10__docformat__ = 'reStructuredText' 

11 

12import codecs 

13import locale 

14import os 

15import re 

16import sys 

17import warnings 

18 

19from docutils import TransformSpec 

20 

21 

22# Guess the locale's preferred encoding. 

23# If no valid guess can be made, _locale_encoding is set to `None`: 

24# 

25# TODO: check whether this is set correctly with every OS and Python version 

26# or whether front-end tools need to call `locale.setlocale()` 

27# before importing this module 

try:
    # Return locale encoding also in UTF-8 mode
    with warnings.catch_warnings():
        # Silence any warnings emitted by the locale queries below
        # (presumably the deprecation warning of `getdefaultlocale()` in
        # newer Python versions -- TODO confirm).
        warnings.simplefilter("ignore")
        _locale_encoding = (locale.getlocale()[1]
                            or locale.getdefaultlocale()[1])
        # Raises AttributeError if both queries returned None;
        # handled below.
        _locale_encoding = _locale_encoding.lower()
except Exception:
    # Any other problems determining the locale -> use None.
    # `Exception` (instead of a bare ``except``) keeps KeyboardInterrupt
    # and SystemExit raised during import from being swallowed.
    _locale_encoding = None
try:
    # Only keep the guess if Python knows a codec of that name.
    codecs.lookup(_locale_encoding)
except (LookupError, TypeError):
    # LookupError: unknown codec name; TypeError: the guess is None.
    _locale_encoding = None

41 

42 

class InputError(OSError):
    """`OSError` subclass raised when an input file cannot be opened."""

class OutputError(OSError):
    """`OSError` subclass raised when an output file cannot be opened."""

45 

46 

def check_encoding(stream, encoding):
    """Test, whether the encoding of `stream` matches `encoding`.

    Both names are resolved with `codecs.lookup()`, so aliases of the
    same codec compare equal.

    Returns

    :None:  if `encoding` or `stream.encoding` are not a valid encoding
            argument (e.g. ``None``) or `stream.encoding` is missing.
    :True:  if `stream.encoding` resolves to the same codec as `encoding`,
    :False: if the encodings differ.
    """
    try:
        stream_codec = codecs.lookup(stream.encoding)
        expected_codec = codecs.lookup(encoding)
    except (LookupError, AttributeError, TypeError):
        # unknown codec name, no `encoding` attribute, or not a string
        return None
    return stream_codec == expected_codec

61 

62 

def error_string(err):
    """Return Exception `err` formatted as "<class name>: <message>"."""
    class_name = err.__class__.__name__
    return f'{class_name}: {err}'

67 

68 

class Input(TransformSpec):
    """
    Abstract base class for input wrappers.

    Concrete input objects have to implement `read()`, which returns
    the source (typically as `str` instance).

    Deriving from `TransformSpec` lets input objects add
    "transforms" and "unknown_reference_resolvers" to the "Transformer".
    (Optional for custom input objects since Docutils 0.19.)
    """

    component_type = 'input'

    default_source_path = None

    def __init__(self, source=None, source_path=None, encoding=None,
                 error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the input source."""

        self.error_handler = error_handler
        """Text decoding error handler."""

        self.source = source
        """The source of input data."""

        # An empty/missing path falls back to the class default.
        self.source_path = source_path or self.default_source_path
        """A text reference to the source."""

        self.successful_encoding = None
        """The encoding that successfully decoded the source data."""

    def __repr__(self):
        details = (self.__class__, self.source, self.source_path)
        return '%s: source=%r, source_path=%r' % details

    def read(self):
        """Return input as `str`. Define in subclasses."""
        raise NotImplementedError

    def decode(self, data):
        """
        Decode `data` if required.

        `str` instances are returned unchanged (nothing to decode).

        With `self.encoding` set, only that encoding is tried.
        Otherwise the encoding is taken from `data` itself (BOM or
        encoding declaration) or, failing that, UTF-8 and then the
        locale's preferred encoding are tried.
        The client application should call ``locale.setlocale()`` at the
        beginning of processing::

            locale.setlocale(locale.LC_ALL, '')

        On success, the encoding used is stored in
        `self.successful_encoding`.

        Raise UnicodeError if unsuccessful.

        Provisional: encoding detection will be removed in Docutils 1.0.
        """
        requested = self.encoding
        if requested and requested.lower() == 'unicode':
            assert isinstance(data, str), ('input encoding is "unicode" '
                                           'but `data` is no `str` instance')
        if isinstance(data, str):
            # nothing to decode
            return data

        if requested:
            # An explicitly given encoding is trusted.
            encoding_candidates = [requested]
        else:
            declared = self.determine_encoding_from_data(data)
            if declared:
                # `data` names its encoding (BOM or "magic comment").
                encoding_candidates = [declared]
            else:
                # Heuristics: UTF-8 first, because it only matches
                # data that really is UTF-8 ...
                encoding_candidates = ['utf-8']
                # ... then the locale's preferred encoding.
                fallback = locale.getpreferredencoding(do_setlocale=False)
                if fallback and fallback.lower() != 'utf-8':
                    encoding_candidates.append(fallback)

        for enc in encoding_candidates:
            try:
                decoded = str(data, enc, self.error_handler)
            except (UnicodeError, LookupError) as err:
                # remember the failure for the error message below
                error = err
            else:
                self.successful_encoding = enc
                return decoded

        tried = ", ".join(map(repr, encoding_candidates))
        raise UnicodeError(
            'Unable to decode input data.  Tried the following encodings: '
            f'{tried}.\n'
            f'({error_string(error)})')

    coding_slug = re.compile(br"coding[:=]\s*([-\w.]+)")
    """Encoding declaration pattern."""

    byte_order_marks = ((codecs.BOM_UTF32_BE, 'utf-32'),
                        (codecs.BOM_UTF32_LE, 'utf-32'),
                        (codecs.BOM_UTF8, 'utf-8-sig'),
                        (codecs.BOM_UTF16_BE, 'utf-16'),
                        (codecs.BOM_UTF16_LE, 'utf-16'),
                        )
    """Sequence of (start_bytes, encoding) tuples for encoding detection.
    The first bytes of input data are checked against the start_bytes strings.
    A match indicates the given encoding."""

    def determine_encoding_from_data(self, data):
        """
        Try to determine the encoding of `data` by looking *in* `data`.

        Return the encoding name indicated by a byte order mark (BOM)
        or by an encoding declaration in the first two lines,
        else ``None``.
        """
        # a byte order mark takes precedence
        for bom, bom_encoding in self.byte_order_marks:
            if data.startswith(bom):
                return bom_encoding
        # encoding declaration pattern in the first 2 lines of the data
        head = data.splitlines()[:2]
        for line in head:
            found = self.coding_slug.search(line)
            if found:
                return found.group(1).decode('ascii')
        return None

    def isatty(self):
        """Return True, if the input source is connected to a TTY device."""
        try:
            return self.source.isatty()
        except AttributeError:
            # source has no `isatty()`
            return False

202 

203 

class Output(TransformSpec):
    """
    Abstract base class for output wrappers.

    Concrete output objects have to implement `write()`, which
    expects and handles one argument (the output).

    Deriving from `TransformSpec` lets output objects add
    "transforms" and "unknown_reference_resolvers" to the "Transformer".
    (Optional for custom output objects since Docutils 0.19.)
    """

    component_type = 'output'

    default_destination_path = None

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict'):
        self.encoding = encoding
        """Text encoding for the output destination."""

        # A false value (e.g. None) selects the 'strict' handler.
        self.error_handler = error_handler or 'strict'
        """Text encoding error handler."""

        self.destination = destination
        """The destination for output data."""

        # An empty/missing path falls back to the class default.
        self.destination_path = (destination_path
                                 or self.default_destination_path)
        """A text reference to the destination."""

    def __repr__(self):
        details = (self.__class__, self.destination, self.destination_path)
        return '%s: destination=%r, destination_path=%r' % details

    def write(self, data):
        """Write `data`. Define in subclasses."""
        raise NotImplementedError

    def encode(self, data):
        """
        Encode and return `data`.

        `str` data is encoded with `self.encoding` and
        `self.error_handler`; anything else (e.g. a `bytes` instance)
        is returned unchanged.

        Provisional: If `self.encoding` is set to the pseudo encoding name
        "unicode", `data` must be a `str` instance and is returned unchanged.
        """
        if self.encoding and self.encoding.lower() == 'unicode':
            assert isinstance(data, str), ('output encoding is "unicode" '
                                           'but `data` is no `str` instance')
            return data
        if isinstance(data, str):
            return data.encode(self.encoding, self.error_handler)
        # Non-unicode (e.g. bytes) output.
        return data

264 

265 

class ErrorOutput:
    """
    Wrapper class for file-like error streams with
    failsafe de- and encoding of `str`, `bytes`, and
    `Exception` instances.
    """

    def __init__(self, destination=None, encoding=None,
                 encoding_errors='backslashreplace',
                 decoding_errors='replace'):
        """
        :Parameters:
            - `destination`: a file-like object,
                        a string (path to a file),
                        `None` (write to `sys.stderr`, default), or
                        evaluating to `False` (write() requests are ignored).
            - `encoding`: `destination` text encoding. Guessed if None.
            - `encoding_errors`: how to treat encoding errors.
            - `decoding_errors`: how to treat decoding errors.
        """
        if destination is None:
            destination = sys.stderr
        elif not destination:
            destination = False
        # if `destination` is a file name, open it
        elif isinstance(destination, str):
            destination = open(destination, 'w')

        self.destination = destination
        """Where warning output is sent."""

        # Precedence: explicit argument, the stream's own encoding,
        # the locale's encoding guessed at import time, ASCII.
        self.encoding = (encoding or getattr(destination, 'encoding', None)
                         or _locale_encoding or 'ascii')
        """The output character encoding."""

        self.encoding_errors = encoding_errors
        """Encoding error handler."""

        self.decoding_errors = decoding_errors
        """Decoding error handler."""

    def write(self, data):
        """
        Write `data` to self.destination. Ignore, if self.destination is False.

        `data` can be a `bytes`, `str`, or `Exception` instance.

        If the destination cannot represent `str` data in its own encoding,
        the data is encoded with `self.encoding` and `self.encoding_errors`
        (by default replacing unencodable characters with backslash
        escapes) and written either as `bytes` or, for streams that only
        accept text, as the escaped text.
        """
        if not self.destination:
            return
        if isinstance(data, Exception):
            data = str(data)
        try:
            self.destination.write(data)
        except UnicodeEncodeError:
            encoded = data.encode(self.encoding, self.encoding_errors)
            try:
                self.destination.write(encoded)
            except TypeError:
                # Text streams reject `bytes`.  Write the escaped text
                # instead of failing, so that the message is not lost.
                self.destination.write(
                    encoded.decode(self.encoding, self.decoding_errors))
        except TypeError:
            if isinstance(data, str):  # destination may expect bytes
                self.destination.write(data.encode(self.encoding,
                                                   self.encoding_errors))
            elif self.destination in (sys.stderr, sys.stdout):
                # write bytes to raw stream
                self.destination.buffer.write(data)
            else:
                self.destination.write(str(data, self.encoding,
                                           self.decoding_errors))

    def close(self):
        """
        Close the error-output stream.

        Ignored if the destination is `sys.stderr` or `sys.stdout` or has no
        close() method.
        """
        if self.destination in (sys.stdout, sys.stderr):
            return
        try:
            self.destination.close()
        except AttributeError:
            pass

    def isatty(self):
        """Return True, if the destination is connected to a TTY device."""
        try:
            return self.destination.isatty()
        except AttributeError:
            # destination has no `isatty()` (e.g. it is ``False``)
            return False

352 

353 

class FileInput(Input):

    """
    Input for single, simple file-like objects.
    """
    def __init__(self, source=None, source_path=None,
                 encoding=None, error_handler='strict',
                 autoclose=True, mode='r'):
        """
        :Parameters:
            - `source`: either a file-like object (which is read directly), or
              `None` (which implies `sys.stdin` if no `source_path` given).
            - `source_path`: a path to a file, which is opened for reading.
            - `encoding`: the expected text encoding of the input file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after read (except when
              `sys.stdin` is the source).
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is read only ('r').

        Raise `InputError` if the file at `source_path` cannot be opened
        and `UnicodeError` if `source` is opened with an encoding that
        differs from `encoding`.
        """
        Input.__init__(self, source, source_path, encoding, error_handler)
        self.autoclose = autoclose
        self._stderr = ErrorOutput()

        if source is not None:
            # A file-like object was passed in: its encoding must not
            # contradict the requested one.
            if check_encoding(self.source, self.encoding) is False:
                # TODO: re-open, warn or raise error?
                raise UnicodeError('Encoding clash: encoding given is "%s" '
                                   'but source is opened with encoding "%s".' %
                                   (self.encoding, self.source.encoding))
        elif source_path:
            try:
                self.source = open(source_path, mode,
                                   encoding=self.encoding,
                                   errors=self.error_handler)
            except OSError as err:
                raise InputError(err.errno, err.strerror, source_path)
        else:
            self.source = sys.stdin

        if not source_path:
            # Use the name of the file object, if it has one.
            try:
                self.source_path = self.source.name
            except AttributeError:
                pass

    def read(self):
        """
        Read and decode a single file, return as `str`.

        Without an explicit encoding, the underlying binary buffer
        (if the source has one) is read and decoded with the heuristics
        of `Input.decode`; otherwise the result of ``self.source.read()``
        is returned as is.
        Closes the source afterwards if `self.autoclose` is true.
        """
        try:
            if self.encoding or not hasattr(self.source, 'buffer'):
                data = self.source.read()
            else:
                # read as binary data and decode with heuristics
                text = self.decode(self.source.buffer.read())
                # normalize newlines
                data = '\n'.join(text.splitlines()+[''])
        finally:
            if self.autoclose:
                self.close()
        return data

    def readlines(self):
        """
        Return lines of a single file as list of strings.

        Line endings are kept.
        """
        content = self.read()
        return content.splitlines(True)

    def close(self):
        """Close the source, unless it is `sys.stdin`."""
        if self.source is sys.stdin:
            return
        self.source.close()

427 

428 

class FileOutput(Output):

    """Output for single, simple file-like objects."""

    default_destination_path = '<file>'

    mode = 'w'
    """The mode argument for `open()`."""
    # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`).
    # (Do not use binary mode ('wb') for text files, as this prevents the
    # conversion of newlines to the system specific default.)

    def __init__(self, destination=None, destination_path=None,
                 encoding=None, error_handler='strict', autoclose=True,
                 handle_io_errors=None, mode=None):
        """
        :Parameters:
            - `destination`: either a file-like object (which is written
              directly) or `None` (which implies `sys.stdout` if no
              `destination_path` given).
            - `destination_path`: a path to a file, which is opened and then
              written.
            - `encoding`: the text encoding of the output file.
            - `error_handler`: the encoding error handler to use.
            - `autoclose`: close automatically after write (except when
              `sys.stdout` or `sys.stderr` is the destination).
            - `handle_io_errors`: ignored, deprecated, will be removed.
            - `mode`: how the file is to be opened (see standard function
              `open`). The default is 'w', providing universal newline
              support for text files.
        """
        Output.__init__(self, destination, destination_path,
                        encoding, error_handler)
        self.opened = True
        self.autoclose = autoclose
        if handle_io_errors is not None:
            warnings.warn('io.FileOutput: init argument "handle_io_errors" '
                          'is ignored and will be removed in '
                          'Docutils 2.0.', DeprecationWarning, stacklevel=2)
        if mode is not None:
            self.mode = mode
        self._stderr = ErrorOutput()

        if destination is not None:
            # destination is file-type object -> check mode:
            if (mode and hasattr(self.destination, 'mode')
                    and mode != self.destination.mode):
                print('Warning: Destination mode "%s" differs from specified '
                      'mode "%s"' % (self.destination.mode, mode),
                      file=self._stderr)
        elif destination_path:
            # The file is opened lazily by the first `write()`.
            self.opened = False
        else:
            self.destination = sys.stdout

        if not destination_path:
            # Use the name of the file object, if it has one.
            try:
                self.destination_path = self.destination.name
            except AttributeError:
                pass

    def open(self):
        """Open `self.destination_path` with `self.mode`.

        Raise `OutputError` if the file cannot be opened.
        """
        # Specify encoding and error handler in text mode only.
        text_mode = 'b' not in self.mode
        kwargs = ({'encoding': self.encoding, 'errors': self.error_handler}
                  if text_mode else {})
        try:
            self.destination = open(self.destination_path, self.mode, **kwargs)
        except OSError as err:
            raise OutputError(err.errno, err.strerror,
                              self.destination_path)
        self.opened = True

    def write(self, data):
        """Write `data` to a single file, also return it.

        `data` can be a `str` or `bytes` instance.
        If writing `bytes` fails, an attempt is made to write to
        the low-level interface ``self.destination.buffer``.

        If `data` is a `str` instance and `self.encoding` and
        `self.destination.encoding` are set to different values, `data`
        is encoded to a `bytes` instance using `self.encoding`.

        Provisional: future versions may raise an error if `self.encoding`
        and `self.destination.encoding` are set to different values.
        """
        if not self.opened:
            self.open()

        encodings_differ = check_encoding(self.destination,
                                          self.encoding) is False
        if isinstance(data, str) and encodings_differ:
            if os.linesep != '\n':
                data = data.replace('\n', os.linesep)  # fix endings
            data = self.encode(data)

        try:
            self.destination.write(data)
        except TypeError as err:
            # NOTE(review): a TypeError raised for `str` data is dropped
            # silently here (nothing is re-raised) -- confirm that this
            # is intended.
            if isinstance(data, bytes):
                try:
                    self.destination.buffer.write(data)
                except AttributeError:
                    # no low-level interface to write `bytes` to
                    if check_encoding(self.destination,
                                      self.encoding) is False:
                        raise ValueError(
                            f'Encoding of {self.destination_path} '
                            f'({self.destination.encoding}) differs \n'
                            f' from specified encoding ({self.encoding})')
                    else:
                        raise err
        except (UnicodeError, LookupError) as err:
            raise UnicodeError(
                'Unable to encode output data. output-encoding is: '
                f'{self.encoding}.\n({error_string(err)})')
        finally:
            if self.autoclose:
                self.close()
        return data

    def close(self):
        """Close the destination, unless it is `sys.stdout`/`sys.stderr`."""
        if self.destination in (sys.stdout, sys.stderr):
            return
        self.destination.close()
        self.opened = False

552 

553 

class BinaryFileOutput(FileOutput):
    """
    A `docutils.io.FileOutput` variant that opens its file in binary mode.
    """
    # Used by core.publish_cmdline_to_binary() which in turn is used by
    # tools/rst2odt.py but not by core.rst2odt().

    # Overrides the text mode default ('w') of `FileOutput`.
    mode = 'wb'

561 

562 

class StringInput(Input):
    """Input from a `str` or `bytes` instance."""

    default_source_path = '<string>'

    def read(self):
        """Return the source as `str` instance.

        The source is passed through `self.decode()`
        (see `Input.decode`), which decodes it if required.
        """
        text = self.decode(self.source)
        return text

574 

575 

class StringOutput(Output):
    """Output to a `bytes` or `str` instance.

    Provisional.
    """

    default_destination_path = '<string>'

    def write(self, data):
        """Store `data` in `self.destination`, and return it.

        With `self.encoding` set to the pseudo encoding name "unicode",
        `data` must be a `str` instance and is stored/returned unchanged
        (cf. `Output.encode`).

        Otherwise, `data` can be a `bytes` or `str` instance and is
        stored/returned as a `bytes` instance
        (`str` data is encoded with `self.encode()`).

        Attention: the `output_encoding`_ setting may affect the content
        of the output (e.g. an encoding declaration in HTML or XML or the
        representation of characters as LaTeX macro vs. literal character).
        """
        result = self.encode(data)
        self.destination = result
        return result

601 

602 

class NullInput(Input):

    """Degenerate input: there is nothing to read."""

    default_source_path = 'null input'

    def read(self):
        """Return an empty string, whatever the source is."""
        return ''

612 

613 

class NullOutput(Output):

    """Degenerate output: `data` is discarded."""

    default_destination_path = 'null output'

    def write(self, data):
        """Do nothing, return None."""

623 

624 

class DocTreeInput(Input):

    """
    Adapter for document tree input.

    Pass the document tree in the ``source`` parameter;
    `read()` hands it back unchanged.
    """

    default_source_path = 'doctree input'

    def read(self):
        """Return the document tree (i.e. `self.source`)."""
        doctree = self.source
        return doctree