Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/docutils/utils/__init__.py: 34%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

331 statements  

1# $Id$ 

2# Author: David Goodger <goodger@python.org> 

3# Copyright: This module has been placed in the public domain. 

4 

5""" 

6Miscellaneous utilities for the documentation utilities. 

7""" 

8 

9from __future__ import annotations 

10 

11__docformat__ = 'reStructuredText' 

12 

13import itertools 

14import os 

15import os.path 

16import re 

17import sys 

18import unicodedata 

19import warnings 

20from pathlib import PurePath, Path 

21from typing import TYPE_CHECKING 

22 

23from docutils import ApplicationError, DataError 

24from docutils import io, nodes 

25# for backwards compatibility 

26from docutils.nodes import unescape # noqa: F401 (imported but unused) 

27 

28if TYPE_CHECKING: 

29 from collections.abc import Callable, Sequence, Iterable 

30 from typing import Any, Final, Literal, TextIO 

31 if sys.version_info[:2] >= (3, 12): 

32 from typing import TypeAlias 

33 else: 

34 from typing_extensions import TypeAlias 

35 

36 from docutils.nodes import Node, StrPath 

37 from docutils.frontend import Values 

38 

39 _ObserverFunc: TypeAlias = Callable[[nodes.system_message], None] 

40 

41 

class SystemMessage(ApplicationError):

    """Exception carrying the text and level of a system message."""

    def __init__(self, system_message: nodes.system_message, level: int,
                 ) -> None:
        """Use the text of `system_message` as the exception message.

        `level` is stored unchanged in ``self.level``.
        """
        message_text = system_message.astext()
        Exception.__init__(self, message_text)
        self.level = level

48 

49 

class SystemMessagePropagation(ApplicationError):
    """`ApplicationError` subclass that adds no behaviour of its own."""

52 

53 

class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced if the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Reporter.get_source_and_line is patched in by ``RSTState.runtime_init``
    get_source_and_line: Callable[[int|None], tuple[StrPath|None, int|None]]

    levels: Final[Sequence[str]] = (
        'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SEVERE')
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    DEBUG_LEVEL: Final = 0
    INFO_LEVEL: Final = 1
    WARNING_LEVEL: Final = 2
    ERROR_LEVEL: Final = 3
    SEVERE_LEVEL: Final = 4

    def __init__(
        self,
        source: StrPath,
        report_level: int,
        halt_level: int,
        stream: io.ErrorOutput|TextIO|str|Literal[False]|None = None,
        debug: bool = False,
        encoding: str|None = None,
        error_handler: str = 'backslashreplace',
    ) -> None:
        """Low level instantiating. See also `new_reporter()`.

        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string) or `False` (for discarding all stream messages)
              or `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        # Anything that is not already an `ErrorOutput` gets wrapped in one.
        if not isinstance(stream, io.ErrorOutput):
            stream = io.ErrorOutput(stream, encoding, error_handler)

        self.stream: io.ErrorOutput = stream
        """Where warning output is sent."""

        self.encoding: str = encoding or getattr(stream, 'encoding', 'ascii')
        """The output character encoding."""

        self.observers: list[_ObserverFunc] = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level: int = -1
        """The highest level system message generated so far."""

    def attach_observer(self, observer: _ObserverFunc) -> None:
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer: _ObserverFunc) -> None:
        """Remove `observer` from the list of observers."""
        self.observers.remove(observer)

    def notify_observers(self, message: nodes.system_message) -> None:
        """Call every attached observer with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self,
                       level: int,
                       message: str|Exception,
                       *children: Node,
                       **kwargs: Any
                       ) -> nodes.system_message:
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        :Parameters:
            - `level`: Message level, used as index into `self.levels`.
            - `message`: The message text; an `Exception` instance is
              converted with `str()`.
            - `children`: Passed on to `nodes.system_message`.
            - `kwargs`: Attributes for the `nodes.system_message`.
              The special keyword ``base_node`` is not passed on but used
              to look up default values for "source" and "line"
              (via `get_source_line()`).

        :Exception:
            `SystemMessage` if `level` is at or above `self.halt_level`.
        """
        # `message` can be a `str` or `Exception` instance.
        if isinstance(message, Exception):
            message = str(message)

        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            # explicitly passed "source"/"line" take precedence
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
                # assert source is not None, "line- but no source-argument"
        if 'source' not in attributes:
            # 'line' is absolute line number
            try:
                source, line = self.get_source_and_line(attributes.get('line'))
            except AttributeError:
                # `get_source_and_line` is only available after patching in
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        # assert attributes['line'] is not None, (message, kwargs)
        # assert attributes['source'] is not None, (message, kwargs)
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        # Messages that halt processing are written even if they are
        # below the report level.
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL
                            or level >= self.halt_level):
            self.stream.write(msg.astext() + '\n')
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args: Node, **kwargs: Any
              ) -> nodes.system_message|None:
        """
        Level-0, "DEBUG": an internal reporting issue.

        Typically, there is no effect on the processing. Level-0 system
        messages are handled separately from the others.

        Return ``None`` (without creating a system message)
        if `self.debug_flag` is false.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
        return None

    def info(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-1, "INFO": a minor issue that can be ignored.

        Typically, there is no effect on processing and level-1 system
        messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-2, "WARNING": an issue that should be addressed.

        If ignored, there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-3, "ERROR": an error that should be addressed.

        If ignored, the output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args: Node, **kwargs: Any) -> nodes.system_message:
        """
        Level-4, "SEVERE": a severe error that must be addressed.

        If ignored, the output will contain severe errors. Typically level-4
        system messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)

268 

269 

class ExtensionOptionError(DataError):
    """Base class for the extension option errors."""


class BadOptionError(ExtensionOptionError):
    """Invalid extension option field."""


class BadOptionDataError(ExtensionOptionError):
    """Invalid extension option data."""


class DuplicateOptionError(ExtensionOptionError):
    """Extension option specified more than once."""

274 

275 

def extract_extension_options(field_list: nodes.field_list,
                              options_spec: dict[str, Callable[[object], Any]],
                              ) -> dict[str, Any]:
    """
    Return a dictionary mapping extension option names to converted values.

    Combines `extract_options()` and `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = extract_options(field_list)
    return assemble_option_dict(option_list, options_spec)

301 

302 

def extract_options(field_list: nodes.field_list
                    ) -> list[tuple[str, str|None]]:
    """
    Collect (name, value) option pairs from the fields of `field_list`.

    The name is the lowercased field name, the value is the text of the
    field body or ``None`` if the field body is empty.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        name_text = field[0].astext()
        if len(name_text.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(name_text.lower())
        body = field[1]
        if len(body) == 0:
            # option without value
            pairs.append((name, None))
            continue
        is_single_text_paragraph = (
            len(body) == 1
            and isinstance(body[0], nodes.paragraph)
            and len(body[0]) == 1
            and isinstance(body[0][0], nodes.Text))
        if not is_single_text_paragraph:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, body[0][0].astext()))
    return pairs

337 

338 

def assemble_option_dict(option_list: list[tuple[str, str|None]],
                         options_spec: dict[str, Callable[[object], Any]],
                         ) -> dict[str, Any]:
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.
          The value ``None`` explicitly disables an option.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Re-raise the same exception class with the option name and
            # value prepended to the message.  Exception arguments are not
            # necessarily strings, so convert them before joining.
            detail_text = ' '.join(str(arg) for arg in detail.args)
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, detail_text)) from detail
    return options

372 

373 

class NameValueError(DataError):
    """Invalid input for `extract_name_value()`."""

375 

376 

377def decode_path(path: str|bytes|None) -> str: 

378 """ 

379 Ensure `path` is Unicode. Return `str` instance. 

380 

381 Decode file/path string in a failsafe manner if not already done. 

382 

383 Deprecated. 

384 """ 

385 # TODO: is this still required with Python 3? 

386 if isinstance(path, str): 

387 return path 

388 if path is None: 

389 return '' 

390 try: 

391 path = path.decode(sys.getfilesystemencoding(), 'strict') 

392 except AttributeError: 

393 raise ValueError('`path` value must be a String or ``None``, ' 

394 f'not {path!r}') 

395 except UnicodeDecodeError: 

396 try: 

397 path = path.decode('utf-8', 'strict') 

398 except UnicodeDecodeError: 

399 path = path.decode('ascii', 'replace') 

400 return path 

401 

402 

def extract_name_value(line):
    """
    Parse a line of the form "name=value ..." into a list of (name, value).

    Names are lowercased.  Values may be enclosed in single or double
    quotes; unquoted values end at the first space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    remaining = line
    while remaining:
        equals_index = remaining.find('=')
        if equals_index == -1:
            raise NameValueError('missing "="')
        name = remaining[:equals_index].strip()
        if equals_index == 0 or not name:
            raise NameValueError(
                'missing attribute name before "="')
        remaining = remaining[equals_index+1:].lstrip()
        if not remaining:
            raise NameValueError(
                'missing value after "%s="' % name)
        quote = remaining[0]
        if quote in '\'"':
            closing_index = remaining.find(quote, 1)
            if closing_index == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (name, quote))
            # character after the closing quote (empty at end of line):
            follower = remaining[closing_index+1:closing_index+2]
            if follower.strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (name, quote))
            value = remaining[1:closing_index]
            remaining = remaining[closing_index+1:].lstrip()
        else:
            value, _separator, rest = remaining.partition(' ')
            remaining = rest.lstrip()
        pairs.append((name.lower(), value))
    return pairs

446 

447 

def new_reporter(source_path: StrPath, settings: Values) -> Reporter:
    """
    Create a `Reporter` configured from `settings` and return it.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The values of ``report_level``, ``halt_level``,
            ``warning_stream``, ``debug``, ``error_encoding``, and
            ``error_encoding_error_handler`` are used.
    """
    return Reporter(
        source=source_path,
        report_level=settings.report_level,
        halt_level=settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler,
    )

464 

465 

def new_document(source_path: StrPath, settings: Values|None = None
                 ) -> nodes.document:
    """
    Create and return an empty document object with a new `Reporter`.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.

            For example, if parsing rST, at least provide the rst-parser
            settings, obtainable as follows:

            Defaults for parser component::

                settings = docutils.frontend.get_default_settings(
                               docutils.parsers.rst.Parser)

            Defaults and configuration file customizations::

                settings = docutils.core.Publisher(
                    parser=docutils.parsers.rst.Parser).get_settings()

    """
    # Import at top of module would lead to circular dependency!
    from docutils import frontend
    document_settings = (frontend.get_default_settings()
                         if settings is None else settings)
    source = decode_path(source_path)
    document = nodes.document(document_settings,
                              new_reporter(source, document_settings),
                              source=source)
    document.note_source(source, -1)
    return document

502 

503 

def clean_rcs_keywords(
    paragraph: nodes.paragraph,
    keyword_substitutions: Sequence[tuple[re.Pattern[str], str]],
) -> None:
    """
    Apply the first matching keyword substitution to `paragraph` in place.

    Nothing is done unless `paragraph` consists of a single `nodes.Text`
    child.  Otherwise, the text is replaced by the result of the first
    substitution whose pattern is found in the text (if there is one).

    :Parameters:
        - `paragraph`: The node whose text child may be replaced.
        - `keyword_substitutions`: Sequence of (compiled pattern,
          replacement) pairs, tried in order.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
            return  # only the first matching substitution is applied

515 

516 

def relative_path(source: StrPath|None, target: StrPath) -> str:
    """
    Return a path to the file `target`, relative to the file `source`.

    The return value is a `str` suitable to be included in `source`
    as a reference to `target`.

    :Parameters:
        `source` : path-like object or None
            Path of a file in the start directory for the relative path
            (the file does not need to exist).
            The value ``None`` is replaced with "<cwd>/dummy_file".
        `target` : path-like object
            End point of the returned relative path.

    Differences to `os.path.relpath()`:

    * Inverse argument order.
    * `source` is assumed to be a FILE in the start directory (add a "dummy"
      file name to obtain the path relative from a directory)
      while `os.path.relpath()` expects a DIRECTORY as `start` argument.
    * Always use Posix path separator ("/") for the output.
    * Use `os.sep` for parsing the input
      (changing the value of `os.sep` is ignored by `os.relpath()`).
    * If there is no common prefix, return the absolute path to `target`.

    Differences to `pathlib.PurePath.relative_to(other)`:

    * pathlib offers an object oriented interface.
    * `source` expects path to a FILE while `other` expects a DIRECTORY.
    * `source` defaults to a file in the cwd, no default value for `other`.
    * `relative_path()` always returns a path (relative or absolute),
      while `PurePath.relative_to()` raises a ValueError
      if `target` is not a subpath of `other` (no ".." inserted).
    """
    start_file = source or type(target)('dummy_file')
    source_parts = os.path.abspath(start_file).split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components both paths have in common:
    common = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        common += 1
    # One step up for every remaining directory of `source`
    # (the last part of `source` is the file name).
    steps_up = len(source_parts) - common - 1
    return '/'.join(['..'] * steps_up + target_parts[common:])

570 

571 

def get_stylesheet_reference(settings: Values,
                             relative_to: StrPath|None = None
                             ) -> str:
    """
    Return the stylesheet reference stored in the `settings` object.

    If ``settings.stylesheet_path`` is set, return it as path relative to
    `relative_to` (default: ``settings._destination``),
    else return ``settings.stylesheet``.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.
    """
    warnings.warn('utils.get_stylesheet_reference()'
                  ' is obsoleted by utils.get_stylesheet_list()'
                  ' and will be removed in Docutils 2.0.',
                  DeprecationWarning, stacklevel=2)
    if not settings.stylesheet_path:
        return settings.stylesheet
    assert not settings.stylesheet, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    start = settings._destination if relative_to is None else relative_to
    return relative_path(start, settings.stylesheet_path)

594 

595 

# Return 'stylesheet' or 'stylesheet_path' arguments as list.
#
# The original settings arguments are kept unchanged: you can test
# with e.g. ``if settings.stylesheet_path: ...``.
#
# Differences to the deprecated `get_stylesheet_reference()`:
# * return value is a list
# * no re-writing of the path (and therefore no optional argument)
#   (if required, use ``utils.relative_path(source, target)``
#   in the calling script)
def get_stylesheet_list(settings: Values) -> list[str]:
    """Return the list of stylesheet references stored in `settings`.

    Use ``settings.stylesheet_path`` or, if this is empty,
    ``settings.stylesheet``.  A comma-separated string is split into a list.
    Values from ``settings.stylesheet_path`` are passed through
    `find_file_in_dirs()` with ``settings.stylesheet_dirs``.
    """
    assert not settings.stylesheet or not settings.stylesheet_path, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    references = settings.stylesheet_path or settings.stylesheet or []
    # programmatically set default may be string with comma separated list:
    if not isinstance(references, list):
        items = references.split(',')
        references = []
        for item in items:
            references.append(item.strip())
    if not settings.stylesheet_path:
        return references
    # expand relative paths if found in stylesheet-dirs:
    return [find_file_in_dirs(reference, settings.stylesheet_dirs)
            for reference in references]

619 

620 

def find_file_in_dirs(path: StrPath, dirs: Iterable[StrPath]) -> str:
    """
    Look for the file `path` in the directories `dirs`.

    Return the first combination of a directory (with "~" expanded) and
    `path` that exists.  Return `path` itself if it is absolute or
    if no match is found.  The return value uses "/" as path separator.
    """
    relative = Path(path)
    if relative.is_absolute():
        return relative.as_posix()
    candidates = (Path(directory).expanduser() / relative
                  for directory in dirs)
    # fall back to the unexpanded path if there is no existing candidate
    found = next((candidate for candidate in candidates
                  if candidate.exists()), relative)
    return found.as_posix()

635 

636 

def get_trim_footnote_ref_space(settings: Values) -> bool:
    """
    Tell whether the space before a footnote reference is to be trimmed.

    Return the setting "trim_footnote_reference_space" unless it is None
    (or missing, in which case it is set to None).

    For the value None, return True if the setting "footnote_references"
    is 'superscript' and False otherwise.
    """
    explicit = settings.setdefault('trim_footnote_reference_space', None)
    if explicit is not None:
        return settings.trim_footnote_reference_space
    return getattr(settings, 'footnote_references', None) == 'superscript'

650 

651 

def get_source_line(node: Node) -> tuple[StrPath|None, int|None]:
    """
    Look up the "source" and "line" attributes of `node`.

    Return the values from the first node with a true "source" or "line"
    value, starting at `node` and following the "parent" links.
    Return ``(None, None)`` if there is no such node.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None

662 

663 

def escape2null(text: str) -> str:
    """Return `text` with every escaping backslash replaced by a null char.

    The character following an escaping backslash is kept unchanged
    (it is not interpreted as escape character).
    """
    pieces = []
    start = 0
    backslash = text.find('\\')
    while backslash != -1:
        pieces.append(text[start:backslash])
        pieces.append('\x00' + text[backslash+1:backslash+2])
        start = backslash + 2  # skip character after escape
        backslash = text.find('\\', start)
    pieces.append(text[start:])
    return ''.join(pieces)

676 

677 

def split_escaped_whitespace(text: str) -> list[str]:
    """
    Return the list of strings obtained by splitting `text`
    on escaped whitespace (null+space or null+newline).
    """
    parts = []
    for chunk in text.split('\x00 '):
        parts.extend(chunk.split('\x00\n'))
    return parts

687 

688 

def strip_combining_chars(text: str) -> str:
    """Return `text` without combining characters."""
    kept = []
    for char in text:
        if unicodedata.combining(char):
            continue
        kept.append(char)
    return ''.join(kept)

691 

692 

def find_combining_chars(text: str) -> list[int]:
    """Return the positions of all combining characters in `text`.

    >>> from docutils.utils import find_combining_chars
    >>> find_combining_chars('A t̆ab̆lĕ')
    [3, 6, 9]

    """
    positions = []
    for position, char in enumerate(text):
        if unicodedata.combining(char):
            positions.append(position)
    return positions

702 

703 

def column_indices(text: str) -> list[int]:
    """Return the indices of all non-combining characters in `text`.

    >>> from docutils.utils import column_indices
    >>> column_indices('A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    combining = set(find_combining_chars(text))
    return [index for index in range(len(text)) if index not in combining]

718 

719 

east_asian_widths = {
    'W': 2,   # Wide
    'F': 2,   # Full-width (wide)
    'Na': 1,  # Narrow
    'H': 1,   # Half-width (narrow)
    'N': 1,   # Neutral (not East Asian, treated as narrow)
    'A': 1,   # Ambiguous (s/b wide in East Asian context,
              # narrow otherwise, but that doesn't work)
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""

729 

730 

def column_width(text: str) -> int:
    """Return the number of columns required to display `text`.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    """
    width = 0
    for char in text:
        width += east_asian_widths[unicodedata.east_asian_width(char)]
    # combining chars do not take up a column of their own:
    return width - len(find_combining_chars(text))

741 

742 

def uniq(L: list) -> list:
    """Return a new list with the items of `L` in original order,
    skipping items that compare equal to an earlier item.
    """
    unique_items = []
    for candidate in L:
        if candidate in unique_items:
            continue
        unique_items.append(candidate)
    return unique_items

749 

750 

def normalize_language_tag(tag: str) -> list[str]:
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The tag is lowercased and split into subtags.  The result contains the
    main tag combined with every combination of the remaining subtags
    (longest combinations first) and, as last item, the main tag alone.

    Example:

    >>> from docutils.utils import normalize_language_tag
    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # normalize:
    normalized = tag.lower().replace('-', '_')
    # split (except singletons, which mark the following tag as non-standard):
    normalized = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', normalized)
    base, *subtags = normalized.split('_')
    # find all combinations of subtags
    combinations = []
    for size in range(len(subtags), 0, -1):
        for selection in itertools.combinations(subtags, size):
            combinations.append('-'.join((base, *selection)))
    combinations.append(base)
    return combinations

776 

777 

778def xml_declaration(encoding: str|Literal['unicode']|None = None) -> str: 

779 """Return an XML text declaration. 

780 

781 Include an encoding declaration, if `encoding` 

782 is not 'unicode', '', or None. 

783 """ 

784 if encoding and encoding.lower() != 'unicode': 

785 encoding_declaration = f' encoding="{encoding}"' 

786 else: 

787 encoding_declaration = '' 

788 return f'<?xml version="1.0"{encoding_declaration}?>\n' 

789 

790 

class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self,
                 output_file: Literal['-'] | StrPath | None = None,
                 dependencies: Iterable[StrPath] = ()
                 ) -> None:
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.

        If output_file is None, no file output is done when calling add().
        """
        self.set_output(output_file)
        self.add(*dependencies)

    def set_output(self, output_file: Literal['-']|StrPath|None) -> None:
        """
        Set the output file and clear the list of already added
        dependencies.

        The specified file is immediately overwritten.
        A previously set output file is not closed by this method.

        If `output_file` is '-', the output will be written to stdout.
        The empty string or None stop output.
        """
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w', encoding='utf-8')
        else:
            self.file = None
        self.list = []

    def add(self, *paths: StrPath) -> None:
        """
        Append each item of `paths` to `self.list` unless it is already there.

        Also write it to `self.file` unless it is already in `self.list`
        or `self.file` is ``None``.
        """
        for path in paths:
            if isinstance(path, PurePath):
                path = path.as_posix()  # use '/' as separator
            if path not in self.list:
                self.list.append(path)
                if self.file is not None:
                    self.file.write(path+'\n')

    def close(self) -> None:
        """
        Close the output file (unless it is `sys.stdout`) and stop output.

        Do nothing if there is no output file.
        """
        # `self.file` is None if output is disabled or already closed.
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self) -> str:
        try:
            output_file = self.file.name
        except AttributeError:
            # no output file (or a file object without "name" attribute)
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)