Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/utils/__init__.py: 55%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

329 statements  

1# $Id$ 

2# Author: David Goodger <goodger@python.org> 

3# Copyright: This module has been placed in the public domain. 

4 

5""" 

6Miscellaneous utilities for the documentation utilities. 

7""" 

8 

9from __future__ import annotations 

10 

11__docformat__ = 'reStructuredText' 

12 

13import itertools 

14import os 

15import os.path 

16import re 

17import sys 

18import unicodedata 

19import warnings 

20from pathlib import PurePath, Path 

21 

22from docutils import ApplicationError, DataError 

23from docutils import io, nodes 

24# for backwards compatibility 

25from docutils.nodes import unescape # noqa: F401 (imported but unused) 

26 

27TYPE_CHECKING = False 

28if TYPE_CHECKING: 

29 from collections.abc import Callable, Sequence, Iterable 

30 from typing import Any, Final, Literal, TextIO 

31 

32 from docutils.utils._typing import TypeAlias 

33 

34 from docutils.nodes import StrPath 

35 from docutils.frontend import Values 

36 

37 _ObserverFunc: TypeAlias = Callable[[nodes.system_message], None] 

38 

39 

class SystemMessage(ApplicationError):
    """Exception that carries a system message's text and severity level."""

    def __init__(self, system_message: nodes.system_message, level: int,
                 ) -> None:
        """Use the text of `system_message` as the exception message.

        :Parameters:
          - `system_message`: node whose ``astext()`` result becomes the
            exception message.
          - `level`: numeric severity; stored as `self.level`.
        """
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level

46 

47 

class SystemMessagePropagation(ApplicationError):
    """`ApplicationError` subclass without additional behaviour."""

50 

51 

class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced if the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Reporter.get_source_and_line is patched in by ``RSTState.runtime_init``
    get_source_and_line: Callable[[int|None], tuple[StrPath|None, int|None]]

    levels: Final[Sequence[str]] = (
        'DEBUG', 'INFO', 'WARNING', 'ERROR', 'SEVERE')
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    DEBUG_LEVEL: Final = 0
    INFO_LEVEL: Final = 1
    WARNING_LEVEL: Final = 2
    ERROR_LEVEL: Final = 3
    SEVERE_LEVEL: Final = 4

    def __init__(
        self,
        source: StrPath,
        report_level: int,
        halt_level: int,
        stream: io.ErrorOutput|TextIO|str|Literal[False]|None = None,
        debug: bool = False,
        encoding: str|None = None,
        error_handler: str = 'backslashreplace',
    ) -> None:
        """Low level instantiating. See also `new_reporter()`.

        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string) or `False` (for discarding all stream messages)
              or `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        # Anything that is not already an ErrorOutput gets wrapped in one.
        if not isinstance(stream, io.ErrorOutput):
            stream = io.ErrorOutput(stream, encoding, error_handler)

        self.stream: io.ErrorOutput = stream
        """Where warning output is sent."""

        self.encoding: str = encoding or getattr(stream, 'encoding', 'ascii')
        """The output character encoding."""

        self.observers: list[_ObserverFunc] = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level: int = -1
        """The highest level system message generated so far."""

    def attach_observer(self, observer: _ObserverFunc) -> None:
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer: _ObserverFunc) -> None:
        """Remove `observer` from the list of attached observers."""
        self.observers.remove(observer)

    def notify_observers(self, message: nodes.system_message) -> None:
        """Call every attached observer with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self,
                       level: int,
                       message: str|Exception,
                       *children,
                       **kwargs: Any
                       ) -> nodes.system_message:
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        `message` may be a `str` or an `Exception` instance (converted with
        ``str()``).  A ``base_node`` keyword argument is used to look up
        default "source" and "line" attributes and is not passed on to the
        generated node.
        """
        # `message` can be a `str` or `Exception` instance.
        if isinstance(message, Exception):
            message = str(message)

        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
                # assert source is not None, "line- but no source-argument"
        if 'source' not in attributes:
            # 'line' is absolute line number
            try:
                source, line = self.get_source_and_line(attributes.get('line'))
            except AttributeError:
                # `get_source_and_line` is only available once patched in.
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        # assert attributes['line'] is not None, (message, kwargs)
        # assert attributes['source'] is not None, (message, kwargs)
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL
                            or level >= self.halt_level):
            self.stream.write(msg.astext() + '\n')
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs: Any) -> nodes.system_message|None:
        """
        Level-0, "DEBUG": an internal reporting issue.

        Typically, there is no effect on the processing. Level-0 system
        messages are handled separately from the others.

        Return ``None`` (without generating a message) if the debug
        switch is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
        return None

    def info(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-1, "INFO": a minor issue that can be ignored.

        Typically, there is no effect on processing and level-1 system
        messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-2, "WARNING": an issue that should be addressed.

        If ignored, there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-3, "ERROR": an error that should be addressed.

        If ignored, the output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs: Any) -> nodes.system_message:
        """
        Level-4, "SEVERE": a severe error that must be addressed.

        If ignored, the output will contain severe errors. Typically level-4
        system messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)

266 

267 

class ExtensionOptionError(DataError):
    """Base class of the extension option exceptions in this module."""

class BadOptionError(ExtensionOptionError):
    """Raised by `extract_options()` for invalid fields."""

class BadOptionDataError(ExtensionOptionError):
    """Raised by `extract_options()` for invalid option data."""

class DuplicateOptionError(ExtensionOptionError):
    """Raised by `assemble_option_dict()` for duplicate options."""

272 

273 

def extract_extension_options(field_list: nodes.field_list,
                              options_spec: dict[str, Callable[[object], Any]],
                              ) -> dict[str, Any]:
    """
    Return a dictionary mapping extension option names to converted values.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    # Two steps: pull (name, value) pairs out of the field list,
    # then convert the values according to `options_spec`.
    option_list = extract_options(field_list)
    return assemble_option_dict(option_list, options_spec)

299 

300 

def extract_options(field_list: nodes.field_list
                    ) -> list[tuple[str, str|None]]:
    """
    Collect (name, value) option pairs from the fields of `field_list`.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    The name is lowercased; the value is ``None`` for an empty field body.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        raw_name = field[0].astext()
        if len(raw_name.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(raw_name.lower())
        body = field[1]
        if len(body) == 0:
            # empty field body: option without a value
            pairs.append((name, None))
            continue
        single_text_paragraph = (
            len(body) == 1
            and isinstance(body[0], nodes.paragraph)
            and len(body[0]) == 1
            and isinstance(body[0][0], nodes.Text))
        if not single_text_paragraph:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, body[0][0].astext()))
    return pairs

335 

336 

def assemble_option_dict(option_list: list[tuple[str, str|None]],
                         options_spec: dict[str, Callable[[object], Any]],
                         ) -> dict[str, Any]:
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).

    A `ValueError` or `TypeError` raised by a conversion function is
    re-raised as an exception of the same class with option name and value
    prepended to the message; the original exception is chained as
    ``__cause__``.
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Exception args are not necessarily strings:
            # convert before joining.
            reason = ' '.join(str(arg) for arg in detail.args)
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, reason)) from detail
    return options

370 

371 

class NameValueError(DataError):
    """Raised by `extract_name_value()` for invalid input."""

373 

374 

def decode_path(path: str|bytes|None) -> str:
    """
    Return `path` as `str` instance.

    `str` values are returned unchanged, ``None`` becomes the empty string.
    Other values are decoded with the file system encoding, then "utf-8",
    and finally "ascii" with the "replace" error handler.
    Values without a ``decode()`` method raise a `ValueError`.

    Deprecated. Will be removed in Docutils 1.0.
    """
    if path is None:
        return ''
    if isinstance(path, str):
        return path
    try:
        return path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:
        raise ValueError('`path` value must be a String or ``None``, '
                         f'not {path!r}')
    except UnicodeDecodeError:
        try:
            return path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            # last resort: never fails
            return path.decode('ascii', 'replace')

398 

399 

def extract_name_value(line):
    """
    Parse a line of the form "name=value ..." into a list of (name, value).

    Names are lowercased.  Values may be enclosed in single or double
    quotes; unquoted values end at the next space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    while line:
        name_part, separator, remainder = line.partition('=')
        if not separator:
            raise NameValueError('missing "="')
        attname = name_part.strip()
        if not attname:
            raise NameValueError('missing attribute name before "="')
        line = remainder.lstrip()
        if not line:
            raise NameValueError(f'missing value after "{attname}="')
        if line[0] in '\'"':
            # quoted value: runs up to the matching end quote
            endquote_index = line.find(line[0], 1)
            if endquote_index == -1:
                raise NameValueError(
                    f'attribute "{attname}" missing end quote ({line[0]})')
            following = line[endquote_index + 1:endquote_index + 2]
            if following.strip():
                raise NameValueError(f'attribute "{attname}" end quote '
                                     f'({line[0]}) not followed by whitespace')
            data = line[1:endquote_index]
            line = line[endquote_index+1:].lstrip()
        else:
            # unquoted value: runs up to the next space (or the end)
            data, _, rest = line.partition(' ')
            line = rest.lstrip()
        pairs.append((attname.lower(), data))
    return pairs

440 

441 

def new_reporter(source_path: StrPath, settings: Values) -> Reporter:
    """
    Create a `Reporter` configured from `settings` and return it.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes ``report_level``,
            ``halt_level``, ``warning_stream``, ``debug``,
            ``error_encoding``, and ``error_encoding_error_handler``
            are read.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler,
    )

458 

459 

def new_document(source_path: StrPath, settings: Values|None = None
                 ) -> nodes.document:
    """
    Create and return a new, empty document object.

    :Parameters:
        `source_path` : str or pathlib.Path
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.

            For example, if parsing rST, at least provide the rst-parser
            settings, obtainable as follows:

            Defaults for parser component::

                settings = docutils.frontend.get_default_settings(
                               docutils.parsers.rst.Parser)

            Defaults and configuration file customizations::

                settings = docutils.core.Publisher(
                    parser=docutils.parsers.rst.Parser).get_settings()

    """
    # Import at top of module would lead to circular dependency!
    from docutils import frontend
    if settings is None:
        settings = frontend.get_default_settings()
    document = nodes.document(settings,
                              new_reporter(source_path, settings),
                              source=source_path)
    document.note_source(source_path, -1)
    return document

495 

496 

def clean_rcs_keywords(
    paragraph: nodes.paragraph,
    keyword_substitutions: Sequence[tuple[re.Pattern[str], str]],
) -> None:
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` consists of a single `nodes.Text`
    child.  Otherwise, the (pattern, substitution) pairs in
    `keyword_substitutions` are tried in order; for the first pattern found
    in the text, the child is replaced by a new `nodes.Text` with
    ``pattern.sub(substitution, text)`` and processing stops.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
            return

508 

509 

def relative_path(source: StrPath|None, target: StrPath) -> str:
    """
    Build and return a path to `target`, relative to `source` (both files).

    The return value is a `str` suitable to be included in `source`
    as a reference to `target`.

    :Parameters:
        `source` : path-like object or None
            Path of a file in the start directory for the relative path
            (the file does not need to exist).
            The value ``None`` is replaced with "<cwd>/dummy_file".
        `target` : path-like object
            End point of the returned relative path.

    Differences to `os.path.relpath()`:

    * Inverse argument order.
    * `source` is assumed to be a FILE in the start directory (add a "dummy"
      file name to obtain the path relative from a directory)
      while `os.path.relpath()` expects a DIRECTORY as `start` argument.
    * Always use Posix path separator ("/") for the output.
    * Use `os.sep` for parsing the input.
    * If there is no common prefix, return the absolute path to `target`.

    Differences to `pathlib.PurePath.relative_to(other)`:

    * pathlib offers an object oriented interface.
    * `source` expects path to a FILE while `other` expects a DIRECTORY.
    * `relative_path()` always returns a path (relative or absolute),
      while `PurePath.relative_to()` raises a ValueError
      if `target` is not a subpath of `other` (no ".." inserted).
    """
    start = source or type(target)('dummy_file')
    source_parts = os.path.abspath(start).split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components both paths have in common:
    shared = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        shared += 1
    # One '..' for every remaining source directory (the last remaining
    # source component is the file name and does not count).
    climb = ['..'] * (len(source_parts) - shared - 1)
    return '/'.join(climb + target_parts[shared:])

563 

564 

def get_stylesheet_reference(settings: Values,
                             relative_to: StrPath|None = None
                             ) -> str:
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated (a `DeprecationWarning` naming the planned removal is
    issued on every call).
    Use get_stylesheet_list() instead to enable specification of multiple
    stylesheets as a comma-separated list.

    If ``settings.stylesheet_path`` is set, return it as path relative to
    `relative_to` (default: ``settings.output_path``), else return
    ``settings.stylesheet``.
    """
    warnings.warn('utils.get_stylesheet_reference()'
                  ' is obsoleted by utils.get_stylesheet_list()'
                  ' and will be removed in Docutils 2.0.',
                  DeprecationWarning, stacklevel=2)
    if not settings.stylesheet_path:
        return settings.stylesheet
    assert not settings.stylesheet, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    if relative_to is None:
        relative_to = settings.output_path
    return relative_path(relative_to, settings.stylesheet_path)

587 

588 

589# Return 'stylesheet' or 'stylesheet_path' arguments as list. 

590# 

591# The original settings arguments are kept unchanged: you can test 

592# with e.g. ``if settings.stylesheet_path: ...``. 

593# 

594# Differences to the deprecated `get_stylesheet_reference()`: 

595# * return value is a list 

596# * no re-writing of the path (and therefore no optional argument) 

597# (if required, use ``utils.relative_path(source, target)`` 

598# in the calling script) 

def get_stylesheet_list(settings: Values) -> list[str]:
    """Return the stylesheet references from the settings object as a list.

    The value of ``settings.stylesheet_path`` or ``settings.stylesheet``
    (which are mutually exclusive) is used; a string value is split at
    commas.  Entries from ``stylesheet_path`` are looked up in
    ``settings.stylesheet_dirs``.
    """
    assert not settings.stylesheet or not settings.stylesheet_path, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    stylesheets = settings.stylesheet_path or settings.stylesheet or []
    # programmatically set default may be string with comma separated list:
    if not isinstance(stylesheets, list):
        stylesheets = [item.strip() for item in stylesheets.split(',')]
    if not settings.stylesheet_path:
        return stylesheets
    # expand relative paths if found in stylesheet-dirs:
    return [find_file_in_dirs(item, settings.stylesheet_dirs)
            for item in stylesheets]

612 

613 

def find_file_in_dirs(path: StrPath, dirs: Iterable[StrPath]) -> str:
    """
    Look up `path` in the directories `dirs`.

    Return the first expansion that matches an existing file, as string
    with "/" as path separator.  Absolute paths and paths not found in any
    of the directories are returned unexpanded (in the same string form).
    A leading "~" in the directory names is expanded.
    """
    wanted = Path(path)
    if not wanted.is_absolute():
        for directory in dirs:
            candidate = Path(directory).expanduser() / wanted
            if candidate.exists():
                return candidate.as_posix()
    return wanted.as_posix()

628 

629 

def get_trim_footnote_ref_space(settings: Values) -> bool:
    """
    Tell whether the space before footnote references is to be trimmed.

    Return the setting ``trim_footnote_reference_space`` unless it is
    None (it is set to None if missing).

    For the value None, return True if the ``footnote_references`` setting
    is 'superscript', else False.
    """
    configured = settings.setdefault('trim_footnote_reference_space', None)
    if configured is not None:
        return settings.trim_footnote_reference_space
    return getattr(settings, 'footnote_references', None) == 'superscript'

643 

644 

def get_source_line(node) -> tuple[StrPath|None, int|None]:
    """
    Return the "source" and "line" attributes of `node` or, if both are
    empty, of the closest ancestor where at least one of them is set.

    Return ``(None, None)`` if there is no such node.
    """
    current = node
    while current:
        if current.source or current.line:
            return current.source, current.line
        # nothing here: continue the search with the parent
        current = current.parent
    return None, None

655 

656 

def escape2null(text: str) -> str:
    """Return `text` with escape-backslashes replaced by null characters.

    The character following an escape-backslash is kept unchanged
    (i.e. an escaped backslash is not treated as escape itself).
    """
    pieces = []
    position = 0
    backslash = text.find('\\', position)
    while backslash != -1:
        pieces.append(text[position:backslash])
        pieces.append('\x00' + text[backslash + 1:backslash + 2])
        position = backslash + 2  # skip character after escape
        backslash = text.find('\\', position)
    pieces.append(text[position:])
    return ''.join(pieces)

669 

670 

def split_escaped_whitespace(text: str) -> list[str]:
    """
    Return the list of strings obtained by splitting `text`
    at escaped whitespace (null+space or null+newline).
    """
    pieces = []
    for chunk in text.split('\x00 '):
        pieces.extend(chunk.split('\x00\n'))
    return pieces

680 

681 

def strip_combining_chars(text: str) -> str:
    """Return `text` without Unicode combining characters."""
    kept = [char for char in text if not unicodedata.combining(char)]
    return ''.join(kept)

684 

685 

def find_combining_chars(text: str) -> list[int]:
    """Return indices of all combining chars in Unicode string `text`.

    >>> from docutils.utils import find_combining_chars
    >>> find_combining_chars('A t̆ab̆lĕ')
    [3, 6, 9]

    """
    indices = []
    for position, char in enumerate(text):
        if unicodedata.combining(char):
            indices.append(position)
    return indices

695 

696 

def column_indices(text: str) -> list[int]:
    """Indices of Unicode string `text` when skipping combining characters.

    >>> from docutils.utils import column_indices
    >>> column_indices('A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    return [position for position, char in enumerate(text)
            if not unicodedata.combining(char)]

711 

712 

east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1,   # Ambiguous (s/b wide in East Asian context,
                     }         # narrow otherwise, but that doesn't work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""

722 

723 

def column_width(text: str) -> int:
    """Return the number of columns required to display `text`.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    """
    char_widths = [east_asian_widths[unicodedata.east_asian_width(char)]
                   for char in text]
    # correction for combining chars:
    combining_count = len(find_combining_chars(text))
    return sum(char_widths) - combining_count

734 

735 

def uniq(L: list) -> list:
    """Return a new list with the items of `L` without duplicates.

    The order of first occurrences is kept.  Items are compared for
    equality and do not need to be hashable.
    """
    unique = []
    for candidate in L:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique

742 

743 

def normalize_language_tag(tag: str) -> list[str]:
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The list starts with the combination of all subtags and
    ends with the base tag.

    Example:

    >>> from docutils.utils import normalize_language_tag
    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # normalize:
    normalized = tag.lower().replace('-', '_')
    # split (except singletons, which mark the following tag as non-standard):
    normalized = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', normalized)
    base, *subtags = normalized.split('_')
    # find all combinations of subtags
    variants = []
    for size in range(len(subtags), 0, -1):
        for combination in itertools.combinations(subtags, size):
            variants.append('-'.join((base,) + combination))
    variants.append(base)
    return variants

769 

770 

def xml_declaration(encoding: str|Literal['unicode']|None = None) -> str:
    """Return an XML text declaration (with trailing newline).

    The encoding declaration is omitted if `encoding`
    is 'unicode' (in any case), '', or None.
    """
    omit_encoding = not encoding or encoding.lower() == 'unicode'
    encoding_declaration = '' if omit_encoding else f' encoding="{encoding}"'
    return f'<?xml version="1.0"{encoding_declaration}?>\n'

782 

783 

class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self,
                 output_file: Literal['-'] | StrPath | None = None,
                 dependencies: Iterable[StrPath] = ()
                 ) -> None:
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.

        If output_file is None, no file output is done when calling add().
        """
        self.set_output(output_file)
        self.add(*dependencies)

    def set_output(self, output_file: Literal['-']|StrPath|None) -> None:
        """
        Set the output file and clear the list of already added
        dependencies.

        The specified file is immediately overwritten.

        If `output_file` is '-', the output will be written to stdout.
        The empty string or None stop output.
        """
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w', encoding='utf-8')
        else:
            self.file = None
        self.list = []

    def add(self, *paths: StrPath) -> None:
        """
        Append `path` to `self.list` unless it is already there.

        Also append to `self.file` unless it is already there
        or `self.file` is `None`.
        """
        for path in paths:
            if isinstance(path, PurePath):
                path = path.as_posix()  # use '/' as separator
            if path not in self.list:
                self.list.append(path)
                if self.file is not None:
                    self.file.write(path+'\n')

    def close(self) -> None:
        """
        Close the output file.

        `sys.stdout` is never closed.  Calling `close()` without an
        output file (or repeatedly) is a no-op.
        """
        # `self.file` is None if no output file was set or after `close()`.
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self) -> str:
        try:
            output_file = self.file.name
        except AttributeError:
            # no output file (or a file object without name)
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)