Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/docutils/utils/__init__.py: 51%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

333 statements  

1# $Id$ 

2# Author: David Goodger <goodger@python.org> 

3# Copyright: This module has been placed in the public domain. 

4 

5""" 

6Miscellaneous utilities for the documentation utilities. 

7""" 

8 

9__docformat__ = 'reStructuredText' 

10 

11import sys 

12import os 

13import os.path 

14from pathlib import PurePath, Path 

15import re 

16import itertools 

17import warnings 

18import unicodedata 

19 

20from docutils import ApplicationError, DataError, __version_info__ 

21from docutils import io, nodes 

22# for backwards compatibility 

23from docutils.nodes import unescape # noqa: F401 

24 

25 

class SystemMessage(ApplicationError):

    """Exception carrying the text and the level of a system message."""

    def __init__(self, system_message, level):
        """
        :Parameters:
            - `system_message`: object providing ``astext()``; its result
              becomes the exception text.
            - `level`: the level of the message, stored as `self.level`.
        """
        text = system_message.astext()
        Exception.__init__(self, text)
        self.level = level

31 

32 

class SystemMessagePropagation(ApplicationError):

    """`ApplicationError` subclass that adds no behaviour of its own."""

35 

36 

class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five system message levels exist, each with a convenience method:
    `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  It is created
    with a reporting threshold (when to write warnings), a halting
    threshold (when to raise exceptions), a debug switch, and an I/O
    stream for warnings.  All of these are kept as instance attributes.

    Every generated system message has its level compared to the stored
    thresholds; a warning is written and/or an exception is raised as
    appropriate.  Debug messages are produced whenever the debug switch
    is on, independently of the thresholds.  Output goes to the stored
    warning stream unless that is set to ''.

    The class also uses a modified form of the "Observer" pattern
    [GoF95]_ to track generated system messages.  Call `attach_observer`
    before parsing with a function or bound method that accepts system
    messages; `detach_observer` removes it again so that another one can
    take its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Names of the system message levels, indexed by numeric level.
    levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'SEVERE']

    # Numeric system message level constants.
    DEBUG_LEVEL = 0
    INFO_LEVEL = 1
    WARNING_LEVEL = 2
    ERROR_LEVEL = 3
    SEVERE_LEVEL = 4

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=False, encoding=None, error_handler='backslashreplace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string) or `False` (for discarding all stream messages)
              or `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """
        # The path to or description of the source data.
        self.source = source

        # The character encoding error handler.
        self.error_handler = error_handler

        # Show debug (level=0) system messages?
        self.debug_flag = debug

        # The level at or above which warning output is sent to
        # `self.stream`.
        self.report_level = report_level

        # The level at or above which `SystemMessage` exceptions are
        # raised, halting execution.
        self.halt_level = halt_level

        # Where warning output is sent; anything that is not already an
        # `io.ErrorOutput` gets wrapped in one.
        if not isinstance(stream, io.ErrorOutput):
            stream = io.ErrorOutput(stream, encoding, error_handler)
        self.stream = stream

        # The output character encoding.
        self.encoding = encoding or getattr(stream, 'encoding', 'ascii')

        # Functions or bound methods called with each system_message created.
        self.observers = []

        # The highest level system message generated so far.
        self.max_level = -1

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=False):
        """
        Deprecated: replace thresholds, stream and debug switch in one call.

        Emits a `DeprecationWarning`.  The `category` argument is not used.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions() deprecated; '
                      'Will be removed in Docutils 0.21 or later. '
                      'Set attributes via configuration settings or directly.',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if not isinstance(stream, io.ErrorOutput):
            stream = io.ErrorOutput(stream, self.encoding, self.error_handler)
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        Register `observer`, a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Unregister `observer` (``ValueError`` if it is not registered)."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every registered observer with `message`, in order."""
        for callback in self.observers:
            callback(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.
        """
        # `message` can be a `str` or `Exception` instance.
        if isinstance(message, Exception):
            message = str(message)

        attributes = kwargs.copy()
        if 'base_node' in attributes:
            # Take source/line from the node (or its closest ancestor)
            # without overriding explicitly passed values.
            source, line = get_source_line(attributes.pop('base_node'))
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        if 'source' not in attributes:
            # 'line' is absolute line number
            # NOTE(review): `get_source_and_line` is not defined in this
            # class; presumably it is attached from outside.  A missing
            # attribute is tolerated via the AttributeError handler.
            try:
                source, line = self.get_source_and_line(
                    attributes.get('line'))
            except AttributeError:
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, *children,
                                   level=level, type=self.levels[level],
                                   **attributes)
        if self.stream and (
                level >= self.report_level
                or (self.debug_flag and level == self.DEBUG_LEVEL)
                or level >= self.halt_level):
            self.stream.write(msg.astext() + '\n')
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue.  Typically, there is
        no effect on the processing.  Level-0 system messages are handled
        separately from the others: nothing is generated (and `None` is
        returned) unless the debug switch is on.
        """
        if not self.debug_flag:
            return None
        return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored.  Typically there
        is no effect on processing, and level-1 system messages are not
        reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed.  If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed.  If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed.  If
        ignored, the output will contain severe errors.  Typically level-4
        system messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)

240 

241 

class ExtensionOptionError(DataError):
    """Base class of the extension option errors defined below."""


class BadOptionError(ExtensionOptionError):
    """Invalid extension option field (see `extract_options()`)."""


class BadOptionDataError(ExtensionOptionError):
    """Invalid extension option data (see `extract_options()`)."""


class DuplicateOptionError(ExtensionOptionError):
    """Option given more than once (see `assemble_option_dict()`)."""

246 

247 

def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    This is `extract_options()` followed by `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)

271 

272 

def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    Names are lower-cased; the value is `None` for an empty field body.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        name_text = field[0].astext()
        if len(name_text.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(name_text.lower())
        body = field[1]
        if len(body) == 0:
            pairs.append((name, None))
            continue
        # The body must be exactly one paragraph holding exactly one Text.
        single_text_paragraph = (
            len(body) == 1
            and isinstance(body[0], nodes.paragraph)
            and len(body[0]) == 1
            and isinstance(body[0][0], nodes.Text))
        if not single_text_paragraph:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, body[0][0].astext()))
    return pairs

306 

307 

def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).

    Errors of the conversion function are re-raised with the option name
    and value prepended to the message; the original exception is kept
    as ``__cause__``.
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Exception args are not guaranteed to be strings.
            reason = ' '.join(str(arg) for arg in detail.args)
            message = '(option: "%s"; value: %r)\n%s' % (name, value, reason)
            try:
                error = detail.__class__(message)
            except TypeError:
                # Some subclasses (e.g. UnicodeDecodeError) cannot be built
                # from a single message; fall back to the documented base.
                if isinstance(detail, ValueError):
                    error = ValueError(message)
                else:
                    error = TypeError(message)
            raise error from detail
    return options

339 

340 

class NameValueError(DataError):
    """Invalid "name=value" input (see `extract_name_value()`)."""

342 

343 

def decode_path(path):
    """
    Ensure `path` is Unicode.  Return `str` instance.

    `str` values pass through unchanged.  Objects with a ``decode``
    method are decoded with the file system encoding, then UTF-8, and
    finally ASCII with replacement characters.  Any other false value
    (e.g. `None`) yields ''; any other true value raises `ValueError`.
    """
    # TODO: is this still required with Python 3?
    if isinstance(path, str):
        return path
    try:
        return path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:  # default value None has no decode method
        if path:
            raise ValueError('`path` value must be a String or ``None``, '
                             f'not {path!r}')
        return ''
    except UnicodeDecodeError:
        # Not valid in the file system encoding: try UTF-8, then give up
        # on exactness and decode as ASCII with replacement characters.
        try:
            return path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            return path.decode('ascii', 'replace')

366 

367 

def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  Values may be enclosed in single or double
    quotes; unquoted values end at the next space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    while line:
        raw_name, separator, remainder = line.partition('=')
        if not separator:
            raise NameValueError('missing "="')
        attname = raw_name.strip()
        if not attname:
            raise NameValueError(
                'missing attribute name before "="')
        line = remainder.lstrip()
        if not line:
            raise NameValueError(
                'missing value after "%s="' % attname)
        quote = line[0]
        if quote in '\'"':
            endquote = line.find(quote, 1)
            if endquote == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, quote))
            # Character right after the closing quote ('' at end of line).
            following = line[endquote + 1:endquote + 2]
            if following.strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, quote))
            data = line[1:endquote]
            line = line[endquote + 1:].lstrip()
        else:
            data, _, rest = line.partition(' ')
            line = rest.lstrip()
        pairs.append((attname.lower(), data))
    return pairs

411 

412 

def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes `report_level`, `halt_level`,
            `warning_stream`, `debug`, `error_encoding` and
            `error_encoding_error_handler` are read.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)

429 

430 

def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.

            For example, if parsing rST, at least provide the rst-parser
            settings, obtainable as follows:

            Defaults for parser component::

                settings = docutils.frontend.get_default_settings(
                               docutils.parsers.rst.Parser)

            Defaults and configuration file customizations::

                settings = docutils.core.Publisher(
                    parser=docutils.parsers.rst.Parser).get_settings()

    """
    # Import at top of module would lead to circular dependency!
    from docutils import frontend

    if settings is None:
        settings = frontend.get_default_settings()
    decoded_path = decode_path(source_path)
    document = nodes.document(settings,
                              new_reporter(decoded_path, settings),
                              source=decoded_path)
    document.note_source(decoded_path, -1)
    return document

466 

467 

def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` consists of a single `nodes.Text`.
    `keyword_substitutions` is an iterable of (compiled pattern,
    substitution) pairs; only the first pattern that matches is applied.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
            return

476 

477 

def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    The return value is a `str` suitable to be included in `source`
    as a reference to `target`.

    :Parameters:
        `source` : path-like object or None
            Path of a file in the start directory for the relative path
            (the file does not need to exist).
            The value ``None`` is replaced with "<cwd>/dummy_file".
        `target` : path-like object
            End point of the returned relative path.

    Differences to `os.path.relpath()`:

    * Inverse argument order.
    * `source` is assumed to be a FILE in the start directory (add a "dummy"
      file name to obtain the path relative from a directory)
      while `os.path.relpath()` expects a DIRECTORY as `start` argument.
    * Always use Posix path separator ("/") for the output.
    * Use `os.sep` for parsing the input.
    * If there is no common prefix, return the absolute path to `target`.

    Differences to `pathlib.PurePath.relative_to(other)`:

    * pathlib offers an object oriented interface.
    * `source` expects path to a FILE while `other` expects a DIRECTORY.
    * `relative_path()` always returns a path (relative or absolute),
      while `PurePath.relative_to()` raises a ValueError
      if `target` is not a subpath of `other` (no ".." inserted).
    """
    start_file = source or type(target)('dummy_file')
    source_parts = os.path.abspath(start_file).split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components shared by both paths.
    shared = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        shared += 1
    # One '..' per remaining source directory (the last part is the file).
    climb = ['..'] * (len(source_parts) - shared - 1)
    return '/'.join(climb + target_parts[shared:])

531 

532 

def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated (emits a `DeprecationWarning`).  Use get_stylesheet_list()
    instead to enable specification of multiple stylesheets as a
    comma-separated list.

    With `settings.stylesheet_path` set, return that path relative to
    `relative_to` (default: `settings._destination`); otherwise return
    `settings.stylesheet`.
    """
    warnings.warn('utils.get_stylesheet_reference()'
                  ' is obsoleted by utils.get_stylesheet_list()'
                  ' and will be removed in Docutils 2.0.',
                  DeprecationWarning, stacklevel=2)
    if not settings.stylesheet_path:
        return settings.stylesheet
    assert not settings.stylesheet, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    if relative_to is None:
        relative_to = settings._destination
    return relative_path(relative_to, settings.stylesheet_path)

553 

554 

555# Return 'stylesheet' or 'stylesheet_path' arguments as list. 

556# 

557# The original settings arguments are kept unchanged: you can test 

558# with e.g. ``if settings.stylesheet_path: ...``. 

559# 

560# Differences to the deprecated `get_stylesheet_reference()`: 

561# * return value is a list 

562# * no re-writing of the path (and therefore no optional argument) 

563# (if required, use ``utils.relative_path(source, target)`` 

564# in the calling script) 

def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    The value of `settings.stylesheet_path` (or else `settings.stylesheet`)
    is returned as a list; a string value is split at commas.  Entries
    taken from `stylesheet_path` are looked up in
    `settings.stylesheet_dirs` with `find_file_in_dirs()`.
    """
    assert not settings.stylesheet or not settings.stylesheet_path, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    sheets = settings.stylesheet_path or settings.stylesheet or []
    # programmatically set default may be string with comma separated list:
    if not isinstance(sheets, list):
        sheets = [entry.strip() for entry in sheets.split(',')]
    if not settings.stylesheet_path:
        return sheets
    # expand relative paths if found in stylesheet-dirs:
    return [find_file_in_dirs(entry, settings.stylesheet_dirs)
            for entry in sheets]

580 

581 

def find_file_in_dirs(path, dirs):
    """
    Search for `path` in the list of directories `dirs`.

    Return the first expansion that matches an existing file, as a `str`
    with "/" separators.  An absolute `path`, or one that is not found in
    any directory, is returned without expansion.
    """
    path = Path(path)
    if path.is_absolute():
        return path.as_posix()
    # Lazily built so that directories are only probed until the first hit.
    candidates = (Path(directory).expanduser() / path for directory in dirs)
    found = next((entry for entry in candidates if entry.exists()), path)
    return found.as_posix()

596 

597 

def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None (or not yet set, in which
    case it is set to None), return False unless the footnote reference
    style is 'superscript'.
    """
    configured = settings.setdefault('trim_footnote_reference_space', None)
    if configured is not None:
        return settings.trim_footnote_reference_space
    return getattr(settings, 'footnote_references', None) == 'superscript'

611 

612 

def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node (walking up via ``parent``) with a true "source" or
    "line" wins; ``(None, None)`` is returned if there is none.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None

623 

624 

def escape2null(text):
    """Return a string with escape-backslashes converted to nulls.

    Each backslash becomes a null character; the character following it
    (if any) is kept as is and never treated as an escape itself.
    """
    # DOTALL: the escaped character may be a newline.
    return re.sub(r'\\(.?)',
                  lambda match: '\x00' + match.group(1),
                  text, flags=re.DOTALL)

637 

638 

def split_escaped_whitespace(text):
    """
    Split `text` on escaped whitespace (null+space or null+newline).
    Return a list of strings.
    """
    return [piece
            for chunk in text.split('\x00 ')
            for piece in chunk.split('\x00\n')]

648 

649 

def strip_combining_chars(text):
    """Return `text` without its combining characters."""
    kept = []
    for char in text:
        if unicodedata.combining(char):
            continue
        kept.append(char)
    return ''.join(kept)

652 

653 

def find_combining_chars(text):
    r"""Return indices of all combining chars in Unicode string `text`.

    >>> from docutils.utils import find_combining_chars
    >>> find_combining_chars('A t\u0306ab\u0306le\u0306')
    [3, 6, 9]

    """
    indices = []
    for index, char in enumerate(text):
        if unicodedata.combining(char):
            indices.append(index)
    return indices

663 

664 

def column_indices(text):
    r"""Indices of Unicode string `text` when skipping combining characters.

    >>> from docutils.utils import column_indices
    >>> column_indices('A t\u0306ab\u0306le\u0306')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    return [index for index, char in enumerate(text)
            if not unicodedata.combining(char)]

679 

680 

east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1,   # Ambiguous (s/b wide in East Asian context,
                     }         # narrow otherwise, but that doesn't work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""


def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars:
    wide and full-width characters count as two columns, combining
    characters as none.
    """
    # Combining characters are skipped instead of being counted and then
    # subtracted as one column each, so that combining characters with a
    # wide East Asian width (e.g. U+3099) contribute no column either.
    return sum(east_asian_widths[unicodedata.east_asian_width(char)]
               for char in text
               if not unicodedata.combining(char))

702 

703 

def uniq(L):
    """Return a list of the items of `L` without duplicates, in order.

    Items are compared with ``in`` (equality), so they need not be
    hashable.
    """
    unique = []
    for candidate in L:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique

710 

711 

def normalize_language_tag(tag):
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The result starts with the most specific combination and ends with
    the bare base tag.

    Example:

    >>> from docutils.utils import normalize_language_tag
    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # normalize:
    normalized = tag.lower().replace('-', '_')
    # split (except singletons, which mark the following tag as non-standard):
    normalized = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', normalized)
    base, *subtags = normalized.split('_')
    # find all combinations of subtags, longest first
    taglist = ['-'.join((base, *combination))
               for size in range(len(subtags), 0, -1)
               for combination in itertools.combinations(subtags, size)]
    taglist.append(base)
    return taglist

737 

738 

def xml_declaration(encoding=None):
    """Return an XML text declaration.

    Include an encoding declaration, if `encoding`
    is not 'unicode' (in any case), '', or None.
    """
    if not encoding or encoding.lower() == 'unicode':
        return '<?xml version="1.0"?>\n'
    return f'<?xml version="1.0" encoding="{encoding}"?>\n'

750 

751 

class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.

        If output_file is None, no file output is done when calling add().
        """
        self.list = []
        self.file = None
        if output_file:
            self.set_output(output_file)
        self.add(*dependencies)

    def set_output(self, output_file):
        """
        Set the output file.

        `output_file` must be a string.  The specified file is
        immediately overwritten.  A false value is ignored.

        If output_file is '-', the output will be written to stdout.

        The list of already recorded dependencies is left unchanged.
        """
        if output_file:
            if output_file == '-':
                self.file = sys.stdout
            else:
                self.file = open(output_file, 'w', encoding='utf-8')

    def add(self, *paths):
        """
        Append each of `paths` to `self.list` unless it is already there.

        New entries are also written to `self.file` (one per line)
        unless `self.file` is `None`.
        """
        for path in paths:
            if isinstance(path, PurePath):
                path = path.as_posix()  # use '/' as separator
            if path not in self.list:
                self.list.append(path)
                if self.file is not None:
                    self.file.write(path+'\n')

    def close(self):
        """
        Close the output file.

        Standard output is never closed.  Calling this method without an
        output file (or more than once) is a no-op.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        try:
            output_file = self.file.name
        except AttributeError:
            # No output file set (or a file object without a name).
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)

820 

821 

# Abbreviations of the release level names used in version identifiers.
release_level_abbreviations = {
    'alpha': 'a',
    'beta': 'b',
    'candidate': 'rc',
    'final': '',
}


def version_identifier(version_info=None):
    """
    Return a version identifier string built from `version_info`, a
    `docutils.VersionInfo` namedtuple instance or compatible tuple.  If
    `version_info` is not provided, by default return a version identifier
    string based on `docutils.__version_info__` (i.e. the current Docutils
    version).

    The parts are ``<major>.<minor>[.<micro>][<level>][<serial>][.dev]``:
    a zero `micro` or `serial` is omitted, the release level is
    abbreviated, and ".dev" is appended unless `release` is true.
    """
    if version_info is None:
        version_info = __version_info__
    # 0 is omitted:
    micro = f'.{version_info.micro}' if version_info.micro else ''
    releaselevel = release_level_abbreviations[version_info.releaselevel]
    # 0 is omitted:
    serial = version_info.serial if version_info.serial else ''
    dev = '' if version_info.release else '.dev'
    return (f'{version_info.major}.{version_info.minor}'
            f'{micro}{releaselevel}{serial}{dev}')