Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/docutils/parsers/rst/__init__.py: 60%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

98 statements  

1# $Id$ 

2# Author: David Goodger <goodger@python.org> 

3# Copyright: This module has been placed in the public domain. 

4 

5""" 

6This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`, 

7the reStructuredText parser. 

8 

9 

10Usage 

11===== 

12 

131. Create a parser:: 

14 

15 parser = docutils.parsers.rst.Parser() 

16 

17 Several optional arguments may be passed to modify the parser's behavior. 

18 Please see `Customizing the Parser`_ below for details. 

19 

202. Gather input (a multi-line string), by reading a file or the standard 

21 input:: 

22 

23 input = sys.stdin.read() 

24 

253. Create a new empty `docutils.nodes.document` tree:: 

26 

27 document = docutils.utils.new_document(source, settings) 

28 

29 See `docutils.utils.new_document()` for parameter details. 

30 

314. Run the parser, populating the document tree:: 

32 

33 parser.parse(input, document) 

34 

35 

36Parser Overview 

37=============== 

38 

39The reStructuredText parser is implemented as a state machine, examining its 

40input one line at a time. To understand how the parser works, please first 

41become familiar with the `docutils.statemachine` module, then see the 

42`states` module. 

43 

44 

45Customizing the Parser 

46---------------------- 

47 

48Anything that isn't already customizable is that way simply because that type 

49of customizability hasn't been implemented yet. Patches welcome! 

50 

51When instantiating an object of the `Parser` class, two parameters may be 

52passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=True`` to enable an 

53initial RFC-2822 style header block, parsed as a "field_list" element (with 

54"class" attribute set to "rfc2822"). Currently this is the only body-level 

55element which is customizable without subclassing. (Tip: subclass `Parser` 

56and change its "state_classes" and "initial_state" attributes to refer to new 

57classes. Contact the author if you need more details.) 

58 

59The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass. 

60It handles inline markup recognition. A common extension is the addition of 

61further implicit hyperlinks, like "RFC 2822". This can be done by subclassing 

62`states.Inliner`, adding a new method for the implicit markup, and adding a 

63``(pattern, method)`` pair to the "implicit_dispatch" attribute of the 

64subclass. See `states.Inliner.implicit_inline()` for details. Explicit 

65inline markup can be customized in a `states.Inliner` subclass via the 

66``patterns.initial`` and ``dispatch`` attributes (and new methods as 

67appropriate). 

68""" 

69 

70from __future__ import annotations 

71 

72__docformat__ = 'reStructuredText' 

73 

74import docutils.parsers 

75import docutils.statemachine 

76from docutils.parsers.rst import roles, states 

77from docutils import frontend, nodes 

78from docutils.transforms import universal 

79 

80 

class Parser(docutils.parsers.Parser):

    """The reStructuredText parser."""

    supported = ('rst', 'restructuredtext', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    # Layout: inherited spec + (section title, description, option tuples).
    # Each option tuple is (help text, option strings, keyword arguments).
    settings_spec = docutils.parsers.Parser.settings_spec + (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "https://peps.python.org/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'https://peps.python.org/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references '
          '(default "https://tools.ietf.org/html/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'https://tools.ietf.org/html/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none" (no parsing). Default is "long".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'long', 'metavar': '<format>'}),
         ('Change straight quotation marks to typographic form: '
          'one of "yes", "no", "alt[ernative]" (default "no").',
          ['--smart-quotes'],
          {'default': False, 'metavar': '<yes/no/alt>',
           'validator': frontend.validate_ternary}),
         ('Characters to use as "smart quotes" for <language>. ',
          ['--smartquotes-locales'],
          {'metavar': '<language:quotes[,language:quotes,...]>',
           'action': 'append',
           'validator': frontend.validate_smartquotes_locales}),
         ('Inline markup recognized at word boundaries only '
          '(adjacent to punctuation or whitespace). '
          'Force character-level inline markup recognition with '
          '"\\ " (backslash + space). Default.',
          ['--word-level-inline-markup'],
          {'action': 'store_false', 'dest': 'character_level_inline_markup'}),
         ('Inline markup recognized anywhere, regardless of surrounding '
          'characters. Backslash-escapes must be used to avoid unwanted '
          'markup recognition. Useful for East Asian languages. '
          'Experimental.',
          ['--character-level-inline-markup'],
          {'action': 'store_true', 'default': False,
           'dest': 'character_level_inline_markup'}),
         )
        )

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=False, inliner=None) -> None:
        """Configure the initial state and the inline markup handler.

        A true `rfc2822` selects the 'RFC2822Body' initial state instead of
        'Body'.  `inliner` is stored as-is and handed to the state machine
        when `parse()` runs.
        """
        if rfc2822:
            self.initial_state = 'RFC2822Body'
        else:
            self.initial_state = 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def get_transforms(self):
        """Return the inherited transforms plus `universal.SmartQuotes`."""
        return super().get_transforms() + [universal.SmartQuotes]

    def parse(self, inputstring, document) -> None:
        """Parse `inputstring` and populate `document`, a document tree.

        If any input line is longer than the ``line_length_limit`` setting,
        an error system message is appended to the document and the state
        machine is not run.
        """
        self.setup_parse(inputstring, document)
        # provide fallbacks in case the document has only generic settings
        self.document.settings.setdefault('tab_width', 8)
        self.document.settings.setdefault('syntax_highlight', 'long')
        self.statemachine = states.RSTStateMachine(
              state_classes=self.state_classes,
              initial_state=self.initial_state,
              debug=document.reporter.debug_flag)
        inputlines = docutils.statemachine.string2lines(
              inputstring, tab_width=document.settings.tab_width,
              convert_whitespace=True)
        try:
            for i, line in enumerate(inputlines):
                if len(line) > self.document.settings.line_length_limit:
                    error = self.document.reporter.error(
                        'Line %d exceeds the line-length-limit.' % (i+1))
                    self.document.append(error)
                    break
            else:
                # no over-long line found: run the actual parse
                self.statemachine.run(inputlines, document,
                                      inliner=self.inliner)
        finally:
            # Restore the "default" default role after parsing a document.
            # `roles._roles` is module-level state, so this must also happen
            # when parsing is aborted by an exception; otherwise the stale
            # entry would leak into later parses in the same process.
            roles._roles.pop('', None)
        self.finish_parse()

190 

191 

class DirectiveError(Exception):

    """
    Store a message and a system message level.

    To be thrown from inside directive code.

    Do not instantiate directly -- use `Directive.directive_error()`
    instead!
    """

    def __init__(self, level, message) -> None:
        """Set error `message` and `level`.

        Both values are stored as the ``level`` and ``msg`` attributes and
        are also forwarded to the base class, so that ``args`` is populated.
        With empty ``args``, the exception could not be re-created by
        `copy` or `pickle` (they call the class with ``args``).
        """
        super().__init__(level, message)
        self.level = level
        self.msg = message

    def __str__(self) -> str:
        """Return the error message alone, without the level."""
        # The default `Exception.__str__` would render the ``args`` tuple.
        return str(self.msg)

208 

209 

class Directive:

    """
    Base class for reStructuredText directives.

    Subclasses may set the following attributes, which are interpreted
    by the directive parser (which runs the directive class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: {}, no
      options).  Several conversion functions are defined in the
      directives/__init__.py module.

      Option conversion functions take a single parameter, the option
      argument (a string or ``None``), validate it and/or convert it
      to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed.  Client
      code must handle the case where content is required but not
      supplied (an empty content list will be supplied).

    Arguments are normally single whitespace-separated words.  The
    final argument may contain whitespace and/or newlines if
    `final_argument_whitespace` is True.

    If the form of the arguments is more complex, specify only one
    argument (either required or optional) and set
    `final_argument_whitespace` to True; the client code must do any
    context-sensitive parsing.

    To run a directive implementation, the directive class is
    instantiated and its `run()` method is executed.  Instantiation
    sets these instance variables:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``content_offset`` is the line offset of the first line
      of the content from the beginning of the current input.
      Used when initiating a nested parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state
      which called the directive function.

    - ``reporter`` is the state machine's `reporter` instance.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  This can be an empty list if there is nothing to
    insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = self.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <https://docutils.sourceforge.io/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine) -> None:
        """Store the parsed directive data and its parsing context."""
        # the directive as found in the source
        self.name, self.arguments = name, arguments
        self.options, self.content = options, content
        self.block_text = block_text
        # position information
        self.lineno, self.content_offset = lineno, content_offset
        # parsing context
        self.state, self.state_machine = state, state_machine
        self.reporter = state_machine.reporter

    def run(self):
        """Do the directive's work; must be overridden in subclasses."""
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Create and return a DirectiveError that can be raised.

        Use "raise self.directive_error(level, message)" inside a
        directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 directive error for `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 directive error for `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 directive error for `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 (ERROR) directive error for `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 directive error for `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Raise an ERROR-level DirectiveError unless the directive has
        contents.
        """
        if self.content:
            return
        raise self.error('Content block expected for the "%s" directive; '
                         'none found.' % self.name)

    def add_name(self, node) -> None:
        """Move a "name" option, if present, into node['names'].

        The name string is normalized and `node` is registered as an
        explicit target.  Without a "name" option this is a no-op.
        """
        if 'name' not in self.options:
            return
        raw_name = self.options.pop('name')
        normalized = nodes.fully_normalize_name(raw_name)
        if 'name' in node:
            del node['name']
        node['names'].append(normalized)
        self.state.document.note_explicit_target(node, node)

390 

391 

def convert_directive_function(directive_fn):
    """
    Wrap `directive_fn` in a newly defined directive class and return it.

    `directive_fn` follows the old-style, functional interface: its
    optional function attributes ``options``, ``content`` and
    ``arguments`` supply the class-level directive configuration.
    """

    class FunctionalDirective(Directive):

        # Fall back to "no options, no content, no arguments" for
        # attributes the function does not define.
        option_spec = getattr(directive_fn, 'options', None)
        has_content = getattr(directive_fn, 'content', False)
        _argument_spec = getattr(directive_fn, 'arguments', (0, 0, False))
        (required_arguments,
         optional_arguments,
         final_argument_whitespace) = _argument_spec

        def run(self):
            # Old-style directive functions take everything positionally.
            call_args = (
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine,
            )
            return directive_fn(*call_args)

    # Return new-style directive.
    return FunctionalDirective