Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/docutils/parsers/rst/__init__.py: 58%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

90 statements  

1# $Id$ 

2# Author: David Goodger <goodger@python.org> 

3# Copyright: This module has been placed in the public domain. 

4 

5""" 

6This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`, 

7the reStructuredText parser. 

8 

9 

10Usage 

11===== 

12 

131. Create a parser:: 

14 

15 parser = docutils.parsers.rst.Parser() 

16 

17 Several optional arguments may be passed to modify the parser's behavior. 

18 Please see `Customizing the Parser`_ below for details. 

19 

202. Gather input (a multi-line string), by reading a file or the standard 

21 input:: 

22 

23 input = sys.stdin.read() 

24 

253. Create a new empty `docutils.nodes.document` tree:: 

26 

27 document = docutils.utils.new_document(source, settings) 

28 

29 See `docutils.utils.new_document()` for parameter details. 

30 

314. Run the parser, populating the document tree:: 

32 

33 parser.parse(input, document) 

34 

35 

36Parser Overview 

37=============== 

38 

39The reStructuredText parser is implemented as a state machine, examining its 

40input one line at a time. To understand how the parser works, please first 

41become familiar with the `docutils.statemachine` module, then see the 

42`states` module. 

43 

44 

45Customizing the Parser 

46---------------------- 

47 

48Anything that isn't already customizable is that way simply because that type 

49of customizability hasn't been implemented yet. Patches welcome! 

50 

51When instantiating an object of the `Parser` class, two parameters may be 

52passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=True`` to enable an 

53initial RFC-2822 style header block, parsed as a "field_list" element (with 

54"class" attribute set to "rfc2822"). Currently this is the only body-level 

55element which is customizable without subclassing. (Tip: subclass `Parser` 

56and change its "state_classes" and "initial_state" attributes to refer to new 

57classes. Contact the author if you need more details.) 

58 

59The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass. 

60It handles inline markup recognition. A common extension is the addition of 

61further implicit hyperlinks, like "RFC 2822". This can be done by subclassing 

62`states.Inliner`, adding a new method for the implicit markup, and adding a 

63``(pattern, method)`` pair to the "implicit_dispatch" attribute of the 

64subclass. See `states.Inliner.implicit_inline()` for details. Explicit 

65inline markup can be customized in a `states.Inliner` subclass via the 

66``patterns.initial`` and ``dispatch`` attributes (and new methods as 

67appropriate). 

68""" 

69 

70__docformat__ = 'reStructuredText' 

71 

72 

73import docutils.parsers 

74import docutils.statemachine 

75from docutils.parsers.rst import roles, states 

76from docutils import frontend, nodes 

77from docutils.transforms import universal 

78 

79 

class Parser(docutils.parsers.Parser):

    """
    The reStructuredText parser.

    Feeds the input, line by line, through an `states.RSTStateMachine`
    and thereby populates a document tree.
    """

    supported = ('rst', 'restructuredtext', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    settings_spec = docutils.parsers.Parser.settings_spec + (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "https://peps.python.org/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'https://peps.python.org/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references '
          '(default "https://tools.ietf.org/html/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'https://tools.ietf.org/html/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none" (no parsing). Default is "long".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'long', 'metavar': '<format>'}),
         ('Change straight quotation marks to typographic form: '
          'one of "yes", "no", "alt[ernative]" (default "no").',
          ['--smart-quotes'],
          {'default': False, 'metavar': '<yes/no/alt>',
           'validator': frontend.validate_ternary}),
         ('Characters to use as "smart quotes" for <language>. ',
          ['--smartquotes-locales'],
          {'metavar': '<language:quotes[,language:quotes,...]>',
           'action': 'append',
           'validator': frontend.validate_smartquotes_locales}),
         ('Inline markup recognized at word boundaries only '
          '(adjacent to punctuation or whitespace). '
          'Force character-level inline markup recognition with '
          '"\\ " (backslash + space). Default.',
          ['--word-level-inline-markup'],
          {'action': 'store_false', 'dest': 'character_level_inline_markup'}),
         ('Inline markup recognized anywhere, regardless of surrounding '
          'characters. Backslash-escapes must be used to avoid unwanted '
          'markup recognition. Useful for East Asian languages. '
          'Experimental.',
          ['--character-level-inline-markup'],
          {'action': 'store_true', 'default': False,
           'dest': 'character_level_inline_markup'}),
         )
        )

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=False, inliner=None):
        """
        Select the initial state and remember the `inliner`.

        A true `rfc2822` makes 'RFC2822Body' the initial state instead
        of 'Body'.  `inliner` is stored unchanged and handed to the
        state machine in `parse()`.
        """
        self.initial_state = 'RFC2822Body' if rfc2822 else 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def get_transforms(self):
        """Return the inherited transforms plus `universal.SmartQuotes`."""
        inherited = super().get_transforms()
        return inherited + [universal.SmartQuotes]

    def parse(self, inputstring, document):
        """
        Parse `inputstring` and populate `document`, a document tree.

        If any input line is longer than the "line_length_limit" setting,
        an error naming the first such line is appended to the document
        and the state machine is not run.
        """
        self.setup_parse(inputstring, document)
        # Fallback values, needed when the document carries only
        # generic settings.
        for setting, fallback in (('tab_width', 8),
                                  ('syntax_highlight', 'long')):
            self.document.settings.setdefault(setting, fallback)
        self.statemachine = states.RSTStateMachine(
            state_classes=self.state_classes,
            initial_state=self.initial_state,
            debug=document.reporter.debug_flag)
        inputlines = docutils.statemachine.string2lines(
            inputstring, tab_width=document.settings.tab_width,
            convert_whitespace=True)
        # 1-based number of the first over-long line (None if all lines fit).
        # The generator is lazy: the scan stops at the first hit and the
        # limit is only looked up when there is a line to compare.
        overlong = next(
            (number for number, text in enumerate(inputlines, 1)
             if len(text) > self.document.settings.line_length_limit),
            None)
        if overlong is None:
            self.statemachine.run(inputlines, document, inliner=self.inliner)
        else:
            error = self.document.reporter.error(
                'Line %d exceeds the line-length-limit.' % overlong)
            self.document.append(error)
        # After parsing, drop any document-specific default role so the
        # "default" default role applies again.
        roles._roles.pop('', None)
        self.finish_parse()

189 

190 

class DirectiveError(Exception):

    """
    Store a message and a system message level.

    To be raised from inside directive code.

    Do not instantiate directly -- use `Directive.directive_error()`
    instead!
    """

    def __init__(self, level, message):
        """
        Set error `message` and `level`.

        `level` is the system message level, `message` the message text.
        They are available as the ``level`` and ``msg`` attributes.
        """
        # Forward both values to `Exception` so that ``args`` mirrors the
        # constructor signature.  With empty ``args`` the exception cannot
        # be rebuilt by `copy` or `pickle` (both call ``cls(*args)``).
        super().__init__(level, message)
        self.level = level
        self.msg = message

    def __str__(self):
        """Return the message text (shown in tracebacks and logs)."""
        return str(self.msg)

207 

208 

class Directive:

    """
    Base class for reStructuredText directives.

    Subclasses may set the following class attributes.  They are read by
    the directive parser (the code which runs the directive class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: None, no
      options).  Several conversion functions are defined in the
      directives/__init__.py module.

      An option conversion function takes a single parameter, the option
      argument (a string or ``None``), and validates it and/or converts
      it to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed.  Client
      code must handle the case where content is required but not
      supplied (an empty content list will be supplied).

    Arguments are normally single whitespace-separated words.  If
    `final_argument_whitespace` is True, the final argument may contain
    whitespace and/or newlines.

    For arguments of a more complex form, specify only one argument
    (either required or optional) and set `final_argument_whitespace`
    to True; the client code must then do any context-sensitive parsing.

    To run a directive implementation, the directive class is
    instantiated and its `run()` method is executed.  Instantiation
    sets these instance variables:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``content_offset`` is the line offset of the first line
      of the content from the beginning of the current input.
      Used when initiating a nested parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state
      which called the directive function.

    - ``reporter`` is the state machine's `reporter` instance.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  The list may be empty if there is nothing to insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = self.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <https://docutils.sourceforge.io/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        """
        Store the directive data as instance attributes.

        Every parameter is kept under an attribute of the same name;
        ``reporter`` is taken from `state_machine`.
        """
        self.name = name
        self.arguments = arguments
        self.options = options
        self.content = content
        self.lineno = lineno
        self.content_offset = content_offset
        self.block_text = block_text
        self.state = state
        self.state_machine = state_machine
        self.reporter = state_machine.reporter

    def run(self):
        """Execute the directive; subclasses must override this method."""
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Return a DirectiveError suitable for being thrown as an exception.

        Use "raise self.directive_error(level, message)" inside a
        directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 directive error for `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 directive error for `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 directive error for `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 directive error for `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 directive error for `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Throw an ERROR-level DirectiveError if the directive doesn't
        have contents.
        """
        if self.content:
            return
        raise self.error('Content block expected for the "%s" directive; '
                         'none found.' % self.name)

    def add_name(self, node):
        """Append self.options['name'] to node['names'] if it exists.

        The name is removed from the options, normalized, and the node
        is registered as explicit target.
        """
        if 'name' not in self.options:
            return
        normalized = nodes.fully_normalize_name(self.options.pop('name'))
        if 'name' in node:
            del node['name']
        node['names'].append(normalized)
        self.state.document.note_explicit_target(node, node)

389 

390 

def convert_directive_function(directive_fn):
    """
    Define & return a directive class generated from `directive_fn`.

    `directive_fn` uses the old-style, functional interface: its optional
    ``options``, ``content``, and ``arguments`` function attributes supply
    the class attributes of the generated `Directive` subclass, and it is
    called with the nine directive instance values when the directive runs.
    """

    class FunctionalDirective(Directive):

        option_spec = getattr(directive_fn, 'options', None)
        has_content = getattr(directive_fn, 'content', False)
        # Old-style argument spec: a 3-tuple (required, optional,
        # final-argument-whitespace flag).
        _argument_spec = getattr(directive_fn, 'arguments', (0, 0, False))
        (required_arguments,
         optional_arguments,
         final_argument_whitespace) = _argument_spec

        def run(self):
            """Call the wrapped function with the directive data."""
            call_args = (
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine)
            return directive_fn(*call_args)

    # Return new-style directive.
    return FunctionalDirective